MAGMA  1.2.0
Matrix Algebra on GPU and Multicore Architectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
codelet_zgetrl.c
Go to the documentation of this file.
1 
17 #include "morse_starpu.h"
18 
/*
 * For PLASMA 2.4 and later, route every use of CORE_zgetrf in this file to
 * CORE_zgetrf_incpiv.
 *
 * The version is compared as an ordered (major, minor) pair: testing the two
 * components independently would wrongly exclude releases such as 3.0, whose
 * minor number is below 4 even though the release is newer than 2.4.
 */
#if (PLASMA_VERSION_MAJOR > 2) || \
    ((PLASMA_VERSION_MAJOR == 2) && (PLASMA_VERSION_MINOR >= 4))
#define CORE_zgetrf CORE_zgetrf_incpiv
#endif
22 
23 /*
24  * Codelet CPU
25  */
26 static void cl_zgetrl_cpu_func(void *descr[], void *cl_arg)
27 {
28  int m;
29  int n;
30  int ib;
31  PLASMA_Complex64_t *A;
32  int lda;
33  PLASMA_Complex64_t *L;
34  int ldl;
35  int *IPIV;
36  PLASMA_bool check_info;
37  int iinfo;
38  int info;
39 
40  A = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
41  L = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
42 
43  starpu_unpack_cl_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, NULL, NULL, &check_info, &iinfo);
44  CORE_zgetrf(m, n, ib, A, lda, IPIV, &info);
45 
46 #if defined(MORSE_USE_CUDA) && !defined(WITHOUTTRTRI)
47  /*
48  * L stores:
49  * L1 L2 L3 ...
50  * L1^-1 L2^-1 L3^-1 ...
51  */
52  /* Compute L-1 in lower rectangle of L */
53  L += ib;
54  {
55  int i, sb;
56  for (i=0; i<n; i+=ib) {
57  sb = min( ib, n-i );
58  CORE_zlacpy(PlasmaUpperLower, sb, sb, A+(i*lda+i), lda, L+(i*ldl), ldl );
59 
60  CORE_ztrtri( PlasmaLower, PlasmaUnit, sb, L+(i*ldl), ldl, &info );
61  if (info != 0 ) {
62  fprintf(stderr, "ERROR, trtri returned with info = %d\n", info);
63  }
64  }
65  }
66 #endif
67 
68  /* if (check_info && info != PLASMA_SUCCESS) */
69  /* return iinfo+info */
70 }
71 
72 /*
73  * Codelet Multi-cores
74  */
75 #ifdef MORSE_USE_MULTICORE
/*
 * Multi-core implementation of the zgetrl codelet.
 *
 * Currently a no-op stub: it performs no work on the tiles in descr and
 * ignores cl_arg.
 */
static void cl_zgetrl_mc_func(void *descr[], void *cl_arg)
{
    /* The casts only mark both parameters as deliberately unused. */
    (void)descr;
    (void)cl_arg;
}
79 #endif
80 
81 /*
82  * Codelet GPU
83  */
84 #ifdef MORSE_USE_CUDA
85 static void cl_zgetrl_cuda_func(void *descr[], void *cl_arg)
86 {
87  int m;
88  int n;
89  int ib;
90  cuDoubleComplex *hA, *dA;
91  cuDoubleComplex *hL, *dL;
92  cuDoubleComplex *dwork;
93  morse_starpu_ws_t *h_work;
94  morse_starpu_ws_t *d_work;
95  int lda, ldl;
96  int *IPIV;
97  PLASMA_bool check_info;
98  int iinfo;
99  int info;
100 
101  starpu_unpack_cl_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &h_work, &d_work, &check_info, &iinfo);
102 
103  dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
104  dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
105  /*
106  * hwork => at least (2*IB+NB)*NB contains all hA and hL
107  * dwork => at least IB*NB
108  */
109  hA = morse_starpu_ws_getlocal(h_work);
110  dwork = morse_starpu_ws_getlocal(d_work);
111 
112  hL = hA + lda*n;
113 
114  /* Initialize L to 0 */
115  memset(hL, 0, ldl*n*sizeof(cuDoubleComplex));
116 
117  /* Copy First panel */
118  cublasGetMatrix( m, min(ib,m), sizeof(cuDoubleComplex), dA, lda, hA, lda );
119 
120  magma_zgetrl_gpu( 'C', m, n, ib,
121  hA, lda, dA, lda,
122  hL, ldl, dL, ldl,
123  IPIV,
124  dwork, lda,
125  &info );
126 
127  cudaThreadSynchronize();
128 }
129 #endif
130 
131 /*
132  * Codelet definition
133  */
134 CODELETS(zgetrl, 2, cl_zgetrl_cpu_func, cl_zgetrl_cuda_func, cl_zgetrl_cpu_func)
135 
136 /*
137  * Wrapper
138  */
139 void MORSE_zgetrl( MorseOption_t *options,
140  int m, int n, int ib,
141  magma_desc_t *A, int Am, int An,
142  magma_desc_t *L, int Lm, int Ln,
143  int *IPIV,
144  PLASMA_bool check, int iinfo)
145 {
146  starpu_codelet *zgetrl_codelet;
147  void (*callback)(void*) = options->profiling ? cl_zgetrl_callback : NULL;
148  int lda = BLKLDD( A, Am );
149  int ldl = BLKLDD( L, Lm );
150  morse_starpu_ws_t *h_work = (morse_starpu_ws_t*)(options->ws_host);
151  morse_starpu_ws_t *d_work = (morse_starpu_ws_t*)(options->ws_device);
152 
153 #ifdef MORSE_USE_MULTICORE
154  zgetrl_codelet = options->parallel ? &cl_zgetrl_mc : &cl_zgetrl;
155 #else
156  zgetrl_codelet = &cl_zgetrl;
157 #endif
158 
159  starpu_Insert_Task(zgetrl_codelet,
160  VALUE, &m, sizeof(int),
161  VALUE, &n, sizeof(int),
162  VALUE, &ib, sizeof(int),
163  INOUT, BLKADDR( A, PLASMA_Complex64_t, Am, An ),
164  VALUE, &lda, sizeof(int),
165  OUTPUT, BLKADDR( L, PLASMA_Complex64_t, Lm, Ln ),
166  VALUE, &ldl, sizeof(int),
167  VALUE, &IPIV, sizeof(int*),
168  VALUE, &h_work, sizeof(morse_starpu_ws_t*),
169  VALUE, &d_work, sizeof(morse_starpu_ws_t*),
170  VALUE, &check, sizeof(PLASMA_bool),
171  VALUE, &iinfo, sizeof(int),
172  PRIORITY, options->priority,
173  CALLBACK, callback, NULL,
174  0);
175 }