29 PLASMA_Complex64_t alpha;
30 PLASMA_Complex64_t *
A;
32 PLASMA_Complex64_t *
B;
34 PLASMA_Complex64_t beta;
35 PLASMA_Complex64_t *
C;
38 A = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
39 B = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
40 C = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
42 starpu_unpack_cl_args(cl_arg, &transA, &transB, &M, &N, &K, &alpha, &LDA, &LDB, &beta, &LDC);
/*
 * Multicore variant of the zgemm codelet body; this section is only compiled
 * when MORSE_USE_MULTICORE is defined.
 *
 * NOTE(review): the function header, the braces and the declarations of
 * transA, transB, M, N, K, LDA, LDB and LDC are not visible in this listing.
 * The function is presumably cl_zgemm_mc_func -- confirm against the full file.
 */
55 #ifdef MORSE_USE_MULTICORE
/* Scalars and matrix pointers used by the GEMM call below. */
63 PLASMA_Complex64_t alpha;
64 PLASMA_Complex64_t *
A;
66 PLASMA_Complex64_t *
B;
68 PLASMA_Complex64_t beta;
69 PLASMA_Complex64_t *
C;
/* A, B and C are taken from the first three StarPU matrix interfaces. */
72 A = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
73 B = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
74 C = (PLASMA_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
/*
 * Unpack the by-value arguments from cl_arg, in this order: transA, transB,
 * M, N, K, alpha, LDA, LDB, beta, LDC. The same order is used by the VALUE
 * entries of the task-insertion code later in this file.
 */
76 starpu_unpack_cl_args(cl_arg, &transA, &transB, &M, &N, &K, &alpha, &LDA, &LDB, &beta, &LDC);
/* Start of the PLASMA_zgemm_Lapack call; the remaining arguments are not visible in this listing. */
78 PLASMA_zgemm_Lapack(transA, transB,
/*
 * Makes cl_zgemm_mc_func an alias of cl_zgemm_cpu_func.
 * NOTE(review): presumably this sits in the #else branch of the #ifdef above
 * (the branch directive itself is not visible here) -- confirm.
 */
85 #define cl_zgemm_mc_func cl_zgemm_cpu_func
/*
 * CUDA implementation of the zgemm codelet (file-local).
 *
 * descr  - array of StarPU data interfaces; entries 0, 1 and 2 are read as
 *          matrices and provide the A, B and C pointers respectively.
 * cl_arg - packed codelet arguments, decoded with starpu_unpack_cl_args().
 *
 * NOTE(review): the braces, the declarations of A, B, C, transA, transB,
 * M, N, K, LDA, LDB, LDC and the beginning of the GEMM call are not visible
 * in this listing.
 */
92 static void cl_zgemm_cuda_func(
void *descr[],
void *cl_arg)
/* Scalars are held as cuDoubleComplex on this path. */
99 cuDoubleComplex alpha;
104 cuDoubleComplex beta;
/* Matrix pointers come from the first three StarPU matrix interfaces. */
108 A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
109 B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
110 C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
/*
 * Unpack the by-value arguments from cl_arg, in this order: transA, transB,
 * M, N, K, alpha, LDA, LDB, beta, LDC.
 */
112 starpu_unpack_cl_args(cl_arg, &transA, &transB, &M, &N, &K, &alpha, &LDA, &LDB, &beta, &LDC);
/*
 * Tail of a call whose beginning is not visible in this listing
 * (presumably the cuBLAS zgemm call -- confirm). beta is passed by value.
 */
119 B, LDB, beta, C, LDC);
/*
 * Block the host until the device has completed the work issued above.
 * NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API
 * in favour of cudaDeviceSynchronize(); consider migrating if the minimum
 * supported CUDA version allows it. The returned cudaError_t is ignored here,
 * so asynchronous failures go unnoticed.
 */
120 cudaThreadSynchronize();
134 int transA,
int transB,
136 PLASMA_Complex64_t alpha,
magma_desc_t *A,
int Am,
int An,
138 PLASMA_Complex64_t beta,
magma_desc_t *C,
int Cm,
int Cn)
140 starpu_codelet *zgemm_codelet;
141 void (*callback)(
void*) = options->profiling ? cl_zgemm_callback : NULL;
142 int lda =
BLKLDD( A, Am );
143 int ldb =
BLKLDD( B, Bm );
144 int ldc =
BLKLDD( C, Cm );
146 #ifdef MORSE_USE_MULTICORE
147 zgemm_codelet = options->parallel ? &cl_zgemm_mc : &cl_zgemm;
149 zgemm_codelet = &cl_zgemm;
154 VALUE, &transA,
sizeof(PLASMA_enum),
155 VALUE, &transB,
sizeof(PLASMA_enum),
156 VALUE, &m,
sizeof(
int),
157 VALUE, &n,
sizeof(
int),
158 VALUE, &k,
sizeof(
int),
159 VALUE, &alpha,
sizeof(PLASMA_Complex64_t),
161 VALUE, &lda,
sizeof(
int),
163 VALUE, &ldb,
sizeof(
int),
164 VALUE, &beta,
sizeof(PLASMA_Complex64_t),
166 VALUE, &ldc,
sizeof(
int),