PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_sgemm.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
19 /***************************************************************************/
24 #if defined(PLASMA_HAVE_WEAK)
25 #pragma weak CORE_sgemm = PCORE_sgemm
26 #define CORE_sgemm PCORE_sgemm
27 #endif
28 void CORE_sgemm(int transA, int transB,
29  int M, int N, int K,
30  float alpha, float *A, int LDA,
31  float *B, int LDB,
32  float beta, float *C, int LDC)
33 {
36  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
37  M, N, K,
38  (alpha), A, LDA,
39  B, LDB,
40  (beta), C, LDC);
41 }
42 
43 /***************************************************************************/
46 void QUARK_CORE_sgemm(Quark *quark, Quark_Task_Flags *task_flags,
47  int transA, int transB,
48  int m, int n, int k, int nb,
49  float alpha, float *A, int lda,
50  float *B, int ldb,
51  float beta, float *C, int ldc)
52 {
54  QUARK_Insert_Task(quark, CORE_sgemm_quark, task_flags,
55  sizeof(PLASMA_enum), &transA, VALUE,
56  sizeof(PLASMA_enum), &transB, VALUE,
57  sizeof(int), &m, VALUE,
58  sizeof(int), &n, VALUE,
59  sizeof(int), &k, VALUE,
60  sizeof(float), &alpha, VALUE,
61  sizeof(float)*nb*nb, A, INPUT,
62  sizeof(int), &lda, VALUE,
63  sizeof(float)*nb*nb, B, INPUT,
64  sizeof(int), &ldb, VALUE,
65  sizeof(float), &beta, VALUE,
66  sizeof(float)*nb*nb, C, INOUT,
67  sizeof(int), &ldc, VALUE,
68  0);
69 }
70 
71 /***************************************************************************/
74 void QUARK_CORE_sgemm2( Quark *quark, Quark_Task_Flags *task_flags,
75  int transA, int transB,
76  int m, int n, int k, int nb,
77  float alpha, float *A, int lda,
78  float *B, int ldb,
79  float beta, float *C, int ldc)
80 {
82  QUARK_Insert_Task(quark, CORE_sgemm_quark, task_flags,
83  sizeof(PLASMA_enum), &transA, VALUE,
84  sizeof(PLASMA_enum), &transB, VALUE,
85  sizeof(int), &m, VALUE,
86  sizeof(int), &n, VALUE,
87  sizeof(int), &k, VALUE,
88  sizeof(float), &alpha, VALUE,
89  sizeof(float)*nb*nb, A, INPUT,
90  sizeof(int), &lda, VALUE,
91  sizeof(float)*nb*nb, B, INPUT,
92  sizeof(int), &ldb, VALUE,
93  sizeof(float), &beta, VALUE,
94  sizeof(float)*nb*nb, C, INOUT | LOCALITY | GATHERV,
95  sizeof(int), &ldc, VALUE,
96  0);
97 }
98 
99 /***************************************************************************/
102 #if defined(PLASMA_HAVE_WEAK)
103 #pragma weak CORE_sgemm_quark = PCORE_sgemm_quark
104 #define CORE_sgemm_quark PCORE_sgemm_quark
105 #endif
107 {
108  int transA;
109  int transB;
110  int m;
111  int n;
112  int k;
113  float alpha;
114  float *A;
115  int lda;
116  float *B;
117  int ldb;
118  float beta;
119  float *C;
120  int ldc;
121 
122  quark_unpack_args_13(quark, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
123  cblas_sgemm(
125  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
126  m, n, k,
127  (alpha), A, lda,
128  B, ldb,
129  (beta), C, ldc);
130 }
131 
132 /***************************************************************************/
135 void QUARK_CORE_sgemm_f2(Quark *quark, Quark_Task_Flags *task_flags,
136  int transA, int transB,
137  int m, int n, int k, int nb,
138  float alpha, float *A, int lda,
139  float *B, int ldb,
140  float beta, float *C, int ldc,
141  float *fake1, int szefake1, int flag1,
142  float *fake2, int szefake2, int flag2)
143 {
145  QUARK_Insert_Task(quark, CORE_sgemm_f2_quark, task_flags,
146  sizeof(PLASMA_enum), &transA, VALUE,
147  sizeof(PLASMA_enum), &transB, VALUE,
148  sizeof(int), &m, VALUE,
149  sizeof(int), &n, VALUE,
150  sizeof(int), &k, VALUE,
151  sizeof(float), &alpha, VALUE,
152  sizeof(float)*nb*nb, A, INPUT,
153  sizeof(int), &lda, VALUE,
154  sizeof(float)*nb*nb, B, INPUT,
155  sizeof(int), &ldb, VALUE,
156  sizeof(float), &beta, VALUE,
157  sizeof(float)*nb*nb, C, INOUT | LOCALITY,
158  sizeof(int), &ldc, VALUE,
159  sizeof(float)*szefake1, fake1, flag1,
160  sizeof(float)*szefake2, fake2, flag2,
161  0);
162 }
163 
164 /***************************************************************************/
167 #if defined(PLASMA_HAVE_WEAK)
168 #pragma weak CORE_sgemm_f2_quark = PCORE_sgemm_f2_quark
169 #define CORE_sgemm_f2_quark PCORE_sgemm_f2_quark
170 #endif
172 {
173  int transA;
174  int transB;
175  int M;
176  int N;
177  int K;
178  float alpha;
179  float *A;
180  int LDA;
181  float *B;
182  int LDB;
183  float beta;
184  float *C;
185  int LDC;
186  void *fake1, *fake2;
187 
188  quark_unpack_args_15(quark, transA, transB, M, N, K, alpha,
189  A, LDA, B, LDB, beta, C, LDC, fake1, fake2);
190  cblas_sgemm(
192  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
193  M, N, K,
194  (alpha), A, LDA,
195  B, LDB,
196  (beta), C, LDC);
197 }
198 
199 /***************************************************************************/
202 void QUARK_CORE_sgemm_p2(Quark *quark, Quark_Task_Flags *task_flags,
203  int transA, int transB,
204  int m, int n, int k, int nb,
205  float alpha, float *A, int lda,
206  float **B, int ldb,
207  float beta, float *C, int ldc)
208 {
210  QUARK_Insert_Task(quark, CORE_sgemm_p2_quark, task_flags,
211  sizeof(PLASMA_enum), &transA, VALUE,
212  sizeof(PLASMA_enum), &transB, VALUE,
213  sizeof(int), &m, VALUE,
214  sizeof(int), &n, VALUE,
215  sizeof(int), &k, VALUE,
216  sizeof(float), &alpha, VALUE,
217  sizeof(float)*lda*nb, A, INPUT,
218  sizeof(int), &lda, VALUE,
219  sizeof(float*), B, INPUT,
220  sizeof(int), &ldb, VALUE,
221  sizeof(float), &beta, VALUE,
222  sizeof(float)*ldc*nb, C, INOUT | LOCALITY,
223  sizeof(int), &ldc, VALUE,
224  0);
225 }
226 
227 /***************************************************************************/
230 #if defined(PLASMA_HAVE_WEAK)
231 #pragma weak CORE_sgemm_p2_quark = PCORE_sgemm_p2_quark
232 #define CORE_sgemm_p2_quark PCORE_sgemm_p2_quark
233 #endif
235 {
236  int transA;
237  int transB;
238  int M;
239  int N;
240  int K;
241  float alpha;
242  float *A;
243  int LDA;
244  float **B;
245  int LDB;
246  float beta;
247  float *C;
248  int LDC;
249 
250  quark_unpack_args_13(quark, transA, transB, M, N, K, alpha,
251  A, LDA, B, LDB, beta, C, LDC);
252  cblas_sgemm(
254  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
255  M, N, K,
256  (alpha), A, LDA,
257  *B, LDB,
258  (beta), C, LDC);
259 }
260 
261 /***************************************************************************/
264 void QUARK_CORE_sgemm_p3(Quark *quark, Quark_Task_Flags *task_flags,
265  int transA, int transB,
266  int m, int n, int k, int nb,
267  float alpha, float *A, int lda,
268  float *B, int ldb,
269  float beta, float **C, int ldc)
270 {
272  QUARK_Insert_Task(quark, CORE_sgemm_p3_quark, task_flags,
273  sizeof(PLASMA_enum), &transA, VALUE,
274  sizeof(PLASMA_enum), &transB, VALUE,
275  sizeof(int), &m, VALUE,
276  sizeof(int), &n, VALUE,
277  sizeof(int), &k, VALUE,
278  sizeof(float), &alpha, VALUE,
279  sizeof(float)*lda*nb, A, INPUT,
280  sizeof(int), &lda, VALUE,
281  sizeof(float)*ldb*nb, B, INPUT,
282  sizeof(int), &ldb, VALUE,
283  sizeof(float), &beta, VALUE,
284  sizeof(float*), C, INOUT | LOCALITY,
285  sizeof(int), &ldc, VALUE,
286  0);
287 }
288 
289 /***************************************************************************/
292 #if defined(PLASMA_HAVE_WEAK)
293 #pragma weak CORE_sgemm_p3_quark = PCORE_sgemm_p3_quark
294 #define CORE_sgemm_p3_quark PCORE_sgemm_p3_quark
295 #endif
297 {
298  int transA;
299  int transB;
300  int M;
301  int N;
302  int K;
303  float alpha;
304  float *A;
305  int LDA;
306  float *B;
307  int LDB;
308  float beta;
309  float **C;
310  int LDC;
311 
312  quark_unpack_args_13(quark, transA, transB, M, N, K, alpha,
313  A, LDA, B, LDB, beta, C, LDC);
314  cblas_sgemm(
316  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
317  M, N, K,
318  (alpha), A, LDA,
319  B, LDB,
320  (beta), *C, LDC);
321 }
322 
323 /***************************************************************************/
327  int transA, int transB,
328  int m, int n, int k, int nb,
329  float alpha, float *A, int lda,
330  float **B, int ldb,
331  float beta, float *C, int ldc,
332  float *fake1, int szefake1, int flag1)
333 {
335  QUARK_Insert_Task(quark, CORE_sgemm_p2f1_quark, task_flags,
336  sizeof(PLASMA_enum), &transA, VALUE,
337  sizeof(PLASMA_enum), &transB, VALUE,
338  sizeof(int), &m, VALUE,
339  sizeof(int), &n, VALUE,
340  sizeof(int), &k, VALUE,
341  sizeof(float), &alpha, VALUE,
342  sizeof(float)*lda*nb, A, INPUT,
343  sizeof(int), &lda, VALUE,
344  sizeof(float*), B, INPUT,
345  sizeof(int), &ldb, VALUE,
346  sizeof(float), &beta, VALUE,
347  sizeof(float)*ldc*nb, C, INOUT | LOCALITY,
348  sizeof(int), &ldc, VALUE,
349  sizeof(float)*szefake1, fake1, flag1,
350  0);
351 }
352 
353 /***************************************************************************/
356 #if defined(PLASMA_HAVE_WEAK)
357 #pragma weak CORE_sgemm_p2f1_quark = PCORE_sgemm_p2f1_quark
358 #define CORE_sgemm_p2f1_quark PCORE_sgemm_p2f1_quark
359 #endif
361 {
362  int transA;
363  int transB;
364  int M;
365  int N;
366  int K;
367  float alpha;
368  float *A;
369  int LDA;
370  float **B;
371  int LDB;
372  float beta;
373  float *C;
374  int LDC;
375  void *fake1;
376 
377  quark_unpack_args_14(quark, transA, transB, M, N, K, alpha,
378  A, LDA, B, LDB, beta, C, LDC, fake1);
379  cblas_sgemm(
381  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
382  M, N, K,
383  (alpha), A, LDA,
384  *B, LDB,
385  (beta), C, LDC);
386 }