MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
magmablas_s.h
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @generated s Tue Aug 13 16:43:27 2013
9 */
10 
11 #ifndef MAGMABLAS_S_H
12 #define MAGMABLAS_S_H
13 
14 #include "magma_types.h"
15 
16 #define REAL
17 
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
22  /*
23  * Interface to clean
24  */
25 float cpu_gpu_sdiff(
27  const float *hA, magma_int_t lda,
29 
30 // see also slaset
31 void szero_32x32_block(
32  magmaFloat_ptr dA, magma_int_t ldda );
33 
35  magma_int_t nb,
36  magmaFloat_ptr dA, magma_int_t ldda );
37 
38 // see also slaswp
39 // ipiv gets updated
41  magma_int_t n,
43  magma_int_t *ipiv, magma_int_t nb, magma_int_t ind );
44 
45 // ipiv is not updated (unlike spermute_long2)
47  /*magma_int_t n,*/
48  magmaFloat_ptr dAT, magma_int_t ldda,
49  const magma_int_t *ipiv, magma_int_t nb, magma_int_t ind );
50 
51  /*
52  * Transpose functions
53  */
55  magma_int_t n,
56  magmaFloat_ptr dA, magma_int_t ldda );
57 
59  magmaFloat_ptr odata, magma_int_t ldo,
61  magma_int_t m, magma_int_t n );
62 
64  magmaFloat_ptr odata, magma_int_t ldo,
66  magma_int_t m, magma_int_t n );
67 
69  magmaFloat_ptr odata, magma_int_t ldo,
72  magma_queue_t stream );
73 
77  float *hA, magma_int_t lda,
79 
80 void magmablas_ssetmatrix_transpose(
81  magma_int_t m, magma_int_t n,
82  const float *hA, magma_int_t lda,
83  magmaFloat_ptr dAT, magma_int_t ldda,
84  magmaFloat_ptr dwork, magma_int_t lddwork, magma_int_t nb );
85 
86  /*
87  * Multi-GPU functions
88  */
90  magma_int_t ngpu, magma_queue_t stream[][2],
91  magmaFloat_ptr dAT[], magma_int_t ldda,
92  float *hA, magma_int_t lda,
95 
97  magma_int_t ngpu, magma_queue_t stream[][2],
98  const float *hA, magma_int_t lda,
99  magmaFloat_ptr dAT[], magma_int_t ldda,
100  magmaFloat_ptr dB[], magma_int_t lddb,
102 
105  magmaFloat_ptr dA[], magma_int_t ldda,
106  float *hA, magma_int_t lda,
107  magma_int_t ngpu, magma_int_t nb );
108 
111  const float *hA, magma_int_t lda,
112  magmaFloat_ptr dA[], magma_int_t ldda,
113  magma_int_t ngpu, magma_int_t nb );
114 
117  magmaFloat_ptr dA[], magma_int_t ldda,
118  float *hA, magma_int_t lda,
119  magma_int_t ngpu, magma_int_t nb );
120 
123  const float *hA, magma_int_t lda,
124  magmaFloat_ptr dA[], magma_int_t ldda,
125  magma_int_t ngpu, magma_int_t nb );
126 
127 // in src/ssytrd_mgpu.cpp
129  magma_int_t num_gpus, char *uplo, magma_int_t n, magma_int_t nb,
130  float *a, magma_int_t lda,
131  float **dwork, magma_int_t ldda,
132  magma_queue_t stream[][10], magma_int_t *info );
133 
134 // in src/spotrf3_mgpu.cpp
136  magma_int_t num_gpus, char *uplo, magma_int_t m, magma_int_t n,
137  magma_int_t off_i, magma_int_t off_j, magma_int_t nb,
138  float *h_A, magma_int_t lda,
139  float *d_lA[], magma_int_t ldda,
140  magma_queue_t stream[][3], magma_int_t *info );
141 
142 // in src/spotrf3_mgpu.cpp
143 magma_int_t magma_sdtohpo(
144  magma_int_t num_gpus, char *uplo, magma_int_t m, magma_int_t n,
145  magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magma_int_t NB,
146  float *a, magma_int_t lda,
147  float *work[], magma_int_t ldda,
148  magma_queue_t stream[][3], magma_int_t *info );
149 
151  char uplo, magma_int_t n,
152  float alpha,
153  float **A, magma_int_t lda,
154  float **X, magma_int_t incx,
155  float beta,
156  float **Y, magma_int_t incy,
157  float **work, magma_int_t lwork,
158  magma_int_t num_gpus,
159  magma_int_t nb,
160  magma_int_t offset,
161  magma_queue_t stream[][10] );
162 
164  char uplo, magma_int_t n,
165  float alpha,
166  float **A, magma_int_t lda,
167  float **X, magma_int_t incx,
168  float beta,
169  float **Y, magma_int_t incy,
170  float **work, magma_int_t lwork,
171  magma_int_t num_gpus,
172  magma_int_t nb,
173  magma_int_t offset,
174  magma_queue_t stream[][10] );
175 
176 magma_int_t magmablas_ssymv_mgpu(
177  magma_int_t num_gpus, magma_int_t k, char uplo,
178  magma_int_t n, magma_int_t nb,
179  float alpha,
180  float **da, magma_int_t ldda, magma_int_t offset,
181  float **dx, magma_int_t incx,
182  float beta,
183  float **dy, magma_int_t incy,
184  float **dwork, magma_int_t ldwork,
185  float *work, float *w,
186  magma_queue_t stream[][10] );
187 
188 magma_int_t magmablas_ssymv_sync(
189  magma_int_t num_gpus, magma_int_t k,
190  magma_int_t n, float *work, float *w,
191  magma_queue_t stream[][10] );
192 
195  float alpha,
196  magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset,
197  magmaFloat_ptr dB[], magma_int_t lddb,
198  float beta,
199  magmaFloat_ptr dC[], magma_int_t lddc,
200  float* C, magma_int_t ldc,
201  magma_int_t ngpu, magma_int_t nb,
202  magma_queue_t streams[][20], magma_int_t nstream );
203 
206  float alpha,
207  magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset,
208  magmaFloat_ptr dB[], magma_int_t lddb,
209  float beta,
210  magmaFloat_ptr dC[], magma_int_t lddc,
211  float* C, magma_int_t ldc,
212  magma_int_t ngpu, magma_int_t nb,
213  magma_queue_t streams[][20], magma_int_t nstream );
214 
217  float alpha,
218  magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset,
219  magmaFloat_ptr dB[], magma_int_t lddb,
220  float beta,
221  magmaFloat_ptr dC[], magma_int_t lddc,
222  magmaFloat_ptr dwork[], magma_int_t lddwork,
223  float* C, magma_int_t ldc,
224  float* work[], magma_int_t ldwork,
225  magma_int_t ngpu, magma_int_t nb,
226  magma_queue_t streams[][20], magma_int_t nstream,
227  magma_event_t redevents[][20], magma_int_t nbevents );
228 
231  float alpha,
232  magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset,
233  magmaFloat_ptr dB[], magma_int_t lddb,
234  float beta,
235  magmaFloat_ptr dC[], magma_int_t lddc,
236  magmaFloat_ptr dwork[], magma_int_t lddwork,
237  float* C, magma_int_t ldc,
238  float* work[], magma_int_t ldwork,
239  magma_int_t ngpu, magma_int_t nb,
240  magma_queue_t streams[][20], magma_int_t nstream,
241  magma_event_t redevents[][MagmaMaxGPUs*MagmaMaxGPUs+10], magma_int_t nbevents,
242  magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx );
243 
246  float alpha,
247  magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset,
248  magmaFloat_ptr dB[], magma_int_t lddb,
249  float beta,
250  magmaFloat_ptr dC[], magma_int_t lddc,
251  magmaFloat_ptr dwork[], magma_int_t lddwork,
252  float* C, magma_int_t ldc,
253  float* work[], magma_int_t ldwork,
254  magma_int_t ngpu, magma_int_t nb,
255  magma_queue_t streams[][20], magma_int_t nstream,
256  magma_event_t redevents[][MagmaMaxGPUs*MagmaMaxGPUs+10], magma_int_t nbevents,
257  magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx );
258 
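259 void magmablas_ssymm_mgpu_spec33(
260  magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n,
261  float alpha,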
262  magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset,
263  magmaFloat_ptr dB[], magma_int_t lddb,
264  float beta,
265  magmaFloat_ptr dC[], magma_int_t lddc,
266  magmaFloat_ptr dVIN[], magma_int_t lddv, magma_int_t voffst,
267  magmaFloat_ptr dwork[], magma_int_t lddwork,
268  float *C, magma_int_t ldc,
269  float *work[], magma_int_t ldwork,
270  magma_int_t ngpu, magma_int_t nb,
271  magma_queue_t streams[][20], magma_int_t nstream,
272  magma_event_t redevents[][MagmaMaxGPUs*MagmaMaxGPUs+10], magma_int_t nbevents,
273  magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx );
274 
275 // Ichi's version, in src/ssytrd_mgpu.cpp
276 void magma_ssyr2k_mgpu(
277  magma_int_t num_gpus, char uplo, char trans, magma_int_t nb, magma_int_t n, magma_int_t k,
278  float alpha,
279  float **db, magma_int_t lddb, magma_int_t boffset,
280  float beta,
281  float **dc, magma_int_t lddc, magma_int_t offset,
282  magma_int_t num_streams, magma_queue_t streams[][10] );
283 
286  float alpha,
287  magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t aoff,
288  magmaFloat_ptr dB[], magma_int_t lddb, magma_int_t boff,
289  float beta,
290  magmaFloat_ptr dC[], magma_int_t lddc, magma_int_t offset,
291  magma_int_t ngpu, magma_int_t nb,
292  magma_queue_t streams[][20], magma_int_t nstream );
293 
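294 void magmablas_ssyr2k_mgpu_spec(
295  magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k,
296  float alpha,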
297  magmaFloat_ptr dA[], magma_int_t lda, magma_int_t aoff,
298  magmaFloat_ptr dB[], magma_int_t ldb, magma_int_t boff,
299  float beta,
300  magmaFloat_ptr dC[], magma_int_t ldc, magma_int_t offset,
301  magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream );
302 
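303 void magmablas_ssyr2k_mgpu_spec324(
304  magma_uplo_t uplo, magma_trans_t trans, magma_int_t m, magma_int_t n,
305  float alpha,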
306  magmaFloat_ptr dVIN[], magma_int_t lddv, magma_int_t voff,
307  magmaFloat_ptr dWIN[], magma_int_t lddw, magma_int_t woff,
308  float beta,
309  magmaFloat_ptr dC[], magma_int_t lddc, magma_int_t offset,
310  magmaFloat_ptr dwork[], magma_int_t lndwork,
311  magma_int_t ngpu, magma_int_t nb,
312  magma_queue_t streams[][20], magma_int_t nstream,
313  magma_event_t redevents[][MagmaMaxGPUs*MagmaMaxGPUs+10], magma_int_t nbevents );
314 
317  float alpha,
318  magmaFloat_ptr dVIN[], magma_int_t lddv, magma_int_t voff,
319  magmaFloat_ptr dWIN[], magma_int_t lddw, magma_int_t woff,
320  float beta,
321  magmaFloat_ptr dC[], magma_int_t lddc, magma_int_t offset,
322  magmaFloat_ptr dwork[], magma_int_t lndwork,
323  magma_int_t ngpu, magma_int_t nb,
324  float **harray[],
325  magmaFloat_ptr *darray[],
326  magma_queue_t streams[][20], magma_int_t nstream,
327  magma_event_t redevents[][MagmaMaxGPUs*MagmaMaxGPUs+10], magma_int_t nbevents );
328 
329  /*
330  * LAPACK auxiliary functions
331  */
332 void magmablas_sgeadd(
334  float alpha,
336  magmaFloat_ptr dB, magma_int_t lddb );
337 
340  float alpha,
341  magmaFloat_const_ptr const *dAarray, magma_int_t ldda,
342  magmaFloat_ptr *dBarray, magma_int_t lddb,
343  magma_int_t batchCount );
344 
345 void magmablas_slacpy(
346  magma_uplo_t uplo,
349  magmaFloat_ptr dB, magma_int_t lddb );
350 
353  magmaFloat_const_ptr const *dAarray, magma_int_t ldda,
354  magmaFloat_ptr *dBarray, magma_int_t lddb,
355  magma_int_t batchCount );
356 
357 float magmablas_slange(
358  magma_norm_t norm,
361  magmaFloat_ptr dwork );
362 
363 float magmablas_slansy(
364  magma_norm_t norm, magma_uplo_t uplo,
365  magma_int_t n,
367  magmaFloat_ptr dwork );
368 
369 float magmablas_slansy(
370  magma_norm_t norm, magma_uplo_t uplo,
371  magma_int_t n,
373  magmaFloat_ptr dwork );
374 
375 void magmablas_slascl(
376  char type, magma_int_t kl, magma_int_t ku,
377  float cfrom, float cto,
379  magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info );
380 
381 void magmablas_slaset(
382  magma_uplo_t uplo, magma_int_t m, magma_int_t n,
383  magmaFloat_ptr dA, magma_int_t ldda );
384 
387  magmaFloat_ptr dA, magma_int_t ldda );
388 
389 void magmablas_slaswp(
390  magma_int_t n,
391  magmaFloat_ptr dAT, magma_int_t ldda,
392  magma_int_t i1, magma_int_t i2,
393  const magma_int_t *ipiv, magma_int_t inci );
394 
395 void magmablas_slaswpx(
396  magma_int_t n,
397  magmaFloat_ptr dAT, magma_int_t ldx, magma_int_t ldy,
398  magma_int_t i1, magma_int_t i2,
399  const magma_int_t *ipiv, magma_int_t inci );
400 
401 void magmablas_slaswp2(
402  magma_int_t n,
403  magmaFloat_ptr dAT, magma_int_t ldda,
404  magma_int_t i1, magma_int_t i2,
405  const magma_int_t *d_ipiv );
406 
408  magma_uplo_t uplo, magma_int_t m,
409  magmaFloat_ptr dA, magma_int_t ldda );
410 
412  magma_uplo_t uplo, magma_int_t m,
413  magmaFloat_ptr dA, magma_int_t ldda,
414  magma_int_t ntile, magma_int_t mstride, magma_int_t nstride );
415 
416 void magma_slarfgx_gpu(
417  magma_int_t n, float *dx0, float *dx,
418  float *dtau, float *dxnorm,
419  float *ddx0, magma_int_t iter);
420 
421 void magma_slarfx_gpu(
422  magma_int_t m, magma_int_t n, float *v, float *tau,
423  float *c, magma_int_t ldc, float *xnorm,
424  float *dT, magma_int_t iter, float *work);
425 
426 void magma_slarfbx_gpu(
427  magma_int_t m, magma_int_t k, float *V, magma_int_t ldv,
428  float *dT, magma_int_t ldt, float *c,
429  float *dwork);
430 
431 void magma_slarfgtx_gpu(
432  magma_int_t n, float *dx0, float *dx,
433  float *dtau, float *dxnorm,
434  float *dA, magma_int_t it,
435  float *V, magma_int_t ldv, float *T, magma_int_t ldt,
436  float *dwork);
437 
438 void magmablas_snrm2_adjust(
439  magma_int_t k, float *xnorm, float *c);
440 
443  magmaFloat_ptr dA, magma_int_t ldda,
444  magmaFloat_ptr dxnorm);
445 
446 void magmablas_snrm2_row_check_adjust(
447  magma_int_t k, float tol, float *xnorm, float *xnorm2,
448  float *c, magma_int_t ldc, float *lsticc);
449 
451  magma_int_t m, magma_int_t num, float *da, magma_int_t ldda,
452  float *dxnorm, float *lsticc);
453 
454  /*
455  * Level 1 BLAS
456  */
457 void magmablas_sswap(
458  magma_int_t n,
459  magmaFloat_ptr dA, magma_int_t ldda,
460  magmaFloat_ptr dB, magma_int_t lddb );
461 
462 void magmablas_sswapblk(
463  magma_storev_t storev,
464  magma_int_t n,
465  magmaFloat_ptr dA, magma_int_t ldda,
466  magmaFloat_ptr dB, magma_int_t lddb,
467  magma_int_t i1, magma_int_t i2,
468  const magma_int_t *ipiv, magma_int_t inci,
469  magma_int_t offset );
470 
472  magma_int_t n, magma_int_t nb,
473  magmaFloat_ptr dA, magma_int_t ldda, magma_int_t inca,
474  magmaFloat_ptr dB, magma_int_t lddb, magma_int_t incb );
475 
476  /*
477  * Level 2 BLAS
478  */
479 void magmablas_sgemv(
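480  magma_trans_t trans, magma_int_t m, magma_int_t n,
481  float alpha,
482  magmaFloat_const_ptr dA, magma_int_t ldda,
483  magmaFloat_const_ptr dx, magma_int_t incx,
484  float beta,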
485  magmaFloat_ptr dy, magma_int_t incy );
486 
487 #ifdef COMPLEX
489  magma_uplo_t uplo, magma_int_t n,
490  float alpha,
493  float beta,
494  magmaFloat_ptr dy, magma_int_t incy );
495 #endif
496 
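497 magma_int_t magmablas_ssymv2(
498  magma_uplo_t uplo, magma_int_t n,
499  float alpha,
500  magmaFloat_const_ptr dA, magma_int_t ldda,
501  magmaFloat_const_ptr dX, magma_int_t incx,
502  float beta,
503  magmaFloat_ptr dY, magma_int_t incy,
504  magmaFloat_ptr dwork, magma_int_t lwork );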
505 
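506 magma_int_t magmablas_ssymv(
507  magma_uplo_t uplo, magma_int_t n,
508  float alpha,
509  magmaFloat_const_ptr dA, magma_int_t ldda,
510  magmaFloat_const_ptr dx, magma_int_t incx,
511  float beta,
512  magmaFloat_ptr dy, magma_int_t incy );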
513 
514  /*
515  * Level 3 BLAS
516  */
517 void magmablas_sgemm(
518  magma_trans_t transA, magma_trans_t transB,
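519  magma_int_t m, magma_int_t n, magma_int_t k,
520  float alpha,
521  magmaFloat_const_ptr dA, magma_int_t ldda,
522  magmaFloat_const_ptr dB, magma_int_t lddb,
523  float beta,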
524  magmaFloat_ptr dC, magma_int_t lddc );
525 
528  float alpha,
529  const float *dA, magma_int_t lda,
530  const float *dB, magma_int_t ldb,
531  float beta,
532  float *dC, magma_int_t ldc );
533 
535  magma_trans_t transA, magma_trans_t transB,
537  float alpha,
540  float beta,
541  magmaFloat_ptr dC, magma_int_t lddc );
542 
544  magma_trans_t transA, magma_trans_t transB,
546  float alpha,
549  float beta,
550  magmaFloat_ptr dC, magma_int_t lddc );
551 
552 void magmablas_ssymm(
553  magma_side_t side, magma_uplo_t uplo,
555  float alpha,
558  float beta,
559  magmaFloat_ptr dC, magma_int_t lddc );
560 
561 void magmablas_ssymm(
562  magma_side_t side, magma_uplo_t uplo,
564  float alpha,
567  float beta,
568  magmaFloat_ptr dC, magma_int_t lddc );
569 
570 void magmablas_ssyrk(
571  magma_uplo_t uplo, magma_trans_t trans,
573  float alpha,
575  float beta,
576  magmaFloat_ptr dC, magma_int_t lddc );
577 
578 void magmablas_ssyrk(
579  magma_uplo_t uplo, magma_trans_t trans,
581  float alpha,
583  float beta,
584  magmaFloat_ptr dC, magma_int_t lddc );
585 
586 void magmablas_ssyr2k(
587  magma_uplo_t uplo, magma_trans_t trans,
589  float alpha,
592  float beta,
593  magmaFloat_ptr dC, magma_int_t lddc );
594 
595 void magmablas_ssyr2k(
596  magma_uplo_t uplo, magma_trans_t trans,
598  float alpha,
601  float beta,
602  magmaFloat_ptr dC, magma_int_t lddc );
603 
604 #ifndef COMPLEX
605 void magmablas_strsm(
606  magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
608  float alpha,
610  magmaFloat_ptr dB, magma_int_t lddb );
611 
613  magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
615  float alpha,
617  magmaFloat_ptr db, magma_int_t lddb,
618  int flag, magmaFloat_ptr d_dinvA, magmaFloat_ptr dx );
619 #endif
620 
621  /*
622  * Wrappers for platform independence.
623  * These wrap CUBLAS or AMD OpenCL BLAS functions.
624  */
625 
626 // ========================================
627 // copying vectors
628 // set copies host to device
629 // get copies device to host
630 // copy copies device to device
631 // (with CUDA unified addressing, copy can be between same or different devices)
632 // Add the function, file, and line for error-reporting purposes.
633 
634 #define magma_ssetvector( n, hx_src, incx, dy_dst, incy ) \
635  magma_ssetvector_internal( n, hx_src, incx, dy_dst, incy, __func__, __FILE__, __LINE__ )
636 
637 #define magma_sgetvector( n, dx_src, incx, hy_dst, incy ) \
638  magma_sgetvector_internal( n, dx_src, incx, hy_dst, incy, __func__, __FILE__, __LINE__ )
639 
640 #define magma_ssetvector_async( n, hx_src, incx, dy_dst, incy, queue ) \
641  magma_ssetvector_async_internal( n, hx_src, incx, dy_dst, incy, queue, __func__, __FILE__, __LINE__ )
642 
643 #define magma_sgetvector_async( n, dx_src, incx, hy_dst, incy, queue ) \
644  magma_sgetvector_async_internal( n, dx_src, incx, hy_dst, incy, queue, __func__, __FILE__, __LINE__ )
645 
646 #define magma_scopyvector_async( n, dx_src, incx, dy_dst, incy, queue ) \
647  magma_scopyvector_async_internal( n, dx_src, incx, dy_dst, incy, queue, __func__, __FILE__, __LINE__ )
648 
649 #define magma_scopyvector_async( n, dx_src, incx, dy_dst, incy, queue ) \
650  magma_scopyvector_async_internal( n, dx_src, incx, dy_dst, incy, queue, __func__, __FILE__, __LINE__ )
651 
652 void magma_ssetvector_internal(
653  magma_int_t n,
654  float const* hx_src, magma_int_t incx,
655  magmaFloat_ptr dy_dst, magma_int_t incy,
656  const char* func, const char* file, int line );
657 
658 void magma_sgetvector_internal(
659  magma_int_t n,
660  magmaFloat_const_ptr dx_src, magma_int_t incx,
661  float* hy_dst, magma_int_t incy,
662  const char* func, const char* file, int line );
663 
664 magma_err_t
665 magma_scopyvector_internal(
666  magma_int_t m, magma_int_t n,
667  magmaFloat_const_ptr dx_src, magma_int_t incx,
668  magmaFloat_ptr dy_dst, magma_int_t incy,
669  const char* func, const char* file, int line );
670 
671 void magma_ssetvector_async_internal(
672  magma_int_t n,
673  float const* hx_src, magma_int_t incx,
674  magmaFloat_ptr dy_dst, magma_int_t incy,
675  magma_queue_t queue,
676  const char* func, const char* file, int line );
677 
679  magma_int_t n,
680  magmaFloat_const_ptr dx_src, magma_int_t incx,
681  float* hy_dst, magma_int_t incy,
682  magma_queue_t queue,
683  const char* func, const char* file, int line );
684 
688  magmaFloat_const_ptr dx_src, magma_int_t incx,
689  magmaFloat_ptr dy_dst, magma_int_t incy,
690  magma_queue_t queue,
691  const char* func, const char* file, int line );
692 
693 
694 // ========================================
695 // copying sub-matrices (contiguous columns)
696 // set copies host to device
697 // get copies device to host
698 // copy copies device to device
699 // (with CUDA unified addressing, copy can be between same or different devices)
700 // Add the function, file, and line for error-reporting purposes.
701 
702 #define magma_ssetmatrix( m, n, hA_src, lda, dB_dst, lddb ) \
703  magma_ssetmatrix_internal( m, n, hA_src, lda, dB_dst, lddb, __func__, __FILE__, __LINE__ )
704 
705 #define magma_sgetmatrix( m, n, dA_src, ldda, hB_dst, ldb ) \
706  magma_sgetmatrix_internal( m, n, dA_src, ldda, hB_dst, ldb, __func__, __FILE__, __LINE__ )
707 
708 #define magma_scopymatrix( m, n, dA_src, ldda, dB_dst, lddb ) \
709  magma_scopymatrix_internal( m, n, dA_src, ldda, dB_dst, lddb, __func__, __FILE__, __LINE__ )
710 
711 #define magma_ssetmatrix_async( m, n, hA_src, lda, dB_dst, lddb, queue ) \
712  magma_ssetmatrix_async_internal( m, n, hA_src, lda, dB_dst, lddb, queue, __func__, __FILE__, __LINE__ )
713 
714 #define magma_sgetmatrix_async( m, n, dA_src, ldda, hB_dst, ldb, queue ) \
715  magma_sgetmatrix_async_internal( m, n, dA_src, ldda, hB_dst, ldb, queue, __func__, __FILE__, __LINE__ )
716 
717 #define magma_scopymatrix_async( m, n, dA_src, ldda, dB_dst, lddb, queue ) \
718  magma_scopymatrix_async_internal( m, n, dA_src, ldda, dB_dst, lddb, queue, __func__, __FILE__, __LINE__ )
719 
722  float const* hA_src, magma_int_t ldha,
723  magmaFloat_ptr dB_dst, magma_int_t lddb,
724  const char* func, const char* file, int line );
725 
728  magmaFloat_const_ptr dA_src, magma_int_t ldda,
729  float* hB_dst, magma_int_t ldhb,
730  const char* func, const char* file, int line );
731 
734  magmaFloat_const_ptr dA_src, magma_int_t ldda,
735  magmaFloat_ptr dB_dst, magma_int_t lddb,
736  const char* func, const char* file, int line );
737 
738 void magma_ssetmatrix_async_internal(
739  magma_int_t m, magma_int_t n,
740  float const* hA_src, magma_int_t ldha,
741  magmaFloat_ptr dB_dst, magma_int_t lddb,
742  magma_queue_t queue,
743  const char* func, const char* file, int line );
744 
747  magmaFloat_const_ptr dA_src, magma_int_t ldda,
748  float* hB_dst, magma_int_t ldhb,
749  magma_queue_t queue,
750  const char* func, const char* file, int line );
751 
754  magmaFloat_const_ptr dA_src, magma_int_t ldda,
755  magmaFloat_ptr dB_dst, magma_int_t lddb,
756  magma_queue_t queue,
757  const char* func, const char* file, int line );
758 
759 
760 // ========================================
761 // Level 1 BLAS
762 
763 // in cublas_v2, result returned through output argument
765  magma_int_t n,
767 
768 // in cublas_v2, result returned through output argument
770  magma_int_t n,
772 
773 // in cublas_v2, result returned through output argument
774 float magma_sasum(
775  magma_int_t n,
777 
778 void magma_saxpy(
779  magma_int_t n,
780  float alpha,
782  magmaFloat_ptr dy, magma_int_t incy );
783 
784 void magma_scopy(
785  magma_int_t n,
787  magmaFloat_ptr dy, magma_int_t incy );
788 
789 // in cublas_v2, result returned through output argument
790 float
791 magma_sdot(
792  magma_int_t n,
795 
796 // in cublas_v2, result returned through output argument
797 #ifdef COMPLEX
798 float
799 magma_sdotu(
800  magma_int_t n,
803 #endif
804 
805 // in cublas_v2, result returned through output argument
806 float magma_snrm2(
807  magma_int_t n,
808  magmaFloat_const_ptr dx, magma_int_t incx );
809 
810 void magma_srot(
811  magma_int_t n,
812  magmaFloat_ptr dx, magma_int_t incx,
813  magmaFloat_ptr dy, magma_int_t incy,
814  float dc, float ds );
815 
816 #ifdef COMPLEX
817 void magma_srot(
818  magma_int_t n,
819  magmaFloat_ptr dx, magma_int_t incx,
820  magmaFloat_ptr dy, magma_int_t incy,
821  float dc, float ds );
822 #endif
823 
824 #ifdef REAL
825 void magma_srotm(
826  magma_int_t n,
827  magmaFloat_ptr dx, magma_int_t incx,
828  magmaFloat_ptr dy, magma_int_t incy,
829  magmaFloat_const_ptr param );
830 
831 void magma_srotmg(
832  magmaFloat_ptr d1, magmaFloat_ptr d2,
833  magmaFloat_ptr x1, magmaFloat_const_ptr y1,
834  magmaFloat_ptr param );
835 #endif
836 
837 void magma_sscal(
838  magma_int_t n,
839  float alpha,
840  magmaFloat_ptr dx, magma_int_t incx );
841 
842 void magma_sscal(
843  magma_int_t n,
844  float alpha,
845  magmaFloat_ptr dx, magma_int_t incx );
846 
847 void magma_sswap(
848  magma_int_t n,
849  magmaFloat_ptr dx, magma_int_t incx,
850  magmaFloat_ptr dy, magma_int_t incy );
851 
852 // ========================================
853 // Level 2 BLAS
854 
855 void magma_sgemv(
856  magma_trans_t transA,
858  float alpha,
861  float beta,
862  magmaFloat_ptr dy, magma_int_t incy );
863 
864 void magma_sger(
866  float alpha,
869  magmaFloat_ptr dA, magma_int_t ldda );
870 
871 #ifdef COMPLEX
872 void magma_sger(
874  float alpha,
877  magmaFloat_ptr dA, magma_int_t ldda );
878 #endif
879 
880 void magma_ssymv(
881  magma_uplo_t uplo,
882  magma_int_t n,
883  float alpha,
884  magmaFloat_const_ptr dA, magma_int_t ldda,
885  magmaFloat_const_ptr dx, magma_int_t incx,
886  float beta,
887  magmaFloat_ptr dy, magma_int_t incy );
888 
889 void magma_ssyr(
890  magma_uplo_t uplo,
891  magma_int_t n,
892  float alpha,
893  magmaFloat_const_ptr dx, magma_int_t incx,
894  magmaFloat_ptr dA, magma_int_t ldda );
895 
896 void magma_ssyr2(
897  magma_uplo_t uplo,
898  magma_int_t n,
899  float alpha,
902  magmaFloat_ptr dA, magma_int_t ldda );
903 
904 void magma_strmv(
905  magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
906  magma_int_t n,
907  magmaFloat_const_ptr dA, magma_int_t ldda,
908  magmaFloat_ptr dx, magma_int_t incx );
909 
910 void magma_strsv(
911  magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
912  magma_int_t n,
914  magmaFloat_ptr dx, magma_int_t incx );
915 
916 // ========================================
917 // Level 3 BLAS
918 
919 void magma_sgemm(
920  magma_trans_t transA, magma_trans_t transB,
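921  magma_int_t m, magma_int_t n, magma_int_t k,
922  float alpha,
923  magmaFloat_const_ptr dA, magma_int_t ldda,
924  magmaFloat_const_ptr dB, magma_int_t lddb,
925  float beta,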
926  magmaFloat_ptr dC, magma_int_t lddc );
927 
928 void magma_ssymm(
929  magma_side_t side, magma_uplo_t uplo,
931  float alpha,
934  float beta,
935  magmaFloat_ptr dC, magma_int_t lddc );
936 
937 void magma_ssyrk(
938  magma_uplo_t uplo, magma_trans_t trans,
940  float alpha,
942  float beta,
943  magmaFloat_ptr dC, magma_int_t lddc );
944 
945 void magma_ssyr2k(
946  magma_uplo_t uplo, magma_trans_t trans,
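947  magma_int_t n, magma_int_t k,
948  float alpha,
949  magmaFloat_const_ptr dA, magma_int_t ldda,
950  magmaFloat_const_ptr dB, magma_int_t lddb,
951  float beta,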
952  magmaFloat_ptr dC, magma_int_t lddc );
953 
954 #ifdef COMPLEX
955 void magma_ssymm(
956  magma_side_t side, magma_uplo_t uplo,
958  float alpha,
961  float beta,
962  magmaFloat_ptr dC, magma_int_t lddc );
963 
964 void magma_ssyrk(
965  magma_uplo_t uplo, magma_trans_t trans,
967  float alpha,
969  float beta,
970  magmaFloat_ptr dC, magma_int_t lddc );
971 
972 void magma_ssyr2k(
973  magma_uplo_t uplo, magma_trans_t trans,
975  float alpha,
978  float beta,
979  magmaFloat_ptr dC, magma_int_t lddc );
980 #endif
981 
982 void magma_strmm(
983  magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
984  magma_int_t m, magma_int_t n,
985  float alpha,
986  magmaFloat_const_ptr dA, magma_int_t ldda,
987  magmaFloat_ptr dB, magma_int_t lddb );
988 
989 void magma_strsm(
990  magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
992  float alpha,
994  magmaFloat_ptr dB, magma_int_t lddb );
995 
996 #ifdef __cplusplus
997 }
998 #endif
999 
1000 #undef REAL
1001 
1002 #endif /* MAGMABLAS_S_H */
void magmablas_snrm2_row_check_adjust(magma_int_t k, float tol, float *xnorm, float *xnorm2, float *c, magma_int_t ldc, float *lsticc)
void magmablas_sgemm_fermi64(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magma_ssyr2k(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magma_strmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dB, magma_int_t lddb)
float magma_snrm2(magma_int_t n, magmaFloat_const_ptr dx, magma_int_t incx)
void magma_sgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
magma_int_t magmablas_ssymv_mgpu_offset(char uplo, magma_int_t n, float alpha, float **A, magma_int_t lda, float **X, magma_int_t incx, float beta, float **Y, magma_int_t incy, float **work, magma_int_t lwork, magma_int_t num_gpus, magma_int_t nb, magma_int_t offset, magma_queue_t stream[][10])
void magma_strmv(magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dx, magma_int_t incx)
void magma_sgetvector_internal(magma_int_t n, magmaFloat_const_ptr dx_src, magma_int_t incx, float *hy_dst, magma_int_t incy, const char *func, const char *file, int line)
void magma_ssyr(magma_uplo_t uplo, magma_int_t n, float alpha, magmaFloat_const_ptr dx, magma_int_t incx, magmaFloat_ptr dA, magma_int_t ldda)
void magmablas_ssyr2k_mgpu_spec(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloat_ptr dA[], magma_int_t lda, magma_int_t aoff, magmaFloat_ptr dB[], magma_int_t ldb, magma_int_t boff, float beta, magmaFloat_ptr dC[], magma_int_t ldc, magma_int_t offset, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream)
void magmablas_sgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magmablas_ssymm_mgpu_com(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset, magmaFloat_ptr dB[], magma_int_t lddb, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magmaFloat_ptr dwork[], magma_int_t lddwork, float *C, magma_int_t ldc, float *work[], magma_int_t ldwork, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream, magma_event_t redevents[][MagmaMaxGPUs *MagmaMaxGPUs+10], magma_int_t nbevents, magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx)
Definition: ssymm_mgpu.cpp:20
void magmablas_ssymm_mgpu_spec33(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset, magmaFloat_ptr dB[], magma_int_t lddb, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magmaFloat_ptr dVIN[], magma_int_t lddv, magma_int_t voffst, magmaFloat_ptr dwork[], magma_int_t lddwork, float *C, magma_int_t ldc, float *work[], magma_int_t ldwork, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream, magma_event_t redevents[][MagmaMaxGPUs *MagmaMaxGPUs+10], magma_int_t nbevents, magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx)
char magma_diag_t
Definition: magma_types.h:379
void magma_ssetvector_internal(magma_int_t n, float const *hx_src, magma_int_t incx, magmaFloat_ptr dy_dst, magma_int_t incy, const char *func, const char *file, int line)
float * magmaFloat_ptr
Definition: magma_types.h:230
void magmablas_slaswp(magma_int_t n, magmaFloat_ptr dAT, magma_int_t ldda, magma_int_t i1, magma_int_t i2, const magma_int_t *ipiv, magma_int_t inci)
void magmablas_ssyr2k_mgpu_spec324(magma_uplo_t uplo, magma_trans_t trans, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dVIN[], magma_int_t lddv, magma_int_t voff, magmaFloat_ptr dWIN[], magma_int_t lddw, magma_int_t woff, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magma_int_t offset, magmaFloat_ptr dwork[], magma_int_t lndwork, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream, magma_event_t redevents[][MagmaMaxGPUs *MagmaMaxGPUs+10], magma_int_t nbevents)
void magmablas_ssetmatrix_transpose(magma_int_t m, magma_int_t n, const float *hA, magma_int_t lda, magmaFloat_ptr dAT, magma_int_t ldda, magmaFloat_ptr dwork, magma_int_t lddwork, magma_int_t nb)
magma_int_t magmablas_ssymv_sync(magma_int_t num_gpus, magma_int_t k, magma_int_t n, float *work, float *w, magma_queue_t stream[][10])
#define T(m)
Definition: zgeqrf_mc.cpp:14
void magmablas_slaset(magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda)
magma_int_t magma_sdtohpo(magma_int_t num_gpus, char *uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magma_int_t NB, float *a, magma_int_t lda, float *work[], magma_int_t ldda, magma_queue_t stream[][3], magma_int_t *info)
int magma_int_t
Definition: magmablas.h:12
void magmablas_ssymm_mgpu_spec(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset, magmaFloat_ptr dB[], magma_int_t lddb, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magmaFloat_ptr dwork[], magma_int_t lddwork, float *C, magma_int_t ldc, float *work[], magma_int_t ldwork, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream, magma_event_t redevents[][MagmaMaxGPUs *MagmaMaxGPUs+10], magma_int_t nbevents, magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx)
void magmablas_sgetmatrix_transpose(magma_int_t m, magma_int_t n, magmaFloat_const_ptr dAT, magma_int_t ldda, float *hA, magma_int_t lda, magmaFloat_ptr dwork, magma_int_t lddwork, magma_int_t nb)
void magma_ssetmatrix_async_internal(magma_int_t m, magma_int_t n, float const *hA_src, magma_int_t ldha, magmaFloat_ptr dB_dst, magma_int_t lddb, magma_queue_t queue, const char *func, const char *file, int line)
void magmablas_sgemv(magma_trans_t trans, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dx, magma_int_t incx, float beta, magmaFloat_ptr dy, magma_int_t incy)
#define C(i, j)
void szero_32x32_block(magmaFloat_ptr dA, magma_int_t ldda)
magma_int_t magmablas_ssymv2(magma_uplo_t uplo, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dX, magma_int_t incx, float beta, magmaFloat_ptr dY, magma_int_t incy, magmaFloat_ptr dwork, magma_int_t lwork)
void magma_ssetmatrix_1D_col_bcyclic(magma_int_t m, magma_int_t n, const float *hA, magma_int_t lda, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t ngpu, magma_int_t nb)
void magma_ssetvector_async_internal(magma_int_t n, float const *hx_src, magma_int_t incx, magmaFloat_ptr dy_dst, magma_int_t incy, magma_queue_t queue, const char *func, const char *file, int line)
void magma_srotm(magma_int_t n, magmaFloat_ptr dx, magma_int_t incx, magmaFloat_ptr dy, magma_int_t incy, magmaFloat_const_ptr param)
void magmablas_snrm2_adjust(magma_int_t k, float *xnorm, float *c)
magma_int_t magmablas_ssymv(magma_uplo_t uplo, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dx, magma_int_t incx, float beta, magmaFloat_ptr dy, magma_int_t incy)
#define h_A(i, j)
#define dB(dev, i, j)
char magma_storev_t
Definition: magma_types.h:386
char magma_trans_t
Definition: magma_types.h:377
void magma_srotmg(magmaFloat_ptr d1, magmaFloat_ptr d2, magmaFloat_ptr x1, magmaFloat_const_ptr y1, magmaFloat_ptr param)
void magma_ssymv(magma_uplo_t uplo, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dx, magma_int_t incx, float beta, magmaFloat_ptr dy, magma_int_t incy)
void magma_sscal(magma_int_t n, float alpha, magmaFloat_ptr dx, magma_int_t incx)
magma_err_t magma_scopyvector_internal(magma_int_t m, magma_int_t n, magmaFloat_const_ptr dx_src, magma_int_t incx, magmaFloat_ptr dy_dst, magma_int_t incy, const char *func, const char *file, int line)
magma_int_t magmablas_ssymv_mgpu(magma_int_t num_gpus, magma_int_t k, char uplo, magma_int_t n, magma_int_t nb, float alpha, float **da, magma_int_t ldda, magma_int_t offset, float **dx, magma_int_t incx, float beta, float **dy, magma_int_t incy, float **dwork, magma_int_t ldwork, float *work, float *w, magma_queue_t stream[][10])
void magmablas_ssymm(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magmablas_ssymmetrize(magma_uplo_t uplo, magma_int_t m, magmaFloat_ptr dA, magma_int_t ldda)
void magmablas_sgeadd_batched(magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr const *dAarray, magma_int_t ldda, magmaFloat_ptr *dBarray, magma_int_t lddb, magma_int_t batchCount)
void magmablas_sswap(magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magmaFloat_ptr dB, magma_int_t lddb)
char magma_norm_t
Definition: magma_types.h:381
void magmablas_slascl(char type, magma_int_t kl, magma_int_t ku, float cfrom, float cto, magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t *info)
#define dX(i, j)
void magmablas_ssymmetrize_tiles(magma_uplo_t uplo, magma_int_t m, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t ntile, magma_int_t mstride, magma_int_t nstride)
#define dwork(dev, i, j)
char magma_side_t
Definition: magma_types.h:380
void magmablas_ssyrk(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magma_strsv(magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dx, magma_int_t incx)
void magmablas_ssetmatrix_transpose_mgpu(magma_int_t ngpu, magma_queue_t stream[][2], const float *hA, magma_int_t lda, magmaFloat_ptr dAT[], magma_int_t ldda, magmaFloat_ptr dB[], magma_int_t lddb, magma_int_t m, magma_int_t n, magma_int_t nb)
void magma_slarfgtx_gpu(magma_int_t n, float *dx0, float *dx, float *dtau, float *dxnorm, float *dA, magma_int_t it, float *V, magma_int_t ldv, float *T, magma_int_t ldt, float *dwork)
float cpu_gpu_sdiff(magma_int_t m, magma_int_t n, const float *hA, magma_int_t lda, magmaFloat_const_ptr dA, magma_int_t ldda)
#define dAT(i, j)
void magmablas_stranspose(magmaFloat_ptr odata, magma_int_t ldo, magmaFloat_const_ptr idata, magma_int_t ldi, magma_int_t m, magma_int_t n)
void magmablas_slacpy_batched(magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloat_const_ptr const *dAarray, magma_int_t ldda, magmaFloat_ptr *dBarray, magma_int_t lddb, magma_int_t batchCount)
void magmablas_ssymm_1gpu(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset, magmaFloat_ptr dB[], magma_int_t lddb, float beta, magmaFloat_ptr dC[], magma_int_t lddc, float *C, magma_int_t ldc, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream)
void magma_ssyr2k_mgpu(magma_int_t num_gpus, char uplo, char trans, magma_int_t nb, magma_int_t n, magma_int_t k, float alpha, float **db, magma_int_t lddb, magma_int_t boffset, float beta, float **dc, magma_int_t lddc, magma_int_t offset, magma_int_t num_streams, magma_queue_t streams[][10])
float magma_sdot(magma_int_t n, magmaFloat_const_ptr dx, magma_int_t incx, magmaFloat_const_ptr dy, magma_int_t incy)
void magma_ssyrk(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magmablas_stranspose2(magmaFloat_ptr odata, magma_int_t ldo, magmaFloat_const_ptr idata, magma_int_t ldi, magma_int_t m, magma_int_t n)
void magmablas_strsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dB, magma_int_t lddb)
void magma_sgetmatrix_async_internal(magma_int_t m, magma_int_t n, magmaFloat_const_ptr dA_src, magma_int_t ldda, float *hB_dst, magma_int_t ldhb, magma_queue_t queue, const char *func, const char *file, int line)
#define dY(d, i, j)
float const * magmaFloat_const_ptr
Definition: magma_types.h:237
#define MagmaMaxGPUs
Definition: magma_types.h:255
float magma_sasum(magma_int_t n, magmaFloat_const_ptr dx, magma_int_t incx)
void magma_strsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dB, magma_int_t lddb)
void magma_scopy(magma_int_t n, magmaFloat_const_ptr dx, magma_int_t incx, magmaFloat_ptr dy, magma_int_t incy)
void magma_sger(magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dx, magma_int_t incx, magmaFloat_const_ptr dy, magma_int_t incy, magmaFloat_ptr dA, magma_int_t ldda)
void magmablas_ssyr2k(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magmablas_spermute_long2(magma_int_t n, magmaFloat_ptr dAT, magma_int_t ldda, magma_int_t *ipiv, magma_int_t nb, magma_int_t ind)
void magmablas_sgeadd(magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dB, magma_int_t lddb)
void magma_ssetmatrix_internal(magma_int_t m, magma_int_t n, float const *hA_src, magma_int_t ldha, magmaFloat_ptr dB_dst, magma_int_t lddb, const char *func, const char *file, int line)
void magmablas_strsm_work(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr db, magma_int_t lddb, int flag, magmaFloat_ptr d_dinvA, magmaFloat_ptr dx)
magma_int_t magma_isamin(magma_int_t n, magmaFloat_const_ptr dx, magma_int_t incx)
void magmablas_slaset_identity(magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda)
void magmablas_stranspose_inplace(magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda)
void magma_sgetmatrix_internal(magma_int_t m, magma_int_t n, magmaFloat_const_ptr dA_src, magma_int_t ldda, float *hB_dst, magma_int_t ldhb, const char *func, const char *file, int line)
void magmablas_slacpy(magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dB, magma_int_t lddb)
float magmablas_slansy(magma_norm_t norm, magma_uplo_t uplo, magma_int_t n, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dwork)
void magma_saxpy(magma_int_t n, float alpha, magmaFloat_const_ptr dx, magma_int_t incx, magmaFloat_ptr dy, magma_int_t incy)
#define Y(i, j)
void magmablas_slaswp2(magma_int_t n, magmaFloat_ptr dAT, magma_int_t ldda, magma_int_t i1, magma_int_t i2, const magma_int_t *d_ipiv)
void magma_scopymatrix_internal(magma_int_t m, magma_int_t n, magmaFloat_const_ptr dA_src, magma_int_t ldda, magmaFloat_ptr dB_dst, magma_int_t lddb, const char *func, const char *file, int line)
void magmablas_stranspose2s(magmaFloat_ptr odata, magma_int_t ldo, magmaFloat_const_ptr idata, magma_int_t ldi, magma_int_t m, magma_int_t n, magma_queue_t stream)
void magmablas_ssymm_1gpu_old(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset, magmaFloat_ptr dB[], magma_int_t lddb, float beta, magmaFloat_ptr dC[], magma_int_t lddc, float *C, magma_int_t ldc, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream)
void magma_sgetmatrix_1D_col_bcyclic(magma_int_t m, magma_int_t n, magmaFloat_ptr dA[], magma_int_t ldda, float *hA, magma_int_t lda, magma_int_t ngpu, magma_int_t nb)
#define A(i, j)
Definition: cprint.cpp:16
magma_int_t magmablas_ssymv_mgpu_32_offset(char uplo, magma_int_t n, float alpha, float **A, magma_int_t lda, float **X, magma_int_t incx, float beta, float **Y, magma_int_t incy, float **work, magma_int_t lwork, magma_int_t num_gpus, magma_int_t nb, magma_int_t offset, magma_queue_t stream[][10])
void magma_sswap(magma_int_t n, magmaFloat_ptr dx, magma_int_t incx, magmaFloat_ptr dy, magma_int_t incy)
void magmablas_slaswpx(magma_int_t n, magmaFloat_ptr dAT, magma_int_t ldx, magma_int_t ldy, magma_int_t i1, magma_int_t i2, const magma_int_t *ipiv, magma_int_t inci)
void magmablas_sswapdblk(magma_int_t n, magma_int_t nb, magmaFloat_ptr dA, magma_int_t ldda, magma_int_t inca, magmaFloat_ptr dB, magma_int_t lddb, magma_int_t incb)
void magmablas_spermute_long3(magmaFloat_ptr dAT, magma_int_t ldda, const magma_int_t *ipiv, magma_int_t nb, magma_int_t ind)
void magmablas_ssyr2k_mgpu2(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t aoff, magmaFloat_ptr dB[], magma_int_t lddb, magma_int_t boff, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magma_int_t offset, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream)
void magmablas_sgemm_fermi80(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
char magma_uplo_t
Definition: magma_types.h:378
void magma_sgetmatrix_1D_row_bcyclic(magma_int_t m, magma_int_t n, magmaFloat_ptr dA[], magma_int_t ldda, float *hA, magma_int_t lda, magma_int_t ngpu, magma_int_t nb)
void magmablas_sswapblk(magma_storev_t storev, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magmaFloat_ptr dB, magma_int_t lddb, magma_int_t i1, magma_int_t i2, const magma_int_t *ipiv, magma_int_t inci, magma_int_t offset)
#define dC(dev, i, j)
void magmablas_sgemm_reduce(magma_int_t m, magma_int_t n, magma_int_t k, float alpha, const float *dA, magma_int_t lda, const float *dB, magma_int_t ldb, float beta, float *dC, magma_int_t ldc)
void szero_nbxnb_block(magma_int_t nb, magmaFloat_ptr dA, magma_int_t ldda)
void magma_scopymatrix_async_internal(magma_int_t m, magma_int_t n, magmaFloat_const_ptr dA_src, magma_int_t ldda, magmaFloat_ptr dB_dst, magma_int_t lddb, magma_queue_t queue, const char *func, const char *file, int line)
void magmablas_snrm2_check(magma_int_t m, magma_int_t num, float *da, magma_int_t ldda, float *dxnorm, float *lsticc)
void magma_ssyr2(magma_uplo_t uplo, magma_int_t n, float alpha, magmaFloat_const_ptr dx, magma_int_t incx, magmaFloat_const_ptr dy, magma_int_t incy, magmaFloat_ptr dA, magma_int_t ldda)
int magma_err_t
Definition: magma_types.h:23
void magma_slarfgx_gpu(magma_int_t n, float *dx0, float *dx, float *dtau, float *dxnorm, float *ddx0, magma_int_t iter)
void magma_slarfbx_gpu(magma_int_t m, magma_int_t k, float *V, magma_int_t ldv, float *dT, magma_int_t ldt, float *c, float *dwork)
magma_int_t magma_shtodpo(magma_int_t num_gpus, char *uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, float *h_A, magma_int_t lda, float *d_lA[], magma_int_t ldda, magma_queue_t stream[][3], magma_int_t *info)
#define dx(d, j)
void magma_sgetvector_async_internal(magma_int_t n, magmaFloat_const_ptr dx_src, magma_int_t incx, float *hy_dst, magma_int_t incy, magma_queue_t queue, const char *func, const char *file, int line)
void magma_ssymm(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
void magma_sgemv(magma_trans_t transA, magma_int_t m, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dx, magma_int_t incx, float beta, magmaFloat_ptr dy, magma_int_t incy)
void magma_ssetmatrix_1D_row_bcyclic(magma_int_t m, magma_int_t n, const float *hA, magma_int_t lda, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t ngpu, magma_int_t nb)
void magma_srot(magma_int_t n, magmaFloat_ptr dx, magma_int_t incx, magmaFloat_ptr dy, magma_int_t incy, float dc, float ds)
#define dT(m)
void magma_slarfx_gpu(magma_int_t m, magma_int_t n, float *v, float *tau, float *c, magma_int_t ldc, float *xnorm, float *dT, magma_int_t iter, float *work)
void magmablas_sgetmatrix_transpose_mgpu(magma_int_t ngpu, magma_queue_t stream[][2], magmaFloat_ptr dAT[], magma_int_t ldda, float *hA, magma_int_t lda, magmaFloat_ptr dB[], magma_int_t lddb, magma_int_t m, magma_int_t n, magma_int_t nb)
magma_err_t magma_scopyvector_async_internal(magma_int_t m, magma_int_t n, magmaFloat_const_ptr dx_src, magma_int_t incx, magmaFloat_ptr dy_dst, magma_int_t incy, magma_queue_t queue, const char *func, const char *file, int line)
void magmablas_ssyr2k_mgpu_spec325(magma_uplo_t uplo, magma_trans_t trans, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dVIN[], magma_int_t lddv, magma_int_t voff, magmaFloat_ptr dWIN[], magma_int_t lddw, magma_int_t woff, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magma_int_t offset, magmaFloat_ptr dwork[], magma_int_t lndwork, magma_int_t ngpu, magma_int_t nb, float **harray[], magmaFloat_ptr *darray[], magma_queue_t streams[][20], magma_int_t nstream, magma_event_t redevents[][MagmaMaxGPUs *MagmaMaxGPUs+10], magma_int_t nbevents)
void magmablas_ssymm_mgpu(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, float alpha, magmaFloat_ptr dA[], magma_int_t ldda, magma_int_t offset, magmaFloat_ptr dB[], magma_int_t lddb, float beta, magmaFloat_ptr dC[], magma_int_t lddc, magmaFloat_ptr dwork[], magma_int_t lddwork, float *C, magma_int_t ldc, float *work[], magma_int_t ldwork, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream, magma_event_t redevents[][20], magma_int_t nbevents)
magma_int_t magma_shtodhe(magma_int_t num_gpus, char *uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float **dwork, magma_int_t ldda, magma_queue_t stream[][10], magma_int_t *info)
float magmablas_slange(magma_norm_t norm, magma_int_t m, magma_int_t n, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_ptr dwork)
magma_int_t magma_isamax(magma_int_t n, magmaFloat_const_ptr dx, magma_int_t incx)
#define V(m)
#define dA(dev, i, j)
void magmablas_snrm2_cols(magma_int_t m, magma_int_t n, magmaFloat_ptr dA, magma_int_t ldda, magmaFloat_ptr dxnorm)