MAGMA  1.2.0
Matrix Algebra on GPU and Multicore Architectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
magmablas_s.h
Go to the documentation of this file.
1 /*
2  * -- MAGMA (version 1.2.0) --
3  * Univ. of Tennessee, Knoxville
4  * Univ. of California, Berkeley
5  * Univ. of Colorado, Denver
6  * May 2012
7  *
8  * @generated s Thu May 10 22:26:24 2012
9  */
10 
11 #ifndef _MAGMABLAS_S_H_
12 #define _MAGMABLAS_S_H_
13 
14 #define PRECISION_s
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
20  /*
21  * Interface to clean
22  */
/* Takes dimensions M, N and two float arrays (`a` with leading dimension
 * lda, `da` with leading dimension ldda) and returns a float.
 * NOTE(review): the name and the a/da pairing suggest this measures the
 * difference between a host matrix and a device matrix -- confirm against
 * the implementation.
 * NOTE(review): uses plain int where most declarations in this header use
 * magma_int_t. */
23 float cpu_gpu_sdiff( int M, int N,
24  float * a, int lda,
25  float *da, int ldda);
/* NOTE(review): parameters are unnamed in this prototype; presumably a
 * matrix pointer and its leading dimension, with the name suggesting that a
 * 32x32 block is set to zero -- confirm against the implementation. */
26 void szero_32x32_block( float *, magma_int_t);
29 void magmablas_spermute_long( float *, magma_int_t,
/* Takes a float matrix dAT with leading dimension lda, an integer pivot
 * array ipiv, and two integers nb and ind.
 * NOTE(review): presumably applies the row interchanges in ipiv to a
 * transposed device matrix, nb rows at a time starting at ind -- confirm
 * against the implementation.
 * NOTE(review): uses plain int where most declarations in this header use
 * magma_int_t. */
33 void magmablas_spermute_long3( float *dAT, int lda,
34  int *ipiv, int nb, int ind );
35 void magmablas_stranspose( float *, magma_int_t,
36  float *, magma_int_t,
38 void magmablas_stranspose2( float *, magma_int_t,
39  float *, magma_int_t,
/* Stream-taking transpose variant: output array odata (leading dimension
 * ldo), input array idata (leading dimension ldi), dimensions m and n, and
 * a pointer to a cudaStream_t.
 * NOTE(review): presumably writes the transpose of idata into odata on
 * *stream; whether m-by-n describes the input or the output is not visible
 * here -- confirm against the implementation. */
41 void magmablas_stranspose2s(float *odata, int ldo,
42  float *idata, int ldi,
43  int m, int n, cudaStream_t *stream );
44 
/* Takes dimensions m, n, arrays dat (ldda), ha (lda) and dB (lddb), and a
 * block size nb.
 * NOTE(review): following this header's "get copies device to host"
 * convention, presumably copies the transposed device matrix dat into host
 * matrix ha in blocks of nb columns, using dB as device workspace --
 * confirm against the implementation. */
45 void magmablas_sgetmatrix_transpose( int m, int n,
46  float *dat, int ldda,
47  float *ha, int lda,
48  float *dB, int lddb, int nb );
/* Multi-GPU form of magmablas_sgetmatrix_transpose: dat and dB are arrays
 * of pointers, ldda is an array of ints, and the caller passes num_gpus
 * plus a stream table with two cudaStream_t entries per row.
 * NOTE(review): presumably dat[g], ldda[g], dB[g] and stream[g][0..1]
 * belong to GPU g for g in [0, num_gpus) -- confirm against the
 * implementation. */
49 void magmablas_sgetmatrix_transpose2( int m, int n,
50  float **dat, int *ldda,
51  float *ha, int lda,
52  float **dB, int lddb, int nb,
53  int num_gpus, cudaStream_t stream[][2] );
/* Takes dimensions m, n, arrays ha (lda), dat (ldda) and dB (lddb), and a
 * block size nb. Argument order is the mirror image of
 * magmablas_sgetmatrix_transpose (ha first, then dat).
 * NOTE(review): following this header's "set copies host to device"
 * convention, presumably uploads host matrix ha and stores it transposed in
 * dat, using dB as device workspace -- confirm against the implementation. */
54 void magmablas_ssetmatrix_transpose( int m, int n,
55  float *ha, int lda,
56  float *dat, int ldda,
57  float *dB, int lddb, int nb );
/* Multi-GPU form of magmablas_ssetmatrix_transpose: dat and dB are arrays
 * of pointers, ldda is an array of ints, and the caller passes num_gpus
 * plus a stream table with two cudaStream_t entries per row.
 * NOTE(review): presumably dat[g], ldda[g], dB[g] and stream[g][0..1]
 * belong to GPU g for g in [0, num_gpus) -- confirm against the
 * implementation. */
58 void magmablas_ssetmatrix_transpose2( int m, int n,
59  float *ha, int lda,
60  float **dat, int *ldda,
61  float **dB, int lddb, int nb,
62  int num_gpus, cudaStream_t stream[][2] );
/* Takes dimensions m, n, an array of float pointers da[] with a single
 * leading dimension ldda, a host array ha (lda), the GPU count num_gpus and
 * a block size nb.
 * NOTE(review): the name suggests a 1-D block-cyclic distribution with
 * block size nb across num_gpus devices, gathered into ha ("get" = device
 * to host in this header) -- confirm against the implementation. */
63 void magmablas_sgetmatrix_1D_bcyclic( int m, int n,
64  float *da[], int ldda,
65  float *ha, int lda,
66  int num_gpus, int nb );
/* Takes dimensions m, n, a host array ha (lda), an array of float pointers
 * da[] with a single leading dimension ldda, the GPU count num_gpus and a
 * block size nb. Argument order is the mirror image of
 * magmablas_sgetmatrix_1D_bcyclic.
 * NOTE(review): the name suggests ha is scattered to num_gpus devices in a
 * 1-D block-cyclic layout with block size nb ("set" = host to device in
 * this header) -- confirm against the implementation. */
67 void magmablas_ssetmatrix_1D_bcyclic( int m, int n,
68  float *ha, int lda,
69  float *da[], int ldda,
70  int num_gpus, int nb );
71 
72  /*
73  * LAPACK auxiliary functions
74  */
75 void magmablas_slacpy( char uplo,
77  float *A, magma_int_t lda,
78  float *B, magma_int_t ldb);
79 float magmablas_slange( char norm,
81  float *A, magma_int_t lda, float *WORK);
/* magmablas_slansy is declared twice in a row with identical parameter
 * lists. Repeating an identical prototype is legal C, so the second
 * declaration is harmless but redundant.
 * NOTE(review): this header is marked "@generated s", so the duplicate
 * presumably comes from two distinct routines in the precision template
 * collapsing onto one name in single precision -- confirm before removing.
 * NOTE(review): the parameter list (norm, uplo, n, A, lda, WORK) resembles
 * LAPACK SLANSY, which returns a norm of a symmetric matrix -- confirm the
 * semantics against the implementation. */
82 float magmablas_slansy( char norm, char uplo,
83  magma_int_t n,
84  float *A, magma_int_t lda, float *WORK);
85 float magmablas_slansy( char norm, char uplo,
86  magma_int_t n,
87  float *A, magma_int_t lda, float *WORK);
/* NOTE(review): the parameter list (type, kl, ku, cfrom, cto, m, n, A, lda,
 * info) mirrors LAPACK SLASCL, which multiplies a matrix by cto/cfrom --
 * confirm that this routine follows the same contract, including how
 * *info is set.
 * NOTE(review): uses plain int where the neighbouring declarations use
 * magma_int_t. */
88 void magmablas_slascl( char type, int kl, int ku,
89  float cfrom, float cto,
90  int m, int n,
91  float *A, int lda, int *info );
/* Takes uplo, dimensions m and n, and a matrix A with leading dimension
 * lda.
 * NOTE(review): unlike LAPACK SLASET, this prototype has no alpha/beta
 * arguments, so the values written into A are not visible from the
 * declaration -- confirm against the implementation. */
92 void magmablas_slaset( char uplo, magma_int_t m, magma_int_t n,
93  float *A, magma_int_t lda);
95  float *dAT, magma_int_t lda,
96  magma_int_t i1, magma_int_t i2,
97  magma_int_t *ipiv, magma_int_t inci );
99  float *dAT, magma_int_t ldx, magma_int_t ldy,
100  magma_int_t i1, magma_int_t i2,
101  magma_int_t *ipiv, magma_int_t inci );
102 
103  /*
104  * Level 1 BLAS
105  */
107  float *dA1, magma_int_t lda1,
108  float *dA2, magma_int_t lda2 );
/* Takes a storage selector storev, a size N, two float matrices dA1 (lda1)
 * and dA2 (lda2), an index range i1..i2, a pivot array ipiv with stride
 * inci, and an offset.
 * NOTE(review): presumably swaps rows or columns (per storev) between dA1
 * and dA2 as directed by ipiv[i1..i2]; the meaning of offset is not visible
 * here -- confirm against the implementation. */
109 void magmablas_sswapblk(char storev,
110  magma_int_t N,
111  float *dA1, magma_int_t lda1,
112  float *dA2, magma_int_t lda2,
113  magma_int_t i1, magma_int_t i2,
114  magma_int_t *ipiv, magma_int_t inci,
115  magma_int_t offset);
117  float *dA1, magma_int_t ldda1, magma_int_t inca1,
118  float *dA2, magma_int_t ldda2, magma_int_t inca2 );
119 
120  /*
121  * Level 2 BLAS
122  */
/* NOTE(review): the parameter list (t, M, N, alpha, A, lda, X, incX, beta,
 * Y, incY) mirrors BLAS SGEMV, i.e. Y = alpha*op(A)*X + beta*Y with op
 * chosen by t -- confirm against the implementation.
 * NOTE(review): A and X are not const-qualified here, whereas the
 * magma_sgemv wrapper later in this header takes them as `float const *`. */
123 void magmablas_sgemv(char t, magma_int_t M, magma_int_t N,
124  float alpha,
125  float *A, magma_int_t lda,
126  float * X, magma_int_t incX,
127  float beta,
128  float *Y, magma_int_t incY);
129 #if defined(PRECISION_z) || defined(PRECISION_c)
131  float alpha,
132  float *A, magma_int_t lda,
133  float *X, magma_int_t incX,
134  float beta,
135  float *Y, magma_int_t incY);
136 #endif
138  float alpha,
139  float *A, magma_int_t lda,
140  float *X, magma_int_t incX,
141  float beta,
142  float *Y, magma_int_t incY);
143 
144  /*
145  * Level 3 BLAS
146  */
147 void magmablas_sgemm(char tA, char tB,
149  float alpha,
150  const float *A, magma_int_t lda,
151  const float *B, magma_int_t ldb,
152  float beta,
153  float *C, magma_int_t ldc);
154 void magmablas_sgemm_fermi80(char tA, char tB,
156  float alpha,
157  const float *A, magma_int_t lda,
158  const float *B, magma_int_t ldb,
159  float beta,
160  float *C, magma_int_t ldc);
161 void magmablas_sgemm_fermi64(char tA, char tB,
163  float alpha,
164  const float *A, magma_int_t lda,
165  const float *B, magma_int_t ldb,
166  float beta,
167  float *C, magma_int_t ldc);
168 void magmablas_ssymm(char s, char u,
170  float alpha,
171  const float *A, magma_int_t lda,
172  const float *B, magma_int_t ldb,
173  float beta,
174  float *C, magma_int_t ldc);
175 void magmablas_ssymm(char s, char u,
177  float alpha,
178  const float *A, magma_int_t lda,
179  const float *B, magma_int_t ldb,
180  float beta,
181  float *C, magma_int_t ldc);
/* magmablas_ssyrk is declared twice in a row with identical parameter
 * lists. Repeating an identical prototype is legal C, so the second
 * declaration is harmless but redundant.
 * NOTE(review): this header is marked "@generated s", so the duplicate
 * presumably comes from two distinct routines in the precision template
 * collapsing onto one name in single precision -- confirm before removing.
 * NOTE(review): the parameter list (u, t, n, k, alpha, A, lda, beta, C,
 * ldc) mirrors BLAS SSYRK (symmetric rank-k update of C) -- confirm the
 * semantics against the implementation. */
182 void magmablas_ssyrk(char u, char t,
183  magma_int_t n, magma_int_t k,
184  float alpha,
185  const float *A, magma_int_t lda,
186  float beta,
187  float *C, magma_int_t ldc);
188 void magmablas_ssyrk(char u, char t,
189  magma_int_t n, magma_int_t k,
190  float alpha,
191  const float *A, magma_int_t lda,
192  float beta,
193  float *C, magma_int_t ldc);
194 void magmablas_ssyr2k(char u, char t,
196  float alpha,
197  const float *A, magma_int_t lda,
198  const float *B, magma_int_t ldb,
199  float beta,
200  float *C, magma_int_t ldc);
/* Second consecutive declaration of magmablas_ssyr2k; the visible lines of
 * the declaration immediately before it match this one.
 * NOTE(review): presumably a redundant redeclaration produced by the
 * precision generator ("@generated s") -- confirm before removing.
 * NOTE(review): the parameter list (u, t, n, k, alpha, A, lda, B, ldb,
 * beta, C, ldc) mirrors BLAS SSYR2K (symmetric rank-2k update of C) --
 * confirm the semantics against the implementation. */
201 void magmablas_ssyr2k(char u, char t,
202  magma_int_t n, magma_int_t k,
203  float alpha,
204  const float *A, magma_int_t lda,
205  const float *B, magma_int_t ldb,
206  float beta,
207  float *C, magma_int_t ldc);
208 void magmablas_strmm(char s, char u, char t, char d,
210  float alpha,
211  const float *A, magma_int_t lda,
212  float *B, magma_int_t ldb);
213 void magmablas_strsm(char s, char u, char t, char d,
215  float alpha,
216  /*const*/ float *A, magma_int_t lda,
217  float *B, magma_int_t ldb);
218 
219 
220  /*
221  * Wrappers for platform independence.
222  * These wrap CUBLAS or AMD OpenCL BLAS functions.
223  */
224 
225 // ========================================
226 // copying vectors
227 // set copies host to device
228 // get copies device to host
229 
// "set" direction per the section comment: copies host to device.
// Reads n floats from hx_src with stride incx (const source) and writes
// them to dy_dst with stride incy.
// NOTE(review): whether the call blocks until the copy completes is not
// visible from the declaration -- confirm against the implementation.
230 void magma_ssetvector(
231  magma_int_t n,
232  float const *hx_src, magma_int_t incx,
233  float *dy_dst, magma_int_t incy );
234 
// "get" direction per the section comment: copies device to host.
// Reads n floats from dx_src with stride incx (const source) and writes
// them to hy_dst with stride incy.
// NOTE(review): whether the call blocks until the copy completes is not
// visible from the declaration -- confirm against the implementation.
235 void magma_sgetvector(
236  magma_int_t n,
237  float const *dx_src, magma_int_t incx,
238  float *hy_dst, magma_int_t incy );
239 
241  magma_int_t n,
242  float const *hx_src, magma_int_t incx,
243  float *dy_dst, magma_int_t incy,
244  magma_stream_t stream );
245 
247  magma_int_t n,
248  float const *dx_src, magma_int_t incx,
249  float *hy_dst, magma_int_t incy,
250  magma_stream_t stream );
251 
252 
253 // ========================================
254 // copying sub-matrices (contiguous columns)
255 // set copies host to device
256 // get copies device to host
257 // cpy copies device to device (with CUDA unified addressing, can be same or different devices)
258 
259 void magma_ssetmatrix(
261  float const *hA_src, magma_int_t lda,
262  float *dB_dst, magma_int_t ldb );
263 
264 void magma_sgetmatrix(
266  float const *dA_src, magma_int_t lda,
267  float *hB_dst, magma_int_t ldb );
268 
271  float const *hA_src, magma_int_t lda,
272  float *dB_dst, magma_int_t ldb,
273  magma_stream_t stream );
274 
277  float const *dA_src, magma_int_t lda,
278  float *hB_dst, magma_int_t ldb,
279  magma_stream_t stream );
280 
281 void magma_scopymatrix(
283  float const *dA_src, magma_int_t lda,
284  float *dB_dst, magma_int_t ldb );
285 
288  float const *dA_src, magma_int_t lda,
289  float *dB_dst, magma_int_t ldb,
290  magma_stream_t stream );
291 
292 
293 // ========================================
294 // Level 1 BLAS
295 
// Level 1 BLAS wrapper taking n and two non-const float vectors dx (stride
// incx) and dy (stride incy).
// NOTE(review): the parameter list mirrors BLAS SSWAP (exchange the
// contents of x and y); the `d` prefixes suggest both vectors live on the
// device -- confirm against the implementation.
296 void magma_sswap(
297  magma_int_t n,
298  float *dx, magma_int_t incx,
299  float *dy, magma_int_t incy );
300 
302  magma_int_t n,
303  float *dx, magma_int_t incx );
304 
305 // ========================================
306 // Level 2 BLAS
307 
308 void magma_sgemv(
309  magma_trans_t transA,
311  float alpha, float const *dA, magma_int_t lda,
312  float const *dx, magma_int_t incx,
313  float beta, float *dy, magma_int_t incy );
314 
// Level 2 BLAS wrapper. dA and dx are const inputs; dy is the only
// non-const array argument.
// NOTE(review): the parameter list mirrors BLAS SSYMV, i.e.
// y = alpha*A*x + beta*y with A symmetric and only the uplo triangle
// referenced; the `d` prefixes suggest device pointers -- confirm against
// the implementation.
315 void magma_ssymv(
316  magma_uplo_t uplo,
317  magma_int_t n,
318  float alpha, float const *dA, magma_int_t lda,
319  float const *dx, magma_int_t incx,
320  float beta, float *dy, magma_int_t incy );
321 
322 void magma_strsv(
324  magma_int_t n,
325  float const *dA, magma_int_t lda,
326  float *dx, magma_int_t incx );
327 
328 // ========================================
329 // Level 3 BLAS
330 
331 void magma_sgemm(
332  magma_trans_t transA, magma_trans_t transB,
334  float alpha, float const *dA, magma_int_t lda,
335  float const *dB, magma_int_t ldb,
336  float beta, float *dC, magma_int_t ldc );
337 
338 void magma_ssymm(
341  float alpha, float const *dA, magma_int_t lda,
342  float const *dB, magma_int_t ldb,
343  float beta, float *dC, magma_int_t ldc );
344 
345 void magma_ssyrk(
346  magma_uplo_t uplo, magma_trans_t trans,
348  float alpha, float const *dA, magma_int_t lda,
349  float beta, float *dC, magma_int_t ldc );
350 
351 void magma_ssyr2k(
352  magma_uplo_t uplo, magma_trans_t trans,
354  float alpha, float const *dA, magma_int_t lda,
355  float const *dB, magma_int_t ldb,
356  float beta, float *dC, magma_int_t ldc );
357 
358 void magma_strmm(
359  magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
361  float alpha, float const *dA, magma_int_t lda,
362  float *dB, magma_int_t ldb );
363 
364 void magma_strsm(
365  magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
367  float alpha, float const *dA, magma_int_t lda,
368  float *dB, magma_int_t ldb );
369 
370 #ifdef __cplusplus
371 }
372 #endif
373 
374 #undef PRECISION_s
375 #endif