MAGMA  1.2.0
Matrix Algebra on GPU and Multicore Architectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_sgemm.cpp
Go to the documentation of this file.
1 /*
2  * -- MAGMA (version 1.2.0) --
3  * Univ. of Tennessee, Knoxville
4  * Univ. of California, Berkeley
5  * Univ. of Colorado, Denver
6  * May 2012
7  *
8  * @generated s Thu May 10 22:27:25 2012
9  *
10  **/
11 
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16 #include <cuda.h>
17 #include <cuda_runtime_api.h>
18 #include <cublas.h>
19 
20 #include "flops.h"
21 #include "magma.h"
22 #include "magma_lapack.h"
23 #include "testings.h"
24 
// Flop count of one GEMM of size m x n x k, built from the multiply and add
// counts supplied by the FMULS_GEMM / FADDS_GEMM macros.
// This generated file fixes the precision to single real (PRECISION_s).
#define PRECISION_s
#if !defined(PRECISION_z) && !defined(PRECISION_c)
// Real precisions: every multiply and every add counts as one flop.
#define FLOPS(m, n, k) ( FMULS_GEMM(m, n, k) + FADDS_GEMM(m, n, k))
#else
// Complex precisions: a multiply is weighted by 6 and an add by 2.
#define FLOPS(m, n, k) ( 6. * FMULS_GEMM(m, n, k) + 2. * FADDS_GEMM(m, n, k))
#endif
32 
33 int main( int argc, char** argv)
34 {
36 
37  magma_timestr_t start, end;
38  float flops, magma_perf, cuda_perf, error, work[1];
39  char transA = MagmaNoTrans;
40  char transB = MagmaNoTrans;
41 
42  magma_int_t istart = 1024;
43  magma_int_t iend = 6240;
44  magma_int_t M, M0 = 0;
45  magma_int_t N, N0 = 0;
46  magma_int_t K, K0 = 0;
47  magma_int_t i;
48  magma_int_t Am, An, Bm, Bn;
49  magma_int_t szeA, szeB, szeC;
50  magma_int_t lda, ldb, ldc, ldda, lddb, lddc;
51  magma_int_t ione = 1;
52  magma_int_t ISEED[4] = {0,0,0,1};
53 
54  float *h_A, *h_B, *h_C, *h_C2;
55  float *d_A, *d_B, *d_C;
56  float c_neg_one = MAGMA_S_NEG_ONE;
57  float alpha = MAGMA_S_MAKE( 0.29, -0.86 );
58  float beta = MAGMA_S_MAKE( -0.48, 0.38 );
59 
60  if (argc != 1){
61  for(i=1; i<argc; i++){
62  if ( strcmp("-N", argv[i]) == 0 ){
63  N0 = atoi(argv[++i]);
64  }
65  else if ( strcmp("-M", argv[i]) == 0 ){
66  M0 = atoi(argv[++i]);
67  }
68  else if ( strcmp("-K", argv[i]) == 0 ){
69  K0 = atoi(argv[++i]);
70  }
71  else if (strcmp("-NN", argv[i])==0){
72  transA = transB = MagmaNoTrans;
73  }
74  else if (strcmp("-TT", argv[i])==0){
75  transA = transB = MagmaTrans;
76  }
77  else if (strcmp("-NT", argv[i])==0){
78  transA = MagmaNoTrans;
79  transB = MagmaTrans;
80  }
81  else if (strcmp("-TN", argv[i])==0){
82  transA = MagmaTrans;
83  transB = MagmaNoTrans;
84  }
85 #if defined(PRECISION_z) || defined(PRECISION_c)
86  else if (strcmp("-NC", argv[i])==0){
87  transA = MagmaNoTrans;
88  transB = MagmaTrans;
89  }
90  else if (strcmp("-TC", argv[i])==0){
91  transA = MagmaTrans;
92  transB = MagmaTrans;
93  }
94  else if (strcmp("-CN", argv[i])==0){
95  transA = MagmaTrans;
96  transB = MagmaNoTrans;
97  }
98  else if (strcmp("-CT", argv[i])==0){
99  transA = MagmaTrans;
100  transB = MagmaTrans;
101  }
102  else if (strcmp("-CC", argv[i])==0){
103  transA = transB = MagmaTrans;
104  }
105 #endif
106  }
107  }
108 
109  if ( (M0 != 0) && (N0 != 0) && (K0 != 0) )
110  iend = istart + 1;
111 
112  M = N = K = iend;
113  if ( M0 != 0 ) M = M0;
114  if ( N0 != 0 ) N = N0;
115  if ( K0 != 0 ) K = K0;
116 
117  if( transA == MagmaNoTrans ) {
118  Am = M;
119  An = K;
120  } else {
121  Am = K;
122  An = M;
123  }
124 
125  if( transB == MagmaNoTrans ) {
126  Bm = K;
127  Bn = N;
128  } else {
129  Bm = N;
130  Bn = K;
131  }
132 
133  lda = ldc = M;
134  ldb = Bm;
135 
136  ldda = lddc = ((M+31)/32)*32;
137  lddb = ((ldb+31)/32)*32;
138 
139  K+=32;
140  M+=32;
141  N +=32;
142 
143  TESTING_MALLOC( h_A, float, lda*K );
144  TESTING_MALLOC( h_B, float, ldb*Bn );
145  TESTING_MALLOC( h_C, float, ldc*N );
146  TESTING_MALLOC( h_C2, float, ldc*N );
147 
148  TESTING_DEVALLOC( d_A, float, ldda*K );
149  TESTING_DEVALLOC( d_B, float, lddb*Bn );
150  TESTING_DEVALLOC( d_C, float, lddc*N );
151 
152  printf("\nUsage: \n");
153  printf(" testing_sgemm [-NN|NT|TN|TT] [-N %d] \n\n", 1024);
154 
155  printf("\n");
156  printf("Testing transA = %c transB = %c\n", transA, transB);
157  printf(" M N K MAGMA GFLop/s CUBLAS GFlop/s error\n");
158  printf("==================================================================\n");
159  for(i=istart; i<iend; i = (int)(i*1.25) )
160  {
161  M = N = K = i;
162  if ( M0 != 0 ) M = M0;
163  if ( N0 != 0 ) N = N0;
164  if ( K0 != 0 ) K = K0;
165 
166  if( transA == MagmaNoTrans ) {
167  lda = Am = M;
168  An = K;
169  } else {
170  lda = Am = K;
171  An = M;
172  }
173 
174  if( transB == MagmaNoTrans ) {
175  ldb = Bm = K;
176  Bn = N;
177  } else {
178  ldb = Bm = N;
179  Bn = K;
180  }
181  flops = FLOPS( (float)M, (float)N, (float)K ) / 1000000;
182  ldc = M;
183 
184  ldda = ((lda+31)/32)*32;
185  lddb = ((ldb+31)/32)*32;
186  lddc = ((ldc+31)/32)*32;
187 
188  szeA = lda * An;
189  szeB = ldb * Bn;
190  szeC = ldc * N;
191 
192  /* Initialize the matrices */
193  lapackf77_slarnv( &ione, ISEED, &szeA, h_A );
194  lapackf77_slarnv( &ione, ISEED, &szeB, h_B );
195  lapackf77_slarnv( &ione, ISEED, &szeC, h_C );
196 
197  /* =====================================================================
198  Performs operation using MAGMA-BLAS
199  =================================================================== */
200  magma_ssetmatrix( Am, An, h_A, lda, d_A, ldda );
201  magma_ssetmatrix( Bm, Bn, h_B, ldb, d_B, lddb );
202  magma_ssetmatrix( M, N, h_C, ldc, d_C, lddc );
203 
204  start = get_current_time();
205  magmablas_sgemm( transA, transB, M, N, K,
206  alpha, d_A, ldda,
207  d_B, lddb,
208  beta, d_C, lddc );
209  end = get_current_time();
210  magma_perf = flops / GetTimerValue(start, end);
211 
212  magma_sgetmatrix( M, N, d_C, lddc, h_C2, ldc );
213 
214  /* =====================================================================
215  Performs operation using CUDA-BLAS
216  =================================================================== */
217  magma_ssetmatrix( M, N, h_C, ldc, d_C, lddc );
218 
219  start = get_current_time();
220  cublasSgemm( transA, transB, M, N, K,
221  alpha, d_A, ldda,
222  d_B, lddb,
223  beta, d_C, lddc );
224  end = get_current_time();
225  cuda_perf = flops / GetTimerValue(start, end);
226 
227  magma_sgetmatrix( M, N, d_C, lddc, h_C, ldc );
228 
229  /* =====================================================================
230  Error Computation and Performance Compariosn
231  =================================================================== */
232  blasf77_saxpy(&szeC, &c_neg_one, h_C, &ione, h_C2, &ione);
233  error = lapackf77_slange("M", &M, &N, h_C2, &ldc, work);
234  printf("%5d %5d %5d %6.2f %6.2f %e\n",
235  M, N, K, magma_perf, cuda_perf, error);
236  }
237 
238  /* Memory clean up */
239  TESTING_FREE( h_A );
240  TESTING_FREE( h_B );
241  TESTING_FREE( h_C );
242  TESTING_FREE( h_C2 );
243 
244  TESTING_DEVFREE( d_A );
245  TESTING_DEVFREE( d_B );
246  TESTING_DEVFREE( d_C );
247 
249 }