17 #include <cuda_runtime_api.h>
22 #include "magma_lapack.h"
// FLOPS(m, n, k): weighted sum of FMULS_GEMM(m, n, k) and FADDS_GEMM(m, n, k)
// (both defined outside this excerpt; presumably the multiply and add counts
// of an m-by-n-by-k GEMM -- confirm against their definitions).
//   * PRECISION_z / PRECISION_c: each multiply is weighted by 6 and each add
//     by 2.
//   * Second definition: multiplies and adds are each weighted by 1.
// NOTE(review): the #else / #endif that should separate and close the two
// definitions are not visible in this excerpt -- confirm they exist in the
// full file, otherwise FLOPS is redefined for the complex precisions.
27 #if defined(PRECISION_z) || defined(PRECISION_c)
28 #define FLOPS(m, n, k) ( 6. * FMULS_GEMM(m, n, k) + 2. * FADDS_GEMM(m, n, k))
30 #define FLOPS(m, n, k) ( FMULS_GEMM(m, n, k) + FADDS_GEMM(m, n, k))
33 int main(
int argc,
char** argv)
38 float flops, magma_perf, cuda_perf, error,
work[1];
54 float *h_A, *h_B, *h_C, *h_C2;
55 float *d_A, *d_B, *d_C;
61 for(i=1; i<argc; i++){
62 if ( strcmp(
"-N", argv[i]) == 0 ){
65 else if ( strcmp(
"-M", argv[i]) == 0 ){
68 else if ( strcmp(
"-K", argv[i]) == 0 ){
71 else if (strcmp(
"-NN", argv[i])==0){
74 else if (strcmp(
"-TT", argv[i])==0){
77 else if (strcmp(
"-NT", argv[i])==0){
81 else if (strcmp(
"-TN", argv[i])==0){
85 #if defined(PRECISION_z) || defined(PRECISION_c)
86 else if (strcmp(
"-NC", argv[i])==0){
90 else if (strcmp(
"-TC", argv[i])==0){
94 else if (strcmp(
"-CN", argv[i])==0){
98 else if (strcmp(
"-CT", argv[i])==0){
102 else if (strcmp(
"-CC", argv[i])==0){
109 if ( (M0 != 0) && (N0 != 0) && (K0 != 0) )
113 if ( M0 != 0 ) M = M0;
114 if ( N0 != 0 ) N = N0;
115 if ( K0 != 0 ) K = K0;
136 ldda = lddc = ((M+31)/32)*32;
137 lddb = ((ldb+31)/32)*32;
152 printf(
"\nUsage: \n");
153 printf(
" testing_sgemm [-NN|NT|TN|TT] [-N %d] \n\n", 1024);
156 printf(
"Testing transA = %c transB = %c\n", transA, transB);
157 printf(
" M N K MAGMA GFLop/s CUBLAS GFlop/s error\n");
158 printf(
"==================================================================\n");
159 for(i=istart; i<iend; i = (int)(i*1.25) )
162 if ( M0 != 0 ) M = M0;
163 if ( N0 != 0 ) N = N0;
164 if ( K0 != 0 ) K = K0;
181 flops =
FLOPS( (
float)M, (
float)N, (
float)K ) / 1000000;
184 ldda = ((lda+31)/32)*32;
185 lddb = ((ldb+31)/32)*32;
186 lddc = ((ldc+31)/32)*32;
220 cublasSgemm( transA, transB, M, N, K,
234 printf(
"%5d %5d %5d %6.2f %6.2f %e\n",
235 M, N, K, magma_perf, cuda_perf, error);