MAGMA  1.2.0
Matrix Algebra on GPU and Multicore Architectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_slacpy.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated s Thu May 10 22:27:35 2012
9 */
10 
11 // includes, system
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16 #include <cuda.h>
17 #include <cuda_runtime_api.h>
18 #include <cublas.h>
19 #include <cblas.h>
20 
21 // includes, project
22 #include "flops.h"
23 #include "magma.h"
24 #include "magma_lapack.h"
25 #include "testings.h"
26 
27 /* ////////////////////////////////////////////////////////////////////////////
28  -- Testing slacpy
29 */
30 #define PRECISION_s
31 
32 int main( int argc, char** argv)
33 {
35 
36  float c_zero = MAGMA_S_ZERO;
37  float c_one = MAGMA_S_ONE;
38 
39  magma_timestr_t start, end;
40  float *hA, *hB, *hR, *dA, *dB;
41  float gpu_time, gpu_perf;
42 
43  int ione = 1;
44  int ISEED[4] = {0, 0, 0, 1};
45 
46  // groups of tests are:
47  // whole matrix, sub-matrix, around k*64 rows, around k*64 cols,
48  // zero rows, one row, zero cols, one col
49  int TESTS_I1[] = { 0, 100, 63, 64, 64, 64, 65, 10, 10, 10, 10, 10, 4, 4, 4, 4, 4, 4, 64, 64, 64, 64, 64, 64 };
50  int TESTS_I2[] = { 1000, 500, 511, 511, 512, 513, 513, 900, 900, 900, 900, 900, 4, 4, 4, 5, 5, 5, 127, 128, 129, 255, 256, 257 };
51  int TESTS_J1[] = { 0, 50, 10, 10, 10, 10, 10, 63, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64, 4, 4, 4, 4, 4, 4 };
52  int TESTS_J2[] = { 1000, 400, 900, 900, 900, 900, 900, 511, 511, 512, 513, 513, 127, 128, 129, 255, 256, 257, 4, 4, 4, 5, 5, 5 };
53  int ntest = sizeof(TESTS_J2) / sizeof(int);
54 
55  int n = 1000;
56  int lda = n;
57 
58  TESTING_MALLOC ( hA, float, lda*n );
59  TESTING_MALLOC ( hB, float, lda*n );
60  TESTING_MALLOC ( hR, float, lda*n );
61  TESTING_DEVALLOC ( dA, float, lda*n );
62  TESTING_DEVALLOC ( dB, float, lda*n );
63 
64  // initialize matrices; entries are (i.j) for A and (800 + i.j) for B.
65  float nf = n;
66  for( int i = 0; i < n; ++i ) {
67  for( int j = 0; j < n; ++j ) {
68  hA[i + j*lda] = MAGMA_S_MAKE( i + j/nf, 0. );
69  hB[i + j*lda] = MAGMA_S_MAKE( i + j/nf + 800, 0. );
70  }
71  }
72 
73  printf( "\nNote: ranges use Python notation,\n"
74  "i.e., A[i:j] is A[ i, i+1, ..., j-1 ], excluding A[j].\n\n" );
75  for( int t = 0; t < ntest; ++t ) {
76  magma_ssetmatrix( n, n, hA, lda, dA, lda );
77  magma_ssetmatrix( n, n, hB, lda, dB, lda );
78 
79  // copy submatrix
80  int i1 = TESTS_I1[ t ];
81  int i2 = TESTS_I2[ t ];
82  int j1 = TESTS_J1[ t ];
83  int j2 = TESTS_J2[ t ];
84  magmablas_slacpy( 'F', i2-i1, j2-j1,
85  &dA[i1 + j1*lda], lda,
86  &dB[i1 + j1*lda], lda );
87 
88  // verify result
89  int bad_copies = 0;
90  int overwrites = 0;
91  magma_sgetmatrix( n, n, dB, lda, hR, lda );
92 
93  for( int j = 0; j < n; ++j ) {
94  for( int i = 0; i < n; ++i ) {
95  if ( i1 <= i and i < i2 and j1 <= j and j < j2 ) {
96  if ( not MAGMA_S_EQUAL( hR[i + j*lda], hA[i + j*lda] )) {
97  bad_copies += 1;
98  printf( "Copy failed at B[%d,%d], expected %.4f, got %.4f\n",
99  i, j, MAGMA_S_REAL( hA[i + j*lda] ),
100  MAGMA_S_REAL( hR[i + j*lda] ));
101  }
102  }
103  else {
104  if ( not MAGMA_S_EQUAL( hR[i + j*lda], hB[i + j*lda] )) {
105  overwrites += 1;
106  printf( "Overwrote at B[%d,%d], expected %.4f, got %.4f\n",
107  i, j, MAGMA_S_REAL( hA[i + j*lda] ),
108  MAGMA_S_REAL( hR[i + j*lda] ));
109  }
110  }
111  }
112  }
113  printf( "B(%4d:%4d, %4d:%4d) = A(%4d:%4d, %4d:%4d) ",
114  i1, i2, j1, j2,
115  i1, i2, j1, j2 );
116  if ( bad_copies > 0 or overwrites > 0 ) {
117  printf( "failed, %d bad copies, %d overwrites\n", bad_copies, overwrites );
118  }
119  else {
120  printf( "passed\n" );
121  }
122  }
123 
124  TESTING_FREE( hA );
125  TESTING_FREE( hB );
126  TESTING_FREE( hR );
127  TESTING_DEVFREE( dA );
128  TESTING_DEVFREE( dB );
129 
130  // --------------------------------------------------
131  // speed tests
132  int SIZE[] = {
133  1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, 3840,
134  4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912,
135  7168, 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984
136  };
137  int nsize = sizeof(SIZE) / sizeof(int);
138 
139  printf("\n N GPU MB/s (sec)\n");
140  printf("========================================\n");
141  for( int t = 0; t < nsize; ++t ) {
142  n = SIZE[ t ];
143  lda = n;
144  TESTING_MALLOC ( hA, float, lda*n );
145  TESTING_MALLOC ( hB, float, lda*n );
146  TESTING_DEVALLOC ( dA, float, lda*n );
147  TESTING_DEVALLOC ( dB, float, lda*n );
148 
149  // initialize matrices
150  int n2 = lda*n;
151  lapackf77_slarnv( &ione, ISEED, &n2, hA );
152  lapackf77_slaset( "F", &n, &n, &c_zero, &c_zero, hB, &lda );
153  magma_ssetmatrix( n, n, hA, lda, dA, lda );
154  magmablas_slaset( 'F', n, n, /*c_zero,*/ dB, lda );
155 
156  start = get_current_time();
157  magmablas_slacpy( 'F', n, n, dA, lda, dB, lda );
158  end = get_current_time();
159 
160  // verify copy
161  magma_sgetmatrix( n, n, dB, lda, hB, lda );
162  for( int j = 0; j < n; ++j ) {
163  for( int i = 0; i < n; ++i ) {
164  if ( not MAGMA_S_EQUAL( hA[i + j*lda], hB[i + j*lda] )) {
165  printf( "Copy failed at B[%d,%d], expected %.4f, got %.4f\n",
166  i, j, MAGMA_S_REAL( hA[i + j*lda] ),
167  MAGMA_S_REAL( hB[i + j*lda] ));
168  exit(1);
169  }
170  }
171  }
172 
173  gpu_time = GetTimerValue( start, end ) * 1e-3;
174  gpu_perf = n*n*sizeof(float) / 1024. / 1024. / gpu_time;
175  printf( "%5d %6.2f (%8.6f)\n", n, gpu_perf, gpu_time );
176 
177  TESTING_FREE ( hA );
178  TESTING_FREE ( hB );
179  TESTING_DEVFREE( dA );
180  TESTING_DEVFREE( dB );
181  }
182 
183  /* Shutdown */
185  return EXIT_SUCCESS;
186 }