MAGMA  1.2.0
Matrix Algebra on GPU and Multicore Architectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_dlacpy.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated d Thu May 10 22:27:35 2012
9 */
10 
11 // includes, system
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <math.h>
16 #include <cuda.h>
17 #include <cuda_runtime_api.h>
18 #include <cublas.h>
19 #include <cblas.h>
20 
21 // includes, project
22 #include "flops.h"
23 #include "magma.h"
24 #include "magma_lapack.h"
25 #include "testings.h"
26 
27 /* ////////////////////////////////////////////////////////////////////////////
28  -- Testing dlacpy
29 */
30 #define PRECISION_d
31 
32 int main( int argc, char** argv)
33 {
35 
36  double c_zero = MAGMA_D_ZERO;
37  double c_one = MAGMA_D_ONE;
38 
39  magma_timestr_t start, end;
40  double *hA, *hB, *hR, *dA, *dB;
41  double gpu_time, gpu_perf;
42 
43  int ione = 1;
44  int ISEED[4] = {0, 0, 0, 1};
45 
46  // groups of tests are:
47  // whole matrix, sub-matrix, around k*64 rows, around k*64 cols,
48  // zero rows, one row, zero cols, one col
49  int TESTS_I1[] = { 0, 100, 63, 64, 64, 64, 65, 10, 10, 10, 10, 10, 4, 4, 4, 4, 4, 4, 64, 64, 64, 64, 64, 64 };
50  int TESTS_I2[] = { 1000, 500, 511, 511, 512, 513, 513, 900, 900, 900, 900, 900, 4, 4, 4, 5, 5, 5, 127, 128, 129, 255, 256, 257 };
51  int TESTS_J1[] = { 0, 50, 10, 10, 10, 10, 10, 63, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64, 4, 4, 4, 4, 4, 4 };
52  int TESTS_J2[] = { 1000, 400, 900, 900, 900, 900, 900, 511, 511, 512, 513, 513, 127, 128, 129, 255, 256, 257, 4, 4, 4, 5, 5, 5 };
53  int ntest = sizeof(TESTS_J2) / sizeof(int);
54 
55  int n = 1000;
56  int lda = n;
57 
58  TESTING_MALLOC ( hA, double, lda*n );
59  TESTING_MALLOC ( hB, double, lda*n );
60  TESTING_MALLOC ( hR, double, lda*n );
61  TESTING_DEVALLOC ( dA, double, lda*n );
62  TESTING_DEVALLOC ( dB, double, lda*n );
63 
64  // initialize matrices; entries are (i.j) for A and (800 + i.j) for B.
65  double nf = n;
66  for( int i = 0; i < n; ++i ) {
67  for( int j = 0; j < n; ++j ) {
68  hA[i + j*lda] = MAGMA_D_MAKE( i + j/nf, 0. );
69  hB[i + j*lda] = MAGMA_D_MAKE( i + j/nf + 800, 0. );
70  }
71  }
72 
73  printf( "\nNote: ranges use Python notation,\n"
74  "i.e., A[i:j] is A[ i, i+1, ..., j-1 ], excluding A[j].\n\n" );
75  for( int t = 0; t < ntest; ++t ) {
76  magma_dsetmatrix( n, n, hA, lda, dA, lda );
77  magma_dsetmatrix( n, n, hB, lda, dB, lda );
78 
79  // copy submatrix
80  int i1 = TESTS_I1[ t ];
81  int i2 = TESTS_I2[ t ];
82  int j1 = TESTS_J1[ t ];
83  int j2 = TESTS_J2[ t ];
84  magmablas_dlacpy( 'F', i2-i1, j2-j1,
85  &dA[i1 + j1*lda], lda,
86  &dB[i1 + j1*lda], lda );
87 
88  // verify result
89  int bad_copies = 0;
90  int overwrites = 0;
91  magma_dgetmatrix( n, n, dB, lda, hR, lda );
92 
93  for( int j = 0; j < n; ++j ) {
94  for( int i = 0; i < n; ++i ) {
95  if ( i1 <= i and i < i2 and j1 <= j and j < j2 ) {
96  if ( not MAGMA_D_EQUAL( hR[i + j*lda], hA[i + j*lda] )) {
97  bad_copies += 1;
98  printf( "Copy failed at B[%d,%d], expected %.4f, got %.4f\n",
99  i, j, MAGMA_D_REAL( hA[i + j*lda] ),
100  MAGMA_D_REAL( hR[i + j*lda] ));
101  }
102  }
103  else {
104  if ( not MAGMA_D_EQUAL( hR[i + j*lda], hB[i + j*lda] )) {
105  overwrites += 1;
106  printf( "Overwrote at B[%d,%d], expected %.4f, got %.4f\n",
107  i, j, MAGMA_D_REAL( hA[i + j*lda] ),
108  MAGMA_D_REAL( hR[i + j*lda] ));
109  }
110  }
111  }
112  }
113  printf( "B(%4d:%4d, %4d:%4d) = A(%4d:%4d, %4d:%4d) ",
114  i1, i2, j1, j2,
115  i1, i2, j1, j2 );
116  if ( bad_copies > 0 or overwrites > 0 ) {
117  printf( "failed, %d bad copies, %d overwrites\n", bad_copies, overwrites );
118  }
119  else {
120  printf( "passed\n" );
121  }
122  }
123 
124  TESTING_FREE( hA );
125  TESTING_FREE( hB );
126  TESTING_FREE( hR );
127  TESTING_DEVFREE( dA );
128  TESTING_DEVFREE( dB );
129 
130  // --------------------------------------------------
131  // speed tests
132  int SIZE[] = {
133  1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, 3840,
134  4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912,
135  7168, 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984
136  };
137  int nsize = sizeof(SIZE) / sizeof(int);
138 
139  printf("\n N GPU MB/s (sec)\n");
140  printf("========================================\n");
141  for( int t = 0; t < nsize; ++t ) {
142  n = SIZE[ t ];
143  lda = n;
144  TESTING_MALLOC ( hA, double, lda*n );
145  TESTING_MALLOC ( hB, double, lda*n );
146  TESTING_DEVALLOC ( dA, double, lda*n );
147  TESTING_DEVALLOC ( dB, double, lda*n );
148 
149  // initialize matrices
150  int n2 = lda*n;
151  lapackf77_dlarnv( &ione, ISEED, &n2, hA );
152  lapackf77_dlaset( "F", &n, &n, &c_zero, &c_zero, hB, &lda );
153  magma_dsetmatrix( n, n, hA, lda, dA, lda );
154  magmablas_dlaset( 'F', n, n, /*c_zero,*/ dB, lda );
155 
156  start = get_current_time();
157  magmablas_dlacpy( 'F', n, n, dA, lda, dB, lda );
158  end = get_current_time();
159 
160  // verify copy
161  magma_dgetmatrix( n, n, dB, lda, hB, lda );
162  for( int j = 0; j < n; ++j ) {
163  for( int i = 0; i < n; ++i ) {
164  if ( not MAGMA_D_EQUAL( hA[i + j*lda], hB[i + j*lda] )) {
165  printf( "Copy failed at B[%d,%d], expected %.4f, got %.4f\n",
166  i, j, MAGMA_D_REAL( hA[i + j*lda] ),
167  MAGMA_D_REAL( hB[i + j*lda] ));
168  exit(1);
169  }
170  }
171  }
172 
173  gpu_time = GetTimerValue( start, end ) * 1e-3;
174  gpu_perf = n*n*sizeof(double) / 1024. / 1024. / gpu_time;
175  printf( "%5d %6.2f (%8.6f)\n", n, gpu_perf, gpu_time );
176 
177  TESTING_FREE ( hA );
178  TESTING_FREE ( hB );
179  TESTING_DEVFREE( dA );
180  TESTING_DEVFREE( dB );
181  }
182 
183  /* Shutdown */
185  return EXIT_SUCCESS;
186 }