17 #include <cuda_runtime_api.h>
24 #include "magma_lapack.h"
32 int main(
int argc,
char** argv)
40 float *hA, *hB, *hR, *
dA, *
dB;
41 float gpu_time, gpu_perf;
44 int ISEED[4] = {0, 0, 0, 1};
49 int TESTS_I1[] = { 0, 100, 63, 64, 64, 64, 65, 10, 10, 10, 10, 10, 4, 4, 4, 4, 4, 4, 64, 64, 64, 64, 64, 64 };
50 int TESTS_I2[] = { 1000, 500, 511, 511, 512, 513, 513, 900, 900, 900, 900, 900, 4, 4, 4, 5, 5, 5, 127, 128, 129, 255, 256, 257 };
51 int TESTS_J1[] = { 0, 50, 10, 10, 10, 10, 10, 63, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64, 4, 4, 4, 4, 4, 4 };
52 int TESTS_J2[] = { 1000, 400, 900, 900, 900, 900, 900, 511, 511, 512, 513, 513, 127, 128, 129, 255, 256, 257, 4, 4, 4, 5, 5, 5 };
53 int ntest =
sizeof(TESTS_J2) /
sizeof(
int);
66 for(
int i = 0; i < n; ++i ) {
67 for(
int j = 0; j < n; ++j ) {
73 printf(
"\nNote: ranges use Python notation,\n"
74 "i.e., A[i:j] is A[ i, i+1, ..., j-1 ], excluding A[j].\n\n" );
75 for(
int t = 0; t < ntest; ++t ) {
80 int i1 = TESTS_I1[ t ];
81 int i2 = TESTS_I2[ t ];
82 int j1 = TESTS_J1[ t ];
83 int j2 = TESTS_J2[ t ];
85 &dA[i1 + j1*lda], lda,
86 &dB[i1 + j1*lda], lda );
93 for(
int j = 0; j < n; ++j ) {
94 for(
int i = 0; i < n; ++i ) {
95 if ( i1 <= i and i < i2 and j1 <= j and j < j2 ) {
98 printf(
"Copy failed at B[%d,%d], expected %.4f, got %.4f\n",
106 printf(
"Overwrote at B[%d,%d], expected %.4f, got %.4f\n",
113 printf(
"B(%4d:%4d, %4d:%4d) = A(%4d:%4d, %4d:%4d) ",
116 if ( bad_copies > 0 or overwrites > 0 ) {
117 printf(
"failed, %d bad copies, %d overwrites\n", bad_copies, overwrites );
120 printf(
"passed\n" );
133 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, 3840,
134 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912,
135 7168, 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984
137 int nsize =
sizeof(SIZE) /
sizeof(
int);
139 printf(
"\n N GPU MB/s (sec)\n");
140 printf(
"========================================\n");
141 for(
int t = 0; t < nsize; ++t ) {
162 for(
int j = 0; j < n; ++j ) {
163 for(
int i = 0; i < n; ++i ) {
165 printf(
"Copy failed at B[%d,%d], expected %.4f, got %.4f\n",
174 gpu_perf = n*n*
sizeof(float) / 1024. / 1024. / gpu_time;
175 printf(
"%5d %6.2f (%8.6f)\n", n, gpu_perf, gpu_time );