I modified the testing_dgetrf_mgpu.cpp code to check that I am using the application correctly.

Code: Select all

```
...
// checksum
// Folds the host matrix h_A ("lapack") and the per-GPU device buffers d_lA[k]
// ("magma") into two scalars, res_lap and res_mag, and prints both.
// LAPACK and MAGMA
// Rescaling threshold for the running checksums.
double max_double = 1073741824; // 2^30 (2^52)
// auxl / auxm hold the checksum value left by the previous element; they are
// what gets compared against max_double on the next iteration.
double auxl= 0.0;
double auxm= 0.0;
double res_lap = 0.0;
double res_mag = 0.0;
// Not used anywhere in the visible excerpt.
double *aux = NULL;
// One host staging pointer per GPU.
// NOTE(review): the declaration of ngpu is not visible here; if it is not a
// compile-time constant this is a runtime-sized array, which is a compiler
// extension in C++ -- confirm.
double *h_lA[ngpu];
//aux_ldn_local
for(int k=0; k<ngpu; k++)
{
// Stage GPU k's buffer (ldda*aux_ldn_local[k] doubles) on the host.
// Presumably TESTING_MALLOC_CPU allocates that many doubles -- the macro is
// defined elsewhere, verify.
TESTING_MALLOC_CPU(h_lA[k], double, ldda*aux_ldn_local[k]);
// NOTE(review): the cudaMemcpy return status is ignored, and no device
// selection for GPU k is visible in this excerpt -- confirm the copy succeeds
// and reads from the intended device.
cudaMemcpy(h_lA[k], d_lA[k], ldda*aux_ldn_local[k]*sizeof(double), cudaMemcpyDeviceToHost);
// The index (int)aux_ldn_local[k]*i+j below visits offsets
// 0 .. ldda*aux_ldn_local[k]-1 once each, in increasing order.
for(int i=0; i<ldda ; i++)
{
for (int j=0; j<(int)aux_ldn_local[k]; j++)
{
// Scale the host checksum down by 100 once the previous value passed the
// threshold.
if(auxl > max_double)
{
res_lap /= 100;
}
// Equivalent to res_lap = 2*res_lap + element, so the running value doubles at
// every element.
// NOTE(review): confirm this is intended rather than a plain sum.
// NOTE(review): h_A is read at the same offsets for every k (the index does
// not depend on the GPU) and no leading dimension of h_A is used -- confirm
// this matches how the matrix is distributed over d_lA[k] when ngpu > 1.
res_lap += res_lap + h_A[(int)aux_ldn_local[k]*i+j];
auxl = res_lap;
// Same rescaling rule for the device-side checksum.
if(auxm > max_double)
{
res_mag /= 100;
}
// Same accumulation rule, applied to the staged copy of GPU k's buffer.
res_mag += res_mag + h_lA[k][(int)aux_ldn_local[k]*i+j]; // NOTE: watch out
auxm=res_mag;
}
}
// Release the staging buffer before moving on to the next GPU.
TESTING_FREE_CPU(h_lA[k]);
}
// Print both checksums on one line (the format string is left exactly as posted).
printf("---- Checksum lapack: %f Checsum magma: %f.\n", res_lap, res_mag);
...
```

Code: Select all

```
Usage: ./testing_dgetrf_mgpu [options] [-h|--help]
ngpu 1
M N CPU GFlop/s (sec) GPU GFlop/s (sec) |PA-LU|/(N*|A|)
=========================================================================
---- Checksum lapack: 175129168.103157 Checsum magma: 175129168.103157.
1088 1088 --- ( --- ) 32.23 ( 0.03) ---
---- Checksum lapack: 1907947604.779820 Checsum magma: 1907947604.779820.
2112 2112 --- ( --- ) 164.75 ( 0.04) ---
---- Checksum lapack: 136711177.014838 Checsum magma: 136711177.014838.
3136 3136 --- ( --- ) 268.83 ( 0.08) ---
---- Checksum lapack: 344441255.846596 Checsum magma: 344441255.846596.
4160 4160 --- ( --- ) 392.56 ( 0.12) ---
---- Checksum lapack: 370879087.794721 Checsum magma: 370879087.794721.
5184 5184 --- ( --- ) 494.81 ( 0.19) ---
---- Checksum lapack: 164429408.520023 Checsum magma: 164429408.520023.
6208 6208 --- ( --- ) 574.89 ( 0.28) ---
---- Checksum lapack: 39557278.528352 Checsum magma: 39557278.528352.
7232 7232 --- ( --- ) 635.84 ( 0.40) ---
---- Checksum lapack: 461207990.882663 Checsum magma: 461207990.882663.
8256 8256 --- ( --- ) 667.69 ( 0.56) ---
---- Checksum lapack: 2081259705.950725 Checsum magma: 2081259705.950725.
9280 9280 --- ( --- ) 713.96 ( 0.75) ---
---- Checksum lapack: 46592603.134620 Checsum magma: 46592603.134620.
10304 10304 --- ( --- ) 742.24 ( 0.98) ---
```

Code: Select all

```
M/N lapack magma
1088 175147069,830752 175122108,075274
2112 1908554751,7443 1907798223,11236
3136 136765029,786001 136716184,294073
4160 344726468,832647 344439123,707522
5184 371342616,55389 370977669,367096
6208 164683471,849087 164421714,434309
7232 39635677,068431 39541368,874261
8256 461691038,783903 460646834,479622
9280 2088480297,71815 2080143772,2736
10304 46703953,877662 46585143,384757
```

Any ideas? Can I take these as the true values? Why are there these differences? What accuracy should I expect?

Thanks a lot!