I have imported your matrix and called it matrix180.dat.
I have modified testing_spotrf.cpp as follows. If you run it with -N 180, it reads the matrix from matrix180.dat instead of generating a random one.
- Code: Select all
/*
* -- MAGMA (version 1.0) --
* Univ. of Tennessee, Knoxville
* Univ. of California, Berkeley
* Univ. of Colorado, Denver
* November 2010
*
* @generated s
*
**/
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas.h>
// includes, project
#include "flops.h"
#include "magma.h"
#include "testings.h"
// Flops formula
#define PRECISION_s
#if defined(PRECISION_z) || defined(PRECISION_c)
#define FLOPS(n) ( 6. * FMULS_POTRF(n) + 2. * FADDS_POTRF(n) )
#else
#define FLOPS(n) ( FMULS_POTRF(n) + FADDS_POTRF(n) )
#endif
/* ////////////////////////////////////////////////////////////////////////////
-- Testing spotrf
*/
int main( int argc, char** argv)
{
TESTING_CUDA_INIT();
TimeStruct start, end;
float flops, gpu_perf, cpu_perf;
float *h_A, *h_R;
magma_int_t N=0, n2, lda;
magma_int_t size[10] = {1024,2048,3072,4032,5184,6048,7200,8064,8928,10240};
FILE *fp ;
magma_int_t i, info, j;
const char *uplo = MagmaLowerStr;
float mzone = MAGMA_S_NEG_ONE;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
float work[1], matnorm, temp;
int special180 = 0;
if (argc != 1){
for(i = 1; i<argc; i++){
if (strcmp("-N", argv[i])==0)
N = atoi(argv[++i]);
}
if (N>0) size[0] = size[9] = N;
else exit(1);
if (N==180) {
printf("Special case for N = %d\n",N);
special180 = 1;
}
}
else {
printf("\nUsage: \n");
printf(" testing_spotrf -N %d\n\n", 1024);
}
/* Allocate host memory for the matrix */
n2 = size[9] * size[9];
TESTING_MALLOC( h_A, float, n2);
TESTING_HOSTALLOC( h_R, float, n2);
printf("memory size allocated %d\n",n2);
printf("\n\n");
printf(" N CPU GFlop/s GPU GFlop/s ||R||_F / ||A||_F\n");
printf("========================================================\n");
for(i=0; i<10; i++){
N = size[i];
lda = N;
n2 = lda*N;
flops = FLOPS( (float)N ) / 1000000;
/* ====================================================================
Initialize the matrix
=================================================================== */
if (special180) {
printf("About to read matrix for special case N = %d.\n",N);
fp = fopen ("matrix180.dat", "r") ;
if( fp == NULL ){ printf("Couldn't open input file\n"); exit(1);}
else {
magma_int_t i, j;
printf("Input file opened.\n");
printf("No of items to be read is %d\n",n2);
for (i=0; i<n2; i++) {
j = fscanf(fp,"%f",work);
if (j != 1) {
printf("%d %d\n",i,j);
break;
}
h_A[i]=work[0];
}
printf("%d %f\n",n2,h_A[n2-1]);
fclose( fp ) ;
printf("Input file closed.\n");
}
} else {
lapackf77_slarnv( &ione, ISEED, &n2, h_A );
/* Symmetrize and increase the diagonal */
{
magma_int_t i, j;
for(i=0; i<N; i++) {
MAGMA_S_SET2REAL( h_A[i*lda+i], ( MAGMA_S_GET_X(h_A[i*lda+i]) + 1.*N ) );
for(j=0; j<i; j++)
h_A[i*lda+j] = (h_A[j*lda+i]);
}
}
}
lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_spotrf(uplo[0], N, h_R, lda, &info);
lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
start = get_current_time();
magma_spotrf(uplo[0], N, h_R, lda, &info);
end = get_current_time();
if (info < 0)
printf("Argument %d of magma_spotrf had an illegal value.\n", -info);
gpu_perf = flops / GetTimerValue(start, end);
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
start = get_current_time();
lapackf77_spotrf(uplo, &N, h_A, &lda, &info);
end = get_current_time();
if (info < 0)
printf("Argument %d of lapack_spotrf had an illegal value.\n", -info);
cpu_perf = flops / GetTimerValue(start, end);
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
matnorm = lapackf77_slange("f", &N, &N, h_A, &N, work);
blasf77_saxpy(&n2, &mzone, h_A, &ione, h_R, &ione);
printf("%5d %6.2f %6.2f %e\n",
size[i], cpu_perf, gpu_perf,
lapackf77_slange("f", &N, &N, h_R, &N, work) / matnorm );
if (argc != 1)
break;
}
/* Memory clean up */
TESTING_FREE( h_A );
TESTING_HOSTFREE( h_R );
TESTING_CUDA_FINALIZE();
}
The results are like this:
- Code: Select all
fletcher@fletcher-desktop:~/magma_1.0.0-rc3/testing$ ./testing_spotrf -N 180
device 0: GeForce GTX 460, 1400.0 MHz clock, 2047.2 MB memory
Special case for N = 180
memory size allocated 32400
N CPU GFlop/s GPU GFlop/s ||R||_F / ||A||_F
========================================================
About to read matrix for special case N = 180.
Input file opened.
No of items to be read is 32400
32400 0.500000
Input file closed.
180 12.03 2.74 3.675380e-08
This means that it ran, and that the CPU and GPU results agree to within about single-precision rounding (relative difference of roughly 3.7e-08). I hope this helps.
John