I am trying to integrate magma sgesv into my cuda application. I followed the example in testing_sgesv_gpu.cpp but i can't seem to get the correct results. Would anyone be able to help me out with what I'm going wrong?
I have a matrix A which is of size (movecell_count X movecell_count) and bx and by which is of size, movecell_count each. movecell_count = 73 in this case but is subject to change for different inputs.
I created a A_mag which is the same size as A and B_mag which is of size movecell_count*2. I copy the numbers of A into A_mag and bx and by into B_mag, then call magma_sgetrf_gpu, and magma_sgetrs_gpu.
What are the exact sizes of the workspaces?? Also my matrix A is symmetrical and positive definite, so I would also like to use magma dsposv gpu if I can get this one right.
I've posted my code below. Any help would be greatly appreciated!!!
Code: Select all
cuInit( 0 );
int INFO[1];
int LDA, LDB, LDX;
int maxnb = magma_get_sgetrf_nb(movecell_count);
int lwork = movecell_count*maxnb;
int NRHS=2;
LDB = LDX = LDA = movecell_count ;
int status ;
float *d_A , * d_B , *d_X ;
float *h_work_M_S;
int *IPIV_mag ;
float *A_mag , *B_mag, *X_mag;
status = cublasAlloc(movecell_count*movecell_count, sizeof(float), (void**)&d_A ) ;
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device memory allocation error (d_A)\n");
exit(1);
}
status = cublasAlloc(LDB*NRHS, sizeof(float), (void**)&d_B ) ;
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device memory allocation error (d_B)\n");
exit(1);
}
status = cublasAlloc(LDB*NRHS, sizeof(float), (void**)&d_X ) ;
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device memory allocation error (d_X)\n");
exit(1);
}
status = cudaMallocHost( (void**)&h_work_M_S, movecell_count*maxnb*sizeof(float) );
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device memory allocation error (h_work_M_S)\n");
exit(1);
}
A_mag = ( float *) malloc ( sizeof(float) * LDA*movecell_count);
if( A_mag == NULL ) {
printf("Allocation Error\n");
exit(1);
}
B_mag = ( float *) malloc ( sizeof(float) * LDB*NRHS);
if( B_mag == NULL ){
printf("Allocation Error\n");
exit(1);
}
X_mag = ( float *) malloc ( sizeof(float) *LDB*NRHS);
if( X_mag == NULL ) {
printf("Allocation Error\n");
exit(1);
}
IPIV_mag = ( int *) malloc ( sizeof (int) * movecell_count ) ;
if( IPIV_mag == NULL ) {
printf("Allocation Error\n");
exit(1);
}
LDB = LDX = LDA = movecell_count ;
int dlda = (movecell_count/32)*32;
//putting in values to magma structures
int B_mag_index=0;
for (int i=0; i<movecell_count; i++)
{
for (int j=0; j<movecell_count; j++)
{
A_mag[B_mag_index] = A[i][j];
B_mag_index++;
}
}
for (int i=0; i<movecell_count; i++)
{
B_mag[i] = bx[i];
}
B_mag_index=movecell_count;
for (int i=0; i<movecell_count; i++)
{
B_mag[B_mag_index] = by[i];
B_mag_index++;
}
cublasSetMatrix( movecell_count, movecell_count, sizeof( float ), A_mag, movecell_count, d_A, dlda ) ;
cublasSetMatrix( movecell_count, NRHS, sizeof( float ), B_mag, movecell_count, d_B, movecell_count ) ;
//=====================================================================
// SP - GPU
//=====================================================================
magma_sgetrf_gpu(&movecell_count, &movecell_count, d_A, &dlda, IPIV_mag, h_work_M_S, INFO);
magma_sgetrs_gpu("N", movecell_count, NRHS, d_A, dlda, IPIV_mag, d_B, LDB, INFO, h_work_M_S);
cublasGetMatrix( movecell_count, NRHS, sizeof( float ), d_B , movecell_count, X_mag ,movecell_count) ;
free(IPIV_mag);
free(X_mag);
free(B_mag);
free(A_mag);
cublasFree(h_work_M_S);
cublasFree(d_X);
cublasFree(d_B);
cublasFree(d_A);