Help with sgesv

Open discussion for MAGMA

Help with sgesv

Postby jameric » Wed Apr 21, 2010 4:37 pm

Hi,

I am trying to integrate magma sgesv into my cuda application. I followed the example in testing_sgesv_gpu.cpp but i can't seem to get the correct results. Would anyone be able to help me out with what I'm going wrong?
I have a matrix A which is of size (movecell_count X movecell_count) and bx and by which is of size, movecell_count each. movecell_count = 73 in this case but is subject to change for different inputs.

I created a A_mag which is the same size as A and B_mag which is of size movecell_count*2. I copy the numbers of A into A_mag and bx and by into B_mag, then call magma_sgetrf_gpu, and magma_sgetrs_gpu.

What are the exact sizes of the workspaces?? Also my matrix A is symmetrical and positive definite, so I would also like to use magma dsposv gpu if I can get this one right.
I've posted my code below. Any help would be greatly appreciated!!!


Code: Select all
    cuInit( 0 );
    int INFO[1];
    int LDA, LDB, LDX;
    int maxnb = magma_get_sgetrf_nb(movecell_count);
    int lwork = movecell_count*maxnb;
    int NRHS=2;
    LDB = LDX = LDA = movecell_count ;
    int status ;
    float *d_A , * d_B , *d_X ;
    float *h_work_M_S;
    int *IPIV_mag ;
    float *A_mag , *B_mag, *X_mag;
 

    status = cublasAlloc(movecell_count*movecell_count, sizeof(float), (void**)&d_A ) ;
    if (status != CUBLAS_STATUS_SUCCESS) {
      fprintf (stderr, "!!!! device memory allocation error (d_A)\n");
      exit(1);
    }

    status = cublasAlloc(LDB*NRHS, sizeof(float), (void**)&d_B ) ;
    if (status != CUBLAS_STATUS_SUCCESS) {
      fprintf (stderr, "!!!! device memory allocation error (d_B)\n");
      exit(1);
    }
    status = cublasAlloc(LDB*NRHS, sizeof(float), (void**)&d_X ) ;
    if (status != CUBLAS_STATUS_SUCCESS) {
      fprintf (stderr, "!!!! device memory allocation error (d_X)\n");
      exit(1);
    }

    status = cudaMallocHost( (void**)&h_work_M_S, movecell_count*maxnb*sizeof(float) );
    if (status != CUBLAS_STATUS_SUCCESS) {
      fprintf (stderr, "!!!! device memory allocation error (h_work_M_S)\n");
      exit(1);
    }
   
    A_mag = ( float *) malloc ( sizeof(float) * LDA*movecell_count);
    if( A_mag == NULL ) {
      printf("Allocation Error\n");
      exit(1);
    }          
    B_mag = ( float *) malloc ( sizeof(float) * LDB*NRHS);
    if( B_mag == NULL ){
      printf("Allocation Error\n");
      exit(1);
    }          
    X_mag = ( float *) malloc ( sizeof(float) *LDB*NRHS);
    if( X_mag == NULL ) {
      printf("Allocation Error\n");
      exit(1);
    }          
   
    IPIV_mag = ( int *) malloc ( sizeof (int) * movecell_count ) ;
    if( IPIV_mag == NULL ) {
      printf("Allocation Error\n");
      exit(1);
    }                 
   
      LDB = LDX = LDA = movecell_count ;
   
      int dlda = (movecell_count/32)*32;
     
      //putting in values to magma structures
      int B_mag_index=0;
      for (int i=0; i<movecell_count; i++)
      {
         for (int j=0; j<movecell_count; j++)
         {
            A_mag[B_mag_index] = A[i][j];
            B_mag_index++;
         }
      }

      for (int i=0; i<movecell_count; i++)
      {
         B_mag[i] = bx[i];
      }
      B_mag_index=movecell_count;
      for (int i=0; i<movecell_count; i++)
      {
         B_mag[B_mag_index] = by[i];
         B_mag_index++;
      } 

      cublasSetMatrix( movecell_count, movecell_count, sizeof( float ), A_mag, movecell_count, d_A, dlda ) ;

      cublasSetMatrix( movecell_count, NRHS, sizeof( float ), B_mag, movecell_count, d_B, movecell_count ) ;

      //=====================================================================
      //              SP - GPU
      //=====================================================================

      magma_sgetrf_gpu(&movecell_count, &movecell_count, d_A, &dlda, IPIV_mag, h_work_M_S, INFO);
      magma_sgetrs_gpu("N", movecell_count, NRHS, d_A, dlda, IPIV_mag, d_B, LDB, INFO, h_work_M_S);
      cublasGetMatrix( movecell_count, NRHS, sizeof( float ), d_B , movecell_count, X_mag ,movecell_count) ;

    free(IPIV_mag);
    free(X_mag);
    free(B_mag);
    free(A_mag);
    cublasFree(h_work_M_S);
    cublasFree(d_X);
    cublasFree(d_B);
    cublasFree(d_A);
jameric
 
Posts: 2
Joined: Mon Apr 19, 2010 5:29 pm

Re: Help with sgesv

Postby Stan Tomov » Fri Apr 23, 2010 8:53 pm

Hi,
The memory allocation had to be changed slightly. I have included below the modified code. I marked the places with changes with
Code: Select all
// TTT

before the modification. I also added an error check at the end to get
Code: Select all
|| B - A X || / ||A|| =  6.506779e-07

for hard coded N = 1024, random matrix, and NRHS = 2.

Code: Select all
int main( int argc, char** argv)
{
  // TTT
  int movecell_count = 1024;

  cuInit( 0 );
  int INFO[1];
  int LDA, LDB, LDX;
  int maxnb = magma_get_sgetrf_nb(movecell_count);
  int lwork = movecell_count*maxnb;
  int NRHS=2;
  LDB = LDX = LDA = movecell_count ;
  int status ;
  float *d_A , * d_B , *d_X ;
  float *h_work_M_S;
  int *IPIV_mag ;
  float *A_mag , *B_mag, *X_mag;

  // TTT
  int N = movecell_count;

  // TTT
  //status = cublasAlloc(movecell_count*movecell_count,
  //                     sizeof(float), (void**)&d_A ) ;
  status = cublasAlloc((N+32)*(N+32) + 32*maxnb + lwork+2*maxnb*maxnb,
             sizeof(float), (void**)&d_A ) ;
  if (status != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! device memory allocation error (d_A)\n");
    exit(1);
  }
 
  status = cublasAlloc(LDB*NRHS, sizeof(float), (void**)&d_B ) ;
  if (status != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! device memory allocation error (d_B)\n");
    exit(1);
  }
  status = cublasAlloc(LDB*NRHS, sizeof(float), (void**)&d_X ) ;
  if (status != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! device memory allocation error (d_X)\n");
    exit(1);
  }
 
  // TTT
  //status = cudaMallocHost( (void**)&h_work_M_S,
  //            movecell_count*maxnb*sizeof(float) );
  status = cudaMallocHost( (void**)&h_work_M_S,
            (lwork+32*maxnb)*sizeof(float) );
 
  if (status != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! device memory allocation error (h_work_M_S)\n");
    exit(1);
  }
   
  A_mag = ( float *) malloc ( sizeof(float) * LDA*movecell_count);
  if( A_mag == NULL ) {
    printf("Allocation Error\n");
    exit(1);
  }         
  B_mag = ( float *) malloc ( sizeof(float) * LDB*NRHS);
  if( B_mag == NULL ){
    printf("Allocation Error\n");
    exit(1);
  }         
  X_mag = ( float *) malloc ( sizeof(float) *LDB*NRHS);
  if( X_mag == NULL ) {
    printf("Allocation Error\n");
    exit(1);
  }         
 
  IPIV_mag = ( int *) malloc ( sizeof (int) * movecell_count ) ;
  if( IPIV_mag == NULL ) {
    printf("Allocation Error\n");
    exit(1);
  }                 
 
  LDB = LDX = LDA = movecell_count ;
 
  int dlda = (movecell_count/32)*32;
  // TTT
  if (dlda<movecell_count) dlda+=32;

  //putting in values to magma structures
  int B_mag_index=0;
  for (int i=0; i<movecell_count; i++)
    {
      for (int j=0; j<movecell_count; j++)
   {
     // TTT
     A_mag[B_mag_index] = (rand()) / (float)RAND_MAX; //A[i][j];
     B_mag_index++;
   }
    }
 
  for (int i=0; i<movecell_count; i++)
    {
      // TTT
      B_mag[i] = (rand()) / (float)RAND_MAX; //bx[i];
    }
  B_mag_index=movecell_count;
  for (int i=0; i<movecell_count; i++)
    {
      // TTT
      B_mag[B_mag_index] = (rand()) / (float)RAND_MAX; //by[i];
      B_mag_index++;
    }
 
  cublasSetMatrix( movecell_count, movecell_count, sizeof( float ), A_mag,
         movecell_count, d_A, dlda ) ;
 
  cublasSetMatrix( movecell_count, NRHS, sizeof( float ), B_mag,
         movecell_count, d_B, movecell_count ) ;

  //=====================================================================
  //              SP - GPU
  //=====================================================================

  magma_sgetrf_gpu(&movecell_count, &movecell_count, d_A, &dlda,
         IPIV_mag, h_work_M_S, INFO);
  magma_sgetrs_gpu("N", movecell_count, NRHS, d_A, dlda, IPIV_mag,
         d_B, LDB, INFO, h_work_M_S);
  cublasGetMatrix( movecell_count, NRHS, sizeof( float ), d_B ,
         movecell_count, X_mag ,movecell_count) ;
 
  //=====================================================================
  //              ERROR
  //=====================================================================
  float Rnorm, Anorm;
  float *worke = (float *)malloc(N*sizeof(float));
  Anorm = slange_("I", &N, &N, A_mag, &LDA, worke);
  float ONE = -1.0 , NEGONE = 1.0 ;
  sgemm_( "No Transpose", "No Transpose", &N, &NRHS, &N, &NEGONE, A_mag, &LDA,
     X_mag, &LDX, &ONE, B_mag, &N);
  Rnorm=slange_("I", &N, &NRHS, B_mag, &LDB, worke);
  free(worke);

  printf("|| B - A X || / ||A|| =  %e\n", Rnorm/Anorm);


  free(IPIV_mag);
  free(X_mag);
  free(B_mag);
  free(A_mag);
  cublasFree(h_work_M_S);
  cublasFree(d_X);
  cublasFree(d_B);
  cublasFree(d_A);
}


Stan
Stan Tomov
 
Posts: 251
Joined: Fri Aug 21, 2009 10:39 pm


Return to User discussion

Who is online

Users browsing this forum: Bing [Bot], Google [Bot] and 3 guests

cron