Comparison between dgesv and pdgesv

Open discussion regarding features, bugs, issues, vendors, etc.

Comparison between dgesv and pdgesv

Postby ispmarin » Tue Nov 24, 2009 5:59 pm

Hello all,

I'm trying to solve a linear system, and before advance to a more complex code, I'm trying to implement the example 1 from the Scalapack manual with pdgesv. Just to check the answers, I'm trying to solve the same system using lapack dgesv. The problem is that I cannot find the same answer to the example system. I already tried to transpose the matrix. Here is the sequential code:
Code: Select all
#include <iostream>
#include <vector>

#include <cmath>
#include <cstdlib>
#include <mkl/mkl.h>
#include <mkl/mkl_lapack.h>

#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])



using namespace std;


extern "C" {
 /* BLACS C interface */

 void dgesv_( MKL_INT* n, MKL_INT* nrhs, double* a, MKL_INT* lda, MKL_INT* ipiv, double* b, MKL_INT* ldb, MKL_INT* info );

 
}


int getIndex(int row, int col,int NCOLS) {return row*NCOLS+col;}




int main(int argv, char ** argc) {


//  std::cout<<"Returned: "<<" ";
//         std::cout << "Hello World! I am " << rank << " of " << nprocs <<
//         std::endl;
srand(1);
int myrow=0;
int mycol=0;
int ictxt;
int nprow=0,npcol=0;

int BLOCK_SIZE =2; //this gonna be tricky - should be 64, but cannot be larger than the original matrix
int locR=0, locC=0;
int block = BLOCK_SIZE;
int izero = 0;
int matrix_size = 9;
int myone = 1;
int nrhs = 1;
int info=0;
int i=0,j=0;
double mone=(-1.0e0),pone=(1.0e0);
double AnormF, XnormF, RnormF, BnormF, residF,eps;



//GLOBAL
double * A = new double[matrix_size*matrix_size];
double * B = new double[matrix_size];
double * Acpy = new double[matrix_size*matrix_size];
double * Bcpy = new double[matrix_size];





int * ipiv = new int [matrix_size];
double * work;

B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5]  = 0;



A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;

A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;

A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;

A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;

A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;

A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;

A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;

A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;

A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;


int col = 0;
for(i=0;i<matrix_size; i++) {
  for(j=0; j< matrix_size; j++) {
    // A[getIndex(i,j,matrix_size)] = 0;//(double)(rand()%100)/100.0;
     
      Acpy[getIndex(i,j,matrix_size)] = A[getIndex(i,j,matrix_size)];
 }
 
  //B[i] = 0;//(double)(rand()%100)/100.0;
  Bcpy[i] = B[i];
 
}


  cout<< "Global Matrix" <<endl;
    for(i=0; i< matrix_size; i++) {
      for(j=0; j< matrix_size; j++) {
    // cout<<"  "<<local_matrix[i][j];
    A[getIndex(i,j,matrix_size)] = Acpy[getIndex(j,i,matrix_size)];
     cout<<"  "<<A[getIndex(i,j,matrix_size)];
      }
      cout <<endl;
    }
  cout <<"++++++++++"<<endl;


  cout <<"Global Vector" <<endl;
    for(i=0; i< matrix_size; i++) {
   
    // cout<<"  "<<local_matrix[i][j];
     cout<<"*  "<<B[i]<<endl;
 
    }
  cout <<"++++++++"<<endl;



dgesv_(&matrix_size, &nrhs, A,&matrix_size, ipiv, B,&matrix_size, &info);
   
  for(i=0; i< matrix_size; i++) {
    cout <<"dgesv  "<<B[i]<<endl;
  }






  delete [] A;
  delete [] B;
  delete [] Acpy;
  delete [] Bcpy;
 
return 0;


}



but Lapack anser is
Code: Select all
dgesv  0
dgesv  -0.166667
dgesv  0.5
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0

if the matrix is transposed and

Code: Select all
dgesv  0
dgesv  -0.5
dgesv  0.5
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0

if it's not. That's a very different result from the example, that says that the answer should be
Code: Select all
0
-1/6
1/2
0
0
0
-1/2
1/6
0


The parallel code is below:
Code: Select all
#include <iostream>
#include <vector>
#include <openmpi/mpi.h>
#include <cmath>
#include <cstdlib>
#include <mkl/mkl.h>
#include <mkl/mkl_scalapack.h>
#include <mkl/mkl_cblas.h>
#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])


#define p_of_i(i,bs,p) ( int((i-1)/bs)%p)
#define l_of_i(i,bs,p) (int((i-1)/(p*bs)))
#define x_of_i(i,bs,p) (((i-1)%bs)+1)


using namespace std;


extern "C" {
 /* BLACS C interface */
 void Cblacs_get( int context, int request, int* value);
 int  Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
 void Cblacs_gridinfo( int context, int*  np_row, int* np_col, int*  my_row, int*  my_col);
 int  numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
 void Cblacs_gridexit(int ictxt);
 void pdgesv_(MKL_INT *n, MKL_INT *nrhs, double *a, MKL_INT *ia,
       MKL_INT *ja, MKL_INT *desca, MKL_INT *ipiv, double *b, MKL_INT *ib, MKL_INT *jb, MKL_INT *descb, MKL_INT *info);
 void dgesv_( MKL_INT* n, MKL_INT* nrhs, double* a, MKL_INT* lda, MKL_INT* ipiv, double* b, MKL_INT* ldb, MKL_INT* info );

 void descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc,
          int *ictxt, int *lld, int *info);
 double pdlamch_( int *ictxt , char *cmach);
 double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work);

 void pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int *desca,
            double *b, int *ib, int *jb, int *descb);

 void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA,
            double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB,
            double * BETA, double * C, int * IC, int * JC, int * DESCC );
 int  indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);

//void descinit_(int *descA, int *n2, int *n , int *nb, int *nb2, int *izero2, int *izero, int * ictxt, int * itemp, int *info);
}

void find_nps(int np, int &nprow, int & npcol);
int getIndex(int row, int col,int NCOLS) {return row*NCOLS+col;}




int main(int argv, char ** argc) {
 
MPI::Init();
        int rank = MPI::COMM_WORLD.Get_rank();
        int nprocs = MPI::COMM_WORLD.Get_size();

//  std::cout<<"Returned: "<<" ";
//         std::cout << "Hello World! I am " << rank << " of " << nprocs <<
//         std::endl;
srand(1);
int myrow=0;
int mycol=0;
int ictxt;
int nprow=0,npcol=0;

int BLOCK_SIZE =2; //this gonna be tricky - should be 64, but cannot be larger than the original matrix
int locR=0, locC=0;
int block = BLOCK_SIZE;
int izero = 0;
int matrix_size = 9;
int myone = 1;
int nrhs = 1;
int info=0;
int i=0,j=0;
double mone=(-1.0e0),pone=(1.0e0);
double AnormF, XnormF, RnormF, BnormF, residF,eps;


find_nps(nprocs,nprow,npcol);

Cblacs_get(-1, 0, &ictxt);
Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);

//std::cout <<"rank "<<rank<< "  nprow " <<nprow <<"  "<<"  npcol "<<npcol<< "  myrow  "<<myrow << "  mycol  "<<mycol<<endl;

locR = numroc_(&matrix_size, &block, &myrow, &izero, &nprow);
locC = numroc_(&matrix_size, &block, &mycol, &izero, &npcol);


//GLOBAL
double * A = new double[matrix_size*matrix_size];
double * B = new double[matrix_size];
double * Acpy = new double[matrix_size*matrix_size];
double * Bcpy = new double[matrix_size];

//LOCAL
double * local_know_vector = new double[locR];
double * local_matrix = new double[locR*locC];



int * ipiv = new int [locC*locR*block+1000000];
double * work;

B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5]  = 0;



A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;

A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;

A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;

A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;

A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;

A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;

A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;

A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;

A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;


int col = 0;
for(i=0;i<matrix_size; i++) {
  for(j=0; j< matrix_size; j++) {
    // A[getIndex(i,j,matrix_size)] = 0;//(double)(rand()%100)/100.0;
     
      Acpy[getIndex(i,j,matrix_size)] = A[getIndex(i,j,matrix_size)];
 }
 
  //B[i] = 0;//(double)(rand()%100)/100.0;
  Bcpy[i] = B[i];
 
}

if(rank ==0) {
  cout<< "Global Matrix" <<endl;
    for(i=0; i< matrix_size; i++) {
   for(j=0; j< matrix_size; j++) {
    // cout<<"  "<<local_matrix[i][j];
     cout<<"  "<<A[getIndex(i,j,matrix_size)];
      }
      cout <<endl;
    }
  cout <<"++++++++++"<<endl;
}

if(rank ==0) {
  cout <<"Global Vector" <<endl;
    for(i=0; i< matrix_size; i++) {
   
    // cout<<"  "<<local_matrix[i][j];
     cout<<"*  "<<B[i]<<endl;
 
    }
  cout <<"++++++++"<<endl;
}


//std::cout <<"rank "<<rank<< "  locR " <<locR <<"  "<<"  locC "<<locC<<endl;

 int *descA  = new int[9];
 int *descB  = new int[9];
 
 
 descA[0] = 1; // descriptor type
 descA[1] = ictxt; // blacs context
 descA[2] = matrix_size; // global number of rows
 descA[3] = matrix_size; // global number of columns
 descA[4] = block; // row block size
 descA[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
 descA[6] = 0; // initial process row(DEFINED 0)
 descA[7] = 0; // initial process column (DEFINED 0)
 descA[8] = locR; // leading dimension of local array

 descB[0] = 1; // descriptor type
 descB[1] = ictxt; // blacs context
 descB[2] = matrix_size; // global number of rows
 descB[3] = 1; // global number of columns
 descB[4] = block; // row block size
 descB[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
 descB[6] = 0; // initial process row(DEFINED 0)
 descB[7] = 0; // initial process column (DEFINED 0)
 descB[8] = locR; // leading dimension of local array


if(rank == 0) {
dgesv_(&matrix_size, &nrhs, Acpy,&matrix_size, ipiv, Bcpy,&matrix_size, &info);
   
  for(i=0; i< matrix_size; i++) {
    cout <<"dgesv  "<<Bcpy[i]<<endl;
  }
}

 
 
 
int il=0, jl=0;
//if(rank == 0) {
  for(i=1; i< matrix_size+1; i++) {
   for(j=1; j< matrix_size+1; j++) {
   
    int pi = p_of_i(i,block,nprow);
    int li = l_of_i(i,block,nprow);
    int xi = x_of_i(i,block,nprow);

    int pj = p_of_i(j,block,npcol);
    int lj = l_of_i(j,block,npcol);
    int xj = x_of_i(j,block,npcol);

      if( (pi == myrow) && (pj == mycol)) {
     il = li*block+xi;
     jl = lj*block+xj;
     local_matrix[getIndex(il-1, jl-1, locC)] = A[getIndex(i-1,j-1,matrix_size)];
      }
     
      if(  (pi == myrow) &&(mycol==0)  ){
      local_know_vector[il-1] = B[i-1];
      }
   }
 
 }

// if(rank ==0) {
//   cout <<"Local Matrix" <<endl;
//     for(i=0; i< locR; i++) {
//       for(j=0; j< locC; j++) {
//        cout<<"  "<<local_matrix[getIndex(i,j,locC)];
//       }
//       cout <<endl;
//     }
//   cout <<"++++++++"<<endl;
// }
//
//
// if(rank ==0) {
//   cout <<"Local Vector rank " <<rank<<endl;
//     for(i=0; i< locR; i++) {
//      cout<<"*  "<<local_know_vector[i]<<endl;
//     }
//   cout <<"++++++++"<<endl;
// }


//STARTING PDGESV
pdgesv_(&matrix_size, &nrhs, local_matrix, &myone, &myone, descA, ipiv, local_know_vector, &myone, &myone, descB, &info);

if(rank==0)
  {
    if(info != 0) cout <<"PDGESV problem! Info "<<info<<endl;
  }


for(i=0; i< locR; i++)
{
  cout<<"**\n"<<"rank "<<rank<<"  answer: "<<local_know_vector[i]<<endl;
}


  delete [] descA;
  delete [] descB;
  delete [] local_know_vector;
  delete [] local_matrix;
  delete [] A;
  delete [] B;
  delete [] Acpy;
  delete [] Bcpy;
 
  Cblacs_gridexit(ictxt);
  MPI::Finalize();
return 0;


}

void find_nps(int np, int &nprow, int & npcol) {

int min_nprow=100000;
int min_npcol=100000;


nprow = np;
npcol = np;


while(1) {
 
   npcol--;
  if(np%2==0   ) {
  if(npcol ==1){
   nprow --;
   npcol = nprow;
  }
  }else {
  if(npcol ==0){
   nprow --;
   npcol = nprow;
  }
 
  }
 
  if(nprow*npcol == np) {
    min_npcol = npcol;
    if(nprow < min_nprow)    min_nprow = nprow;
  }

    if(nprow ==1 ) break;

}

nprow = min_nprow;
npcol = min_npcol;

}


What can I be doing wrong? Thank you!
ispmarin
 
Posts: 8
Joined: Sat May 31, 2008 3:02 pm

Re: Comparison between dgesv and pdgesv using C++

Postby ispmarin » Thu Nov 26, 2009 2:49 pm

Hello all,

After some investigation, I made the dgesv routine to give half the answer:

dgesv 0
dgesv -0.166667
dgesv 0.5
dgesv 0
dgesv 0
dgesv 0
dgesv 0
dgesv 0
dgesv 0

After transposing the original matrix to

19 -19 -19 -19 -19 -19 -19 -19 -19
3 3 -3 -3 -3 -3 -3 -3 -3
1 1 1 -1 -1 -1 -1 -1 -1
12 12 12 12 -12 -12 -12 -12 -12
1 1 1 1 1 -1 -1 -1 -1
16 16 16 16 16 16 -16 -16 -16
1 1 1 1 1 1 1 -1 -1
3 3 3 3 3 3 3 3 -3
11 11 11 11 11 11 11 11 11

This dgesv call also works in the parallel version. Further studying more the definition of pdgesv, I found that in the call I should provide the starting index. I'm using C++, so my arrays starts in zero, but PDGESV starts in one. Is this correct? How is the correct index to start? I'm calculating the submatrixes using

int il=0, jl=0;
//if(rank == 0) {
for(i=1; i< matrix_size+1; i++) {
for(j=1; j< matrix_size+1; j++) {

int pi = p_of_i(i,block,nprow);
int li = l_of_i(i,block,nprow);
int xi = x_of_i(i,block,nprow);

int pj = p_of_i(j,block,npcol);
int lj = l_of_i(j,block,npcol);
int xj = x_of_i(j,block,npcol);

if( (pi == myrow) && (pj == mycol)) {
il = li*block+xi;
jl = lj*block+xj;
local_matrix[getIndex(il-1, jl-1, locC)] = A[getIndex(i-1,j-1,matrix_size)];
}

if( (pi == myrow) &&(mycol==0) ){
local_know_vector[il-1] = B[i-1];
}
}

}


that calculates the local_matrix and local_vector distribution. The distribution seems to be alright after I transposed the original matrix, but the results are not correct.

Thank you for your patience!

Ivan
ispmarin
 
Posts: 8
Joined: Sat May 31, 2008 3:02 pm

Re: Comparison between dgesv and pdgesv

Postby ispmarin » Thu Nov 26, 2009 11:01 pm

Ok, living and learning...

Just found here http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=139 that there is a typo on the example1.f, and so the dgesv solution is correct. I'm checking now the matrix distribution to see if it is correct. And now the differences between the C++ and Fortran shows up. I write the original matrix in the C orientation, row order, so the vector is

Code: Select all
A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;

for the first line. So, to use Scalapack from C++, I should transpose this to column orientation?

Second question: I'm distributing the total matrix to each subprocess using
Code: Select all
  for(i=1; i< matrix_size+1; i++) {
   for(j=1; j< matrix_size+1; j++) {
   
    int pi = p_of_i(i,block,nprow);
    int li = l_of_i(i,block,nprow);
    int xi = x_of_i(i,block,nprow);

    int pj = p_of_i(j,block,npcol);
    int lj = l_of_i(j,block,npcol);
    int xj = x_of_i(j,block,npcol);

      if( (pi == myrow) && (pj == mycol)) {
     il = li*block+xi;
     jl = lj*block+xj;
     local_matrix[getIndex(il-1, jl-1, locC)] = A[getIndex(i-1,j-1,matrix_size)];
      }
     
      if(  (pi == myrow) &&(mycol==0)  ){
      local_know_vector[il-1] = B[i-1];
      }
   }
 
 }

But should I transpose the entire matrix or the local_matrix blocks?

Sorry to keep bumping my own thread, but I really would like to put this code to work, so I can proceed to a more complex case.

Thank you!
ispmarin
 
Posts: 8
Joined: Sat May 31, 2008 3:02 pm


Return to User Discussion

Who is online

Users browsing this forum: Google [Bot], Yahoo [Bot] and 2 guests