ScaLAPACK Archives

[Scalapack] Help with using dgesv and pdgesv

Hello all,

I'm trying to solve a linear system, and before advance to a more complex
code, I'm trying to implement the example 1 from the Scalapack manual with
pdgesv. Just to check the answers, I'm trying to solve the same system using
lapack dgesv. The problem is that I cannot find the same answer to the
example system. I already tried to transpose the matrix. Here is the
sequential code:

<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>
#include
<iostream>
#include <vector>

#include <cmath>
#include <cstdlib>
#include <mkl/mkl.h>
#include <mkl/mkl_lapack.h>

#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])



using namespace std;


extern "C" {
/* BLACS C interface */

void dgesv_( MKL_INT* n, MKL_INT* nrhs, double* a, MKL_INT* lda, MKL_INT*
ipiv, double* b, MKL_INT* ldb, MKL_INT* info );


}


int getIndex(int row, int col,int NCOLS) {return row*NCOLS+col;}




int main(int argv, char ** argc) {


//  std::cout<<"Returned: "<<" ";
//         std::cout << "Hello World! I am " << rank << " of " << nprocs <<
//         std::endl;
srand(1);
int myrow=0;
int mycol=0;
int ictxt;
int nprow=0,npcol=0;

int BLOCK_SIZE =2; //this gonna be tricky - should be 64, but cannot be
larger than the original matrix
int locR=0, locC=0;
int block = BLOCK_SIZE;
int izero = 0;
int matrix_size = 9;
int myone = 1;
int nrhs = 1;
int info=0;
int i=0,j=0;
double mone=(-1.0e0),pone=(1.0e0);
double AnormF, XnormF, RnormF, BnormF, residF,eps;



//GLOBAL
double * A = new double[matrix_size*matrix_size];
double * B = new double[matrix_size];
double * Acpy = new double[matrix_size*matrix_size];
double * Bcpy = new double[matrix_size];





int * ipiv = new int [matrix_size];
double * work;

B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5]  = 0;



A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;

A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;

A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;

A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;

A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;

A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;

A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;

A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;

A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;


int col = 0;
for(i=0;i<matrix_size; i++) {
  for(j=0; j< matrix_size; j++) {
    // A[getIndex(i,j,matrix_size)] = 0;//(double)(rand()%100)/100.0;

      Acpy[getIndex(i,j,matrix_size)] = A[getIndex(i,j,matrix_size)];
}

  //B[i] = 0;//(double)(rand()%100)/100.0;
  Bcpy[i] = B[i];

}


  cout<< "Global Matrix" <<endl;
    for(i=0; i< matrix_size; i++) {
      for(j=0; j< matrix_size; j++) {
    // cout<<"  "<<local_matrix[i][j];
    A[getIndex(i,j,matrix_size)] = Acpy[getIndex(j,i,matrix_size)];
     cout<<"  "<<A[getIndex(i,j,matrix_size)];
      }
      cout <<endl;
    }
  cout <<"++++++++++"<<endl;


  cout <<"Global Vector" <<endl;
    for(i=0; i< matrix_size; i++) {

    // cout<<"  "<<local_matrix[i][j];
     cout<<"*  "<<B[i]<<endl;

    }
  cout <<"++++++++"<<endl;



dgesv_(&matrix_size, &nrhs, A,&matrix_size, ipiv, B,&matrix_size, &info);

  for(i=0; i< matrix_size; i++) {
    cout <<"dgesv  "<<B[i]<<endl;
  }






  delete [] A;
  delete [] B;
  delete [] Acpy;
  delete [] Bcpy;

return 0;


}

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

but Lapack answer is
<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>
dgesv
0
dgesv  -0.166667
dgesv  0.5
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0

if the matrix is transposed and


<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>dgesv
0
dgesv  -0.5
dgesv  0.5
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0
dgesv  0

if it's not. That's a very different result from the example, that says that
the answer should be

<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>
0
-1/6
1/2
0
0
0
-1/2
1/6
0
The parallel code is below:

++++++++++++++++++++++++++++++++++++++++++++
#include <iostream>
#include <vector>
#include <openmpi/mpi.h>
#include <cmath>
#include <cstdlib>
#include <mkl/mkl.h>
#include <mkl/mkl_scalapack.h>
#include <mkl/mkl_cblas.h>
#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])


#define p_of_i(i,bs,p) ( int((i-1)/bs)%p)
#define l_of_i(i,bs,p) (int((i-1)/(p*bs)))
#define x_of_i(i,bs,p) (((i-1)%bs)+1)


using namespace std;


extern "C" {
/* BLACS C interface */
void Cblacs_get( int context, int request, int* value);
int  Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
void Cblacs_gridinfo( int context, int*  np_row, int* np_col, int*  my_row,
int*  my_col);
int  numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
void Cblacs_gridexit(int ictxt);
void pdgesv_(MKL_INT *n, MKL_INT *nrhs, double *a, MKL_INT *ia,
       MKL_INT *ja, MKL_INT *desca, MKL_INT *ipiv, double *b, MKL_INT *ib,
MKL_INT *jb, MKL_INT *descb, MKL_INT *info);
void dgesv_( MKL_INT* n, MKL_INT* nrhs, double* a, MKL_INT* lda, MKL_INT*
ipiv, double* b, MKL_INT* ldb, MKL_INT* info );

void descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int
*icsrc,
          int *ictxt, int *lld, int *info);
double pdlamch_( int *ictxt , char *cmach);
double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja,
int *desca, double *work);

void pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int
*desca,
            double *b, int *ib, int *jb, int *descb);

void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double
* ALPHA,
            double * A, int * IA, int * JA, int * DESCA, double * B, int *
IB, int * JB, int * DESCB,
            double * BETA, double * C, int * IC, int * JC, int * DESCC );
int  indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int
*nprocs);

//void descinit_(int *descA, int *n2, int *n , int *nb, int *nb2, int
*izero2, int *izero, int * ictxt, int * itemp, int *info);
}

void find_nps(int np, int &nprow, int & npcol);
int getIndex(int row, int col,int NCOLS) {return row*NCOLS+col;}




int main(int argv, char ** argc) {

MPI::Init();
        int rank = MPI::COMM_WORLD.Get_rank();
        int nprocs = MPI::COMM_WORLD.Get_size();

//  std::cout<<"Returned: "<<" ";
//         std::cout << "Hello World! I am " << rank << " of " << nprocs <<
//         std::endl;
srand(1);
int myrow=0;
int mycol=0;
int ictxt;
int nprow=0,npcol=0;

int BLOCK_SIZE =2; //this gonna be tricky - should be 64, but cannot be
larger than the original matrix
int locR=0, locC=0;
int block = BLOCK_SIZE;
int izero = 0;
int matrix_size = 9;
int myone = 1;
int nrhs = 1;
int info=0;
int i=0,j=0;
double mone=(-1.0e0),pone=(1.0e0);
double AnormF, XnormF, RnormF, BnormF, residF,eps;


find_nps(nprocs,nprow,npcol);

Cblacs_get(-1, 0, &ictxt);
Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);

//std::cout <<"rank "<<rank<< "  nprow " <<nprow <<"  "<<"  npcol "<<npcol<<
"  myrow  "<<myrow << "  mycol  "<<mycol<<endl;

locR = numroc_(&matrix_size, &block, &myrow, &izero, &nprow);
locC = numroc_(&matrix_size, &block, &mycol, &izero, &npcol);


//GLOBAL
double * A = new double[matrix_size*matrix_size];
double * B = new double[matrix_size];
double * Acpy = new double[matrix_size*matrix_size];
double * Bcpy = new double[matrix_size];

//LOCAL
double * local_know_vector = new double[locR];
double * local_matrix = new double[locR*locC];



int * ipiv = new int [locC*locR*block+1000000];
double * work;

B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5]  = 0;



A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;

A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;

A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;

A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;

A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;

A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;

A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;

A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;

A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;


int col = 0;
for(i=0;i<matrix_size; i++) {
  for(j=0; j< matrix_size; j++) {
    // A[getIndex(i,j,matrix_size)] = 0;//(double)(rand()%100)/100.0;

      Acpy[getIndex(i,j,matrix_size)] = A[getIndex(i,j,matrix_size)];
}

  //B[i] = 0;//(double)(rand()%100)/100.0;
  Bcpy[i] = B[i];

}

if(rank ==0) {
  cout<< "Global Matrix" <<endl;
    for(i=0; i< matrix_size; i++) {
   for(j=0; j< matrix_size; j++) {
    // cout<<"  "<<local_matrix[i][j];
     cout<<"  "<<A[getIndex(i,j,matrix_size)];
      }
      cout <<endl;
    }
  cout <<"++++++++++"<<endl;
}

if(rank ==0) {
  cout <<"Global Vector" <<endl;
    for(i=0; i< matrix_size; i++) {

    // cout<<"  "<<local_matrix[i][j];
     cout<<"*  "<<B[i]<<endl;

    }
  cout <<"++++++++"<<endl;
}


//std::cout <<"rank "<<rank<< "  locR " <<locR <<"  "<<"  locC
"<<locC<<endl;

int *descA  = new int[9];
int *descB  = new int[9];


descA[0] = 1; // descriptor type
descA[1] = ictxt; // blacs context
descA[2] = matrix_size; // global number of rows
descA[3] = matrix_size; // global number of columns
descA[4] = block; // row block size
descA[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descA[6] = 0; // initial process row(DEFINED 0)
descA[7] = 0; // initial process column (DEFINED 0)
descA[8] = locR; // leading dimension of local array

descB[0] = 1; // descriptor type
descB[1] = ictxt; // blacs context
descB[2] = matrix_size; // global number of rows
descB[3] = 1; // global number of columns
descB[4] = block; // row block size
descB[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descB[6] = 0; // initial process row(DEFINED 0)
descB[7] = 0; // initial process column (DEFINED 0)
descB[8] = locR; // leading dimension of local array


if(rank == 0) {
dgesv_(&matrix_size, &nrhs, Acpy,&matrix_size, ipiv, Bcpy,&matrix_size,
&info);

  for(i=0; i< matrix_size; i++) {
    cout <<"dgesv  "<<Bcpy[i]<<endl;
  }
}




int il=0, jl=0;
//if(rank == 0) {
  for(i=1; i< matrix_size+1; i++) {
   for(j=1; j< matrix_size+1; j++) {

    int pi = p_of_i(i,block,nprow);
    int li = l_of_i(i,block,nprow);
    int xi = x_of_i(i,block,nprow);

    int pj = p_of_i(j,block,npcol);
    int lj = l_of_i(j,block,npcol);
    int xj = x_of_i(j,block,npcol);

      if( (pi == myrow) && (pj == mycol)) {
     il = li*block+xi;
     jl = lj*block+xj;
     local_matrix[getIndex(il-1, jl-1, locC)] =
A[getIndex(i-1,j-1,matrix_size)];
      }

      if(  (pi == myrow) &&(mycol==0)  ){
      local_know_vector[il-1] = B[i-1];
      }
   }

}

// if(rank ==0) {
//   cout <<"Local Matrix" <<endl;
//     for(i=0; i< locR; i++) {
//       for(j=0; j< locC; j++) {
//        cout<<"  "<<local_matrix[getIndex(i,j,locC)];
//       }
//       cout <<endl;
//     }
//   cout <<"++++++++"<<endl;
// }
//
//
// if(rank ==0) {
//   cout <<"Local Vector rank " <<rank<<endl;
//     for(i=0; i< locR; i++) {
//      cout<<"*  "<<local_know_vector[i]<<endl;
//     }
//   cout <<"++++++++"<<endl;
// }


//STARTING PDGESV
pdgesv_(&matrix_size, &nrhs, local_matrix, &myone, &myone, descA, ipiv,
local_know_vector, &myone, &myone, descB, &info);

if(rank==0)
  {
    if(info != 0) cout <<"PDGESV problem! Info "<<info<<endl;
  }


for(i=0; i< locR; i++)
{
  cout<<"**\n"<<"rank "<<rank<<"  answer: "<<local_know_vector[i]<<endl;
}


  delete [] descA;
  delete [] descB;
  delete [] local_know_vector;
  delete [] local_matrix;
  delete [] A;
  delete [] B;
  delete [] Acpy;
  delete [] Bcpy;

  Cblacs_gridexit(ictxt);
  MPI::Finalize();
return 0;


}

void find_nps(int np, int &nprow, int & npcol) {

int min_nprow=100000;
int min_npcol=100000;


nprow = np;
npcol = np;


while(1) {

   npcol--;
  if(np%2==0   ) {
  if(npcol ==1){
   nprow --;
   npcol = nprow;
  }
  }else {
  if(npcol ==0){
   nprow --;
   npcol = nprow;
  }

  }

  if(nprow*npcol == np) {
    min_npcol = npcol;
    if(nprow < min_nprow)    min_nprow = nprow;
  }

    if(nprow ==1 ) break;

}

nprow = min_nprow;
npcol = min_npcol;

}++++++++++++++++++++++++++++++++++++++++

Thank you!

Ivan
-------------- next part --------------
An HTML attachment was scrubbed...
URL: 
http://lists.eecs.utk.edu/mailman/private/scalapack/attachments/20091125/d59fe479/attachment-0001.html
 

<Prev in Thread] Current Thread [Next in Thread>


For additional information you may use the LAPACK/ScaLAPACK Forum.
Or one of the mailing lists, or