Hello all,
I'm trying to solve a linear system, and before advance to a more complex
code, I'm trying to implement the example 1 from the Scalapack manual with
pdgesv. Just to check the answers, I'm trying to solve the same system using
lapack dgesv. The problem is that I cannot find the same answer to the
example system. I already tried to transpose the matrix. Here is the
sequential code:
<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>
#include
<iostream>
#include <vector>
#include <cmath>
#include <cstdlib>
#include <mkl/mkl.h>
#include <mkl/mkl_lapack.h>
#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])
using namespace std;
extern "C" {
/* BLACS C interface */
void dgesv_( MKL_INT* n, MKL_INT* nrhs, double* a, MKL_INT* lda, MKL_INT*
ipiv, double* b, MKL_INT* ldb, MKL_INT* info );
}
int getIndex(int row, int col,int NCOLS) {return row*NCOLS+col;}
int main(int argv, char ** argc) {
// std::cout<<"Returned: "<<" ";
// std::cout << "Hello World! I am " << rank << " of " << nprocs <<
// std::endl;
srand(1);
int myrow=0;
int mycol=0;
int ictxt;
int nprow=0,npcol=0;
int BLOCK_SIZE =2; //this gonna be tricky - should be 64, but cannot be
larger than the original matrix
int locR=0, locC=0;
int block = BLOCK_SIZE;
int izero = 0;
int matrix_size = 9;
int myone = 1;
int nrhs = 1;
int info=0;
int i=0,j=0;
double mone=(-1.0e0),pone=(1.0e0);
double AnormF, XnormF, RnormF, BnormF, residF,eps;
//GLOBAL
double * A = new double[matrix_size*matrix_size];
double * B = new double[matrix_size];
double * Acpy = new double[matrix_size*matrix_size];
double * Bcpy = new double[matrix_size];
int * ipiv = new int [matrix_size];
double * work;
B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5] = 0;
A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;
A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;
A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;
A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;
A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;
A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;
A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;
A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;
A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;
int col = 0;
for(i=0;i<matrix_size; i++) {
for(j=0; j< matrix_size; j++) {
// A[getIndex(i,j,matrix_size)] = 0;//(double)(rand()%100)/100.0;
Acpy[getIndex(i,j,matrix_size)] = A[getIndex(i,j,matrix_size)];
}
//B[i] = 0;//(double)(rand()%100)/100.0;
Bcpy[i] = B[i];
}
cout<< "Global Matrix" <<endl;
for(i=0; i< matrix_size; i++) {
for(j=0; j< matrix_size; j++) {
// cout<<" "<<local_matrix[i][j];
A[getIndex(i,j,matrix_size)] = Acpy[getIndex(j,i,matrix_size)];
cout<<" "<<A[getIndex(i,j,matrix_size)];
}
cout <<endl;
}
cout <<"++++++++++"<<endl;
cout <<"Global Vector" <<endl;
for(i=0; i< matrix_size; i++) {
// cout<<" "<<local_matrix[i][j];
cout<<"* "<<B[i]<<endl;
}
cout <<"++++++++"<<endl;
dgesv_(&matrix_size, &nrhs, A,&matrix_size, ipiv, B,&matrix_size, &info);
for(i=0; i< matrix_size; i++) {
cout <<"dgesv "<<B[i]<<endl;
}
delete [] A;
delete [] B;
delete [] Acpy;
delete [] Bcpy;
return 0;
}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
but Lapack answer is
<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>
dgesv
0
dgesv -0.166667
dgesv 0.5
dgesv 0
dgesv 0
dgesv 0
dgesv 0
dgesv 0
dgesv 0
if the matrix is transposed and
<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>dgesv
0
dgesv -0.5
dgesv 0.5
dgesv 0
dgesv 0
dgesv 0
dgesv 0
dgesv 0
dgesv 0
if it's not. That's a very different result from the example, that says that
the answer should be
<http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f#>
0
-1/6
1/2
0
0
0
-1/2
1/6
0
The parallel code is below:
++++++++++++++++++++++++++++++++++++++++++++
#include <iostream>
#include <vector>
#include <openmpi/mpi.h>
#include <cmath>
#include <cstdlib>
#include <mkl/mkl.h>
#include <mkl/mkl_scalapack.h>
#include <mkl/mkl_cblas.h>
#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])
#define p_of_i(i,bs,p) ( int((i-1)/bs)%p)
#define l_of_i(i,bs,p) (int((i-1)/(p*bs)))
#define x_of_i(i,bs,p) (((i-1)%bs)+1)
using namespace std;
extern "C" {
/* BLACS C interface */
void Cblacs_get( int context, int request, int* value);
int Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
void Cblacs_gridinfo( int context, int* np_row, int* np_col, int* my_row,
int* my_col);
int numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
void Cblacs_gridexit(int ictxt);
void pdgesv_(MKL_INT *n, MKL_INT *nrhs, double *a, MKL_INT *ia,
MKL_INT *ja, MKL_INT *desca, MKL_INT *ipiv, double *b, MKL_INT *ib,
MKL_INT *jb, MKL_INT *descb, MKL_INT *info);
void dgesv_( MKL_INT* n, MKL_INT* nrhs, double* a, MKL_INT* lda, MKL_INT*
ipiv, double* b, MKL_INT* ldb, MKL_INT* info );
void descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int
*icsrc,
int *ictxt, int *lld, int *info);
double pdlamch_( int *ictxt , char *cmach);
double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja,
int *desca, double *work);
void pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int
*desca,
double *b, int *ib, int *jb, int *descb);
void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double
* ALPHA,
double * A, int * IA, int * JA, int * DESCA, double * B, int *
IB, int * JB, int * DESCB,
double * BETA, double * C, int * IC, int * JC, int * DESCC );
int indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int
*nprocs);
//void descinit_(int *descA, int *n2, int *n , int *nb, int *nb2, int
*izero2, int *izero, int * ictxt, int * itemp, int *info);
}
void find_nps(int np, int &nprow, int & npcol);
int getIndex(int row, int col,int NCOLS) {return row*NCOLS+col;}
int main(int argv, char ** argc) {
MPI::Init();
int rank = MPI::COMM_WORLD.Get_rank();
int nprocs = MPI::COMM_WORLD.Get_size();
// std::cout<<"Returned: "<<" ";
// std::cout << "Hello World! I am " << rank << " of " << nprocs <<
// std::endl;
srand(1);
int myrow=0;
int mycol=0;
int ictxt;
int nprow=0,npcol=0;
int BLOCK_SIZE =2; //this gonna be tricky - should be 64, but cannot be
larger than the original matrix
int locR=0, locC=0;
int block = BLOCK_SIZE;
int izero = 0;
int matrix_size = 9;
int myone = 1;
int nrhs = 1;
int info=0;
int i=0,j=0;
double mone=(-1.0e0),pone=(1.0e0);
double AnormF, XnormF, RnormF, BnormF, residF,eps;
find_nps(nprocs,nprow,npcol);
Cblacs_get(-1, 0, &ictxt);
Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);
//std::cout <<"rank "<<rank<< " nprow " <<nprow <<" "<<" npcol "<<npcol<<
" myrow "<<myrow << " mycol "<<mycol<<endl;
locR = numroc_(&matrix_size, &block, &myrow, &izero, &nprow);
locC = numroc_(&matrix_size, &block, &mycol, &izero, &npcol);
//GLOBAL
double * A = new double[matrix_size*matrix_size];
double * B = new double[matrix_size];
double * Acpy = new double[matrix_size*matrix_size];
double * Bcpy = new double[matrix_size];
//LOCAL
double * local_know_vector = new double[locR];
double * local_matrix = new double[locR*locC];
int * ipiv = new int [locC*locR*block+1000000];
double * work;
B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5] = 0;
A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;
A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;
A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;
A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;
A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;
A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;
A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;
A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;
A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;
int col = 0;
for(i=0;i<matrix_size; i++) {
for(j=0; j< matrix_size; j++) {
// A[getIndex(i,j,matrix_size)] = 0;//(double)(rand()%100)/100.0;
Acpy[getIndex(i,j,matrix_size)] = A[getIndex(i,j,matrix_size)];
}
//B[i] = 0;//(double)(rand()%100)/100.0;
Bcpy[i] = B[i];
}
if(rank ==0) {
cout<< "Global Matrix" <<endl;
for(i=0; i< matrix_size; i++) {
for(j=0; j< matrix_size; j++) {
// cout<<" "<<local_matrix[i][j];
cout<<" "<<A[getIndex(i,j,matrix_size)];
}
cout <<endl;
}
cout <<"++++++++++"<<endl;
}
if(rank ==0) {
cout <<"Global Vector" <<endl;
for(i=0; i< matrix_size; i++) {
// cout<<" "<<local_matrix[i][j];
cout<<"* "<<B[i]<<endl;
}
cout <<"++++++++"<<endl;
}
//std::cout <<"rank "<<rank<< " locR " <<locR <<" "<<" locC
"<<locC<<endl;
int *descA = new int[9];
int *descB = new int[9];
descA[0] = 1; // descriptor type
descA[1] = ictxt; // blacs context
descA[2] = matrix_size; // global number of rows
descA[3] = matrix_size; // global number of columns
descA[4] = block; // row block size
descA[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descA[6] = 0; // initial process row(DEFINED 0)
descA[7] = 0; // initial process column (DEFINED 0)
descA[8] = locR; // leading dimension of local array
descB[0] = 1; // descriptor type
descB[1] = ictxt; // blacs context
descB[2] = matrix_size; // global number of rows
descB[3] = 1; // global number of columns
descB[4] = block; // row block size
descB[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descB[6] = 0; // initial process row(DEFINED 0)
descB[7] = 0; // initial process column (DEFINED 0)
descB[8] = locR; // leading dimension of local array
if(rank == 0) {
dgesv_(&matrix_size, &nrhs, Acpy,&matrix_size, ipiv, Bcpy,&matrix_size,
&info);
for(i=0; i< matrix_size; i++) {
cout <<"dgesv "<<Bcpy[i]<<endl;
}
}
int il=0, jl=0;
//if(rank == 0) {
for(i=1; i< matrix_size+1; i++) {
for(j=1; j< matrix_size+1; j++) {
int pi = p_of_i(i,block,nprow);
int li = l_of_i(i,block,nprow);
int xi = x_of_i(i,block,nprow);
int pj = p_of_i(j,block,npcol);
int lj = l_of_i(j,block,npcol);
int xj = x_of_i(j,block,npcol);
if( (pi == myrow) && (pj == mycol)) {
il = li*block+xi;
jl = lj*block+xj;
local_matrix[getIndex(il-1, jl-1, locC)] =
A[getIndex(i-1,j-1,matrix_size)];
}
if( (pi == myrow) &&(mycol==0) ){
local_know_vector[il-1] = B[i-1];
}
}
}
// if(rank ==0) {
// cout <<"Local Matrix" <<endl;
// for(i=0; i< locR; i++) {
// for(j=0; j< locC; j++) {
// cout<<" "<<local_matrix[getIndex(i,j,locC)];
// }
// cout <<endl;
// }
// cout <<"++++++++"<<endl;
// }
//
//
// if(rank ==0) {
// cout <<"Local Vector rank " <<rank<<endl;
// for(i=0; i< locR; i++) {
// cout<<"* "<<local_know_vector[i]<<endl;
// }
// cout <<"++++++++"<<endl;
// }
//STARTING PDGESV
pdgesv_(&matrix_size, &nrhs, local_matrix, &myone, &myone, descA, ipiv,
local_know_vector, &myone, &myone, descB, &info);
if(rank==0)
{
if(info != 0) cout <<"PDGESV problem! Info "<<info<<endl;
}
for(i=0; i< locR; i++)
{
cout<<"**\n"<<"rank "<<rank<<" answer: "<<local_know_vector[i]<<endl;
}
delete [] descA;
delete [] descB;
delete [] local_know_vector;
delete [] local_matrix;
delete [] A;
delete [] B;
delete [] Acpy;
delete [] Bcpy;
Cblacs_gridexit(ictxt);
MPI::Finalize();
return 0;
}
void find_nps(int np, int &nprow, int & npcol) {
int min_nprow=100000;
int min_npcol=100000;
nprow = np;
npcol = np;
while(1) {
npcol--;
if(np%2==0 ) {
if(npcol ==1){
nprow --;
npcol = nprow;
}
}else {
if(npcol ==0){
nprow --;
npcol = nprow;
}
}
if(nprow*npcol == np) {
min_npcol = npcol;
if(nprow < min_nprow) min_nprow = nprow;
}
if(nprow ==1 ) break;
}
nprow = min_nprow;
npcol = min_npcol;
}++++++++++++++++++++++++++++++++++++++++
Thank you!
Ivan
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
http://lists.eecs.utk.edu/mailman/private/scalapack/attachments/20091125/d59fe479/attachment-0001.html
|