Hi!
I'm new to ScaLAPACK and not a very good programmer either. What I am trying to do is to get a very simple program to solve an equation system and when it works I intend to make it more sophisticated so I can easily use it for large matrices.
However, I am trying to do this in C, which has turned out to be an mistake. I have spent more than a months work on this code and it is still not working.
When running the program I get: "On entry to PDGETRF parameter number 4 had an illegal value"
I have tried a lot of different ways with this and I still can not get it to work. I would be very grateful if someone with more skill and experience than me could have a look at the code and advice how to make it work.
Thanks
Theo
/*
*
* This function solves an equation of the A*x=b type, where x and b are
* vectors and A is a matrix.
*
*/
#include <stdio.h>
#include <math.h> /* sqrt() */
#include <mpi.h>
#include <stdlib.h>
/*int MPI_Init(int *argc, char ***argv);*/
int Get_dimension(int order, int block_size, int my_process_row_or_col,
int nproc_rows_or_cols);
void Build_descrip(int my_rank, int* descrip,
int m, int n, int row_block_size,
int col_block_size, int blacs_grid,
int leading_dim);
extern void descinit_(int* descrip, int* m, int* n,
int* row_block_size, int* col_block_size,
int* first_proc_row, int* first_proc_col,
int* blacs_grid, int* leading_dim,
int* error_info);
int main(int argc, char *argv[])
{
/* Declare variables */
double subgrid; /* one-element subgrid on each process */
double norm; /* result: value of 2-norm */
int s[9]; /* ScaLAPACK descriptor */
int me, nprocs, icontxt, x, y;
char grid_major = 'R';
int ix = 1, jx = 1, mx = 1; /* some striding info for ScaLAPACK call */
int NoOfRows; /* No of rows of processor matrix */
int NoOfCol; /* No of columns of processor matrix */
int desc_A[9]; /* Scalapack descriptor matrix A */
int desc_b[9]; /* Scalapack descriptor vector b */
int desc_x[9]; /* Scalapack descriptor vector x */
double *A_loc; /* Matrix A local portion */
double *b_loc; /* Vector b local portion */
int n=8; /* Square dimension of matrix A */
int BlockSize; /* Size of local data block */
int mloc_A; /* No of rows in local data block */
int nloc_A; /* No of columns in local data block */
int mloc_b; /* No of rows in local data block */
int nloc_b; /* No of columns in local data block */
int MyRow; /* Local process row number */
int MyCol; /* Local process column number */
int nbuser=64; /* Maximal array size */
int info; /* Error flag */
int i_loc; /* Local row index */
int j_loc; /* Local column index */
int i_glob; /* Global row index */
int j_glob; /* Global column index */
int intdummy;
int intdummy2;
int* intdummypek;
int * ipvt;
double doubledummy;
/* double doubledummy; */
char chardummy;
const int zero=0;
const int one=1;
int error_info;
/* Initialization */
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
/* Create processor grid */
/*
* Cblacs_pinfo initializes BLACS; standard BLACS + ScaLAPACK procedure
*/
/* Determine total no of processors */
Cblacs_pinfo(&me, &nprocs);
/* Factor the number of processors into a rectangle
-quadratic shape preferred */
for (x=1; x<= 10000; x++){
if ((x*x) >= nprocs) {
if ((x*x) == nprocs){
NoOfRows=x;
NoOfCol =x;
break;
} /* endif */
else {
for (y=x; y>=1; y--){
if (x*y==nprocs){
NoOfRows=x;
NoOfCol=y;
break;
} /* endif */
} /* endfor */
} /* endelse */
} /* endif */
} /* endfor */
/* Inform Blacs of this grid extent */
Cblacs_get(0, 0, &icontxt);
/* initialize BLACS grid and get BLACS context */
Cblacs_gridinit(&icontxt, &grid_major, NoOfRows, NoOfCol);
/* Get process coordinates in processor grid */
Cblacs_pcoord(icontxt, me, &MyRow, &MyCol);
/* Determine a good block size */
if ((n/NoOfRows)<(n/NoOfCol))
BlockSize=(n/NoOfRows);
else
BlockSize=(n/NoOfCol);
if (nbuser>0){
if (BlockSize>nbuser)
BlockSize=(nbuser);
}
if (BlockSize<1)
BlockSize=(1);
/* Create a Scalapack array descriptor and allocate memory on each process*/
mloc_A=Get_dimension(n, BlockSize, MyRow, NoOfRows);
nloc_A=Get_dimension(n, BlockSize, MyCol, NoOfCol);
mloc_b=Get_dimension(n, BlockSize, MyRow, NoOfRows);
nloc_b=Get_dimension(1, BlockSize, MyCol, NoOfCol);
intdummy2=0;
descinit_(desc_A, &n, &n, &BlockSize, &BlockSize, &intdummy2, &intdummy2, &icontxt, &mloc_A, &error_info);
intdummy=1;
descinit_(desc_b, &intdummy, &n, &BlockSize, &intdummy, &intdummy2, &intdummy2, &icontxt, &mloc_b, &error_info);
/* int ipvt[mloc_A];*/ /* Pivoting information */
ipvt = (int*)malloc(mloc_A*sizeof(int));
/* Allocate space for each process */
A_loc = (double*) malloc((mloc_A*nloc_A)*sizeof(double));
b_loc = (double*) malloc((mloc_b*nloc_b)*sizeof(double));
/* Initialize*/
intdummy2=1;
for(j_loc=0; j_loc<nloc_A; j_loc++){
j_glob=indxl2g_(&j_loc, &BlockSize, &MyCol, &zero, &NoOfCol);
for(i_loc=0; i_loc<mloc_A; i_loc++){
i_glob=indxl2g_(&i_loc, &BlockSize, &MyRow, &zero, &NoOfRows);
A_loc[i_loc, j_loc]=((double)(1.0+sqrt(fabs(i_glob-j_glob)/8)));
}
}
intdummy=-1;
for(j_loc=0; j_loc<nloc_b; j_loc++){
j_glob=indxl2g_(&j_loc, &one, &MyCol, &zero, &NoOfCol);
for(i_loc=0; i_loc<mloc_b; i_loc++){
intdummy=intdummy+1;
i_glob=indxl2g_(&i_loc, &BlockSize, &MyRow, &zero, &NoOfRows);
b_loc[intdummy]=((double)(i_glob));
}
}
intdummy=indxl2g_(&one, &BlockSize, &MyRow, &zero, &nprocs);
intdummy2=indxl2g_(&one, &BlockSize, &MyCol, &zero, &nprocs);
/* Solve the equation */
Cblacs_barrier(icontxt, "A");
pdgetrf_(&mloc_A, &nloc_A, (double *)A_loc, &intdummy, &intdummy2, desc_A, &ipvt, &info);
if (info!=0)
printf("\n Error i pdgetrf, ERROR=%i\n", info);
chardummy='N'; intdummy=1; intdummy2=0;
pdgetrs_(&chardummy, &n, &intdummy, A_loc, &intdummy, &intdummy, desc_A, &ipvt,
b_loc, &intdummy, &intdummy, desc_b, &info);
if (info!=0)
printf("\n Error in pdgetrs");
free (A_loc); /* Free allocated heap memory for each process */
if (me == 0) {
/* double diff;
diff = norm - (double) nprocs;
if ( diff > -0.01 && diff < 0.01 ) {
} else {
printf("ScaLAPACKmkI example FAILED. ");
} */
}
/* MPI Collect data f(b_loc) from all processes to give the global X-vector */
free(ipvt);
free (b_loc); /* Free allocated heap memory for each process */
Cblacs_gridexit(icontxt);
Cblacs_exit(1);
MPI_Finalize();
return (0);
}
/*===================================================================
*
* Simplified wrapper for ScaLAPACK routine numroc
* numroc computes minimum number of rows or columns needed to store
* a process' piece of a distributed array
*/
int Get_dimension(int order, int block_size, int my_process_row_or_col,
int nproc_rows_or_cols) {
int first_process_row_or_col = 0; /* Assume array begins in row */
/* or column 0. */
int return_val;
extern int numroc_(int* order, int* block_size,
int* my_process_row_or_col, int* first_process_row_or_col,
int* nproc_rows_or_cols);
return_val = numroc_(&order, &block_size, &my_process_row_or_col,
&first_process_row_or_col, &nproc_rows_or_cols);
return return_val;
} /* Get_dimension */
/* end file */