Hi Hartwig,
a Happy new year i wish you :)
Yes i have to do the GMRES Algorithm for several Columns of the right hand side. I changed the Code in my first Reply and support the full Code below. Thank you for your help. I'm nearly ready with my Masterthesis :)
Greetz Noran
Code: Select all
#include "pyMAGMA_functions.h"
#include "cuda.h"
int
pyMAGMA_MMSolveGmres_sparse_double (CasADi::Matrix<double> *matrixA,
CasADi::Matrix<double> *matrixB,
CasADi::Matrix<double> *out_matrixX,
int num_queues)
{
printf("matrixA = \n");
matrixA->print();
printf("matrixB = \n");
matrixB->print();
printf("out_matrixX = \n");
out_matrixX->print();
magma_int_t rowsMatrixA = matrixA->size1 ();
magma_int_t colsMatrixA = matrixA->size2 ();
magma_int_t nonZeroElementsA = matrixA->numel ();
magma_index_t *rowPointerA =
const_cast<int*> (matrixA->rowind ().data ());
magma_index_t *colIndA =
const_cast<int*> (matrixA->col ().data ());
double *valA = matrixA->ptr ();
magma_int_t colsMatrixB = matrixB->size2 ();
magma_int_t rowsMatrixB = matrixB->size1 ();
double *valB = matrixB->ptr ();
double *valAB;
int error = 0;
//choose used GPU
unsigned long int neededMemory = ((2 * nonZeroElementsA
+ rowsMatrixA + 1) //A
+ (rowsMatrixB * colsMatrixA * 2)) //B and C
* sizeof(double);
error = pyMAGMA_select_GPU (neededMemory);
if (error != MAGMA_SUCCESS)
{
printf ("Error at selecting GPU. Errorcode = %i.\n", error);
return error;
}
//Initialize colIndex and data
std::vector<double> v_outData;
CasADi::CRSSparsity sparsity;
//prepare vector for output
v_outData.resize (matrixA->size1 () * matrixB->size2 ());
valAB = v_outData.data ();
if (num_queues < 1)
{
printf ("Error: num_queues must be grater than 0\n");
return -1;
}
DEBUG_LOG("initializing magma_queues");
magma_queue_t queue[num_queues];
for (int i = 0; i < num_queues; i++)
{
magma_queue_create(&queue[i]);
}
double zero = 0.0;
double one = 1.0;
cudaError_t cudaError;
magma_d_sparse_matrix A, d_A;
magma_d_vector b[colsMatrixB];
magma_d_vector d_b[colsMatrixB];
magma_d_vector ab[colsMatrixB];
magma_d_vector d_ab[colsMatrixB];
magma_dopts opts[colsMatrixB];
DEBUG_LOG("call magma_scsrset");
magma_dcsrset ( rowsMatrixA,
colsMatrixA,
rowPointerA,
colIndA,
valA,
&A,
queue[0]);
DEBUG_LOG("call magma_s_mtransfer");
magma_d_mtransfer ( A,
&d_A,
Magma_CPU,
Magma_DEV,
queue[1 % num_queues]);
for (int i = 0; i < colsMatrixB; i++)
{
DEBUG_LOG("call magma_dvset");
magma_dvset ( colsMatrixA,
1,
valB + i * rowsMatrixA,
&b[i],
queue[i]);
DEBUG_LOG("transfer magma types");
magma_d_vtransfer (b[i], &d_b[i], Magma_CPU, Magma_DEV, 0);
magma_d_vinit ( &d_ab[i],
Magma_DEV,
d_A.num_cols,
zero,
queue[i % num_queues]); //initial guess (0 vector)
DEBUG_LOG("Initializing solver_par");
opts[i].solver_par.solver = Magma_GMRES;
opts[i].solver_par.epsilon = 10e-16;
opts[i].solver_par.maxiter = 1000;
opts[i].solver_par.restart = 30;
opts[i].solver_par.version = 0;
opts[i].solver_par.verbose = 0;
opts[i].solver_par.num_eigenvalues = 0;
opts[i].solver_par.eigenvalues = NULL;
opts[i].solver_par.eigenvectors = NULL;
opts[i].precond_par.solver = Magma_ILU;
DEBUG_LOG("Initialize solver setup");
magma_dsolverinfo_init (&opts[i].solver_par,
&opts[i].precond_par,
queue[i % num_queues]);
DEBUG_LOG("Running solver...");
magma_dgmres ( d_A,
d_b[i],
&d_ab[i],
&opts[i].solver_par,
queue[i % num_queues]);
DEBUG_device_print_vector_double(d_ab[i].dval, d_ab[i].num_rows, "d_ab[i].val");
DEBUG_LOG("Get Matrix from GPU");
ab[i].memory_location = Magma_CPU;
ab[i].num_rows = d_ab[i].num_rows;
ab[i].num_cols = d_ab[i].num_cols;
ab[i].nnz = d_ab[i].nnz;
ab[i].major = d_ab[i].major;
ab[i].val = valAB + i * ab[i].num_rows;
magma_dgetvector( d_ab[i].num_rows,
d_ab[i].val,
1,
ab[i].val,
1);
}
//prepare Matrix for output
*out_matrixX = CasADi::Matrix<double> ( v_outData,
matrixA->size1 (),
matrixB->size2 ());
DEBUG_LOG("Free memory");
//magma_s_mfree(&A, queue);
DEBUG_LOG("free d_A");
magma_d_mfree (&d_A, queue[0]);
//magma_s_vfree(&ab, queue);
//magma_s_vfree(&b, queue);
for (int i = 0; i < colsMatrixB; i++)
{
magma_d_vfree (&d_ab[i], queue[i % num_queues]);
magma_d_vfree (&d_b[i], queue[i % num_queues]);
}
DEBUG_LOG("Destroy cudaStreams");
for (int i = 0; i < num_queues; i++)
{
cudaStreamDestroy (queue[i]);
}
return error;
}