floating point exception using magma_dpgmres

Open discussion for MAGMA library (Matrix Algebra on GPU and Multicore Architectures)
Post Reply
Noran
Posts: 27
Joined: Fri Sep 26, 2014 3:47 am

floating point exception using magma_dpgmres

Post by Noran » Fri Dec 19, 2014 3:37 pm

Hi @ all,
I tried to use the function magma_dpgmres. But it looks like im initializing the precond_par wrong, because I always get an floating point exception when I call dpgmres, while the normal GMRES algorithm works fine with my matrices. The initializiation of the solver options are below. Can somebody help me, please?

Greetz Noran

Code: Select all

for (int i = 0; i < colsMatrixB; i++)
	{
		DEBUG_LOG("call magma_dvset");
		magma_dvset (	colsMatrixA,
						1,
						valB + i * rowsMatrixA,
						&b[i],
						queue[i]);

		DEBUG_LOG("transfer magma types");
		magma_d_vtransfer (b[i], &d_b[i], Magma_CPU, Magma_DEV, 0);
		magma_d_vinit (	&d_ab[i],
						Magma_DEV,
						d_A.num_cols,
						zero,
						queue[i % num_queues]); //initial guess (0 vector)
		opts[i].solver_par.solver = Magma_GMRES;
		opts[i].solver_par.epsilon = 10e-8;
		opts[i].solver_par.maxiter = 1000;
		opts[i].solver_par.restart = 30;
		opts[i].solver_par.version = 0;
		opts[i].solver_par.verbose = 0;
		opts[i].solver_par.num_eigenvalues = 0;
		opts[i].solver_par.eigenvalues = NULL;
		opts[i].solver_par.eigenvectors = NULL;
		opts[i].precond_par.solver = Magma_ILU;

		magma_dsolverinfo_init (&opts[i].solver_par,
					&opts[i].precond_par,
					queue[i % num_queues]);

		magma_dpgmres (	d_A,
				d_b[i],
				&d_ab[i],
				&opts[i].solver_par,
				&opts[i].precond_par,
				queue[i % num_queues]);

		ab[i].memory_location = Magma_CPU;
		ab[i].num_rows = d_ab[i].num_rows;
		ab[i].num_cols = d_ab[i].num_cols;
		ab[i].nnz = d_ab[i].nnz;
		ab[i].major = d_ab[i].major;
		ab[i].val = valAB + i * ab[i].num_rows;

		magma_dgetvector(	d_ab[i].num_rows,
							d_ab[i].val,
							1,
							ab[i].val,
							1);
Last edited by Noran on Tue Jan 13, 2015 6:09 am, edited 1 time in total.

Noran
Posts: 27
Joined: Fri Sep 26, 2014 3:47 am

Re: floating point exception using magma_dpgmres

Post by Noran » Tue Jan 06, 2015 1:41 am

Has anybody an idea of a solution?

hartwig anzt
Posts: 90
Joined: Tue Sep 02, 2014 5:44 pm

Re: floating point exception using magma_dpgmres

Post by hartwig anzt » Thu Jan 08, 2015 12:57 pm

Noran,

could you please provide your complete runfile?

From "opts.solver_par.solver = Magma_GMRES;" I deduce you have a loop starting several solvers at the same time? This may cause problems, but for looking into it, I would need the complete code.

Thanks, Hartwig

Noran
Posts: 27
Joined: Fri Sep 26, 2014 3:47 am

Re: floating point exception using magma_dpgmres

Post by Noran » Tue Jan 13, 2015 6:12 am

Hi Hartwig,
a Happy new year i wish you :)
Yes i have to do the GMRES Algorithm for several Columns of the right hand side. I changed the Code in my first Reply and support the full Code below. Thank you for your help. I'm nearly ready with my Masterthesis :)

Greetz Noran

Code: Select all

#include "pyMAGMA_functions.h"
#include "cuda.h"

int
pyMAGMA_MMSolveGmres_sparse_double (CasADi::Matrix<double> *matrixA,
									CasADi::Matrix<double> *matrixB,
									CasADi::Matrix<double> *out_matrixX,
									int num_queues)
{
	printf("matrixA = \n");
	matrixA->print();
	printf("matrixB = \n");
	matrixB->print();
	printf("out_matrixX = \n");
	out_matrixX->print();

	magma_int_t rowsMatrixA = matrixA->size1 ();
	magma_int_t colsMatrixA = matrixA->size2 ();
	magma_int_t nonZeroElementsA = matrixA->numel ();
	magma_index_t *rowPointerA =
			const_cast<int*> (matrixA->rowind ().data ());
	magma_index_t *colIndA =
			const_cast<int*> (matrixA->col ().data ());
	double *valA = matrixA->ptr ();
	magma_int_t colsMatrixB = matrixB->size2 ();
	magma_int_t rowsMatrixB = matrixB->size1 ();
	double *valB = matrixB->ptr ();
	double *valAB;
	int error = 0;

	//choose used GPU
	unsigned long int neededMemory = ((2 * nonZeroElementsA
			+ rowsMatrixA + 1) //A
	+ (rowsMatrixB * colsMatrixA * 2)) //B and C
	* sizeof(double);

	error = pyMAGMA_select_GPU (neededMemory);
	if (error != MAGMA_SUCCESS)
	{
		printf ("Error at selecting GPU. Errorcode = %i.\n", error);
		return error;
	}

	//Initialize colIndex and data
	std::vector<double> v_outData;
	CasADi::CRSSparsity sparsity;

	//prepare vector for output
	v_outData.resize (matrixA->size1 () * matrixB->size2 ());
	valAB = v_outData.data ();

	if (num_queues < 1)
	{
		printf ("Error: num_queues must be grater than 0\n");
		return -1;
	}

	DEBUG_LOG("initializing magma_queues");
	magma_queue_t queue[num_queues];
	for (int i = 0; i < num_queues; i++)
	{
		magma_queue_create(&queue[i]);
	}

	double zero = 0.0;
	double one = 1.0;

	cudaError_t cudaError;
	magma_d_sparse_matrix A, d_A;
	magma_d_vector b[colsMatrixB];
	magma_d_vector d_b[colsMatrixB];
	magma_d_vector ab[colsMatrixB];
	magma_d_vector d_ab[colsMatrixB];
	magma_dopts opts[colsMatrixB];

	DEBUG_LOG("call magma_scsrset");
	magma_dcsrset (	rowsMatrixA,
					colsMatrixA,
					rowPointerA,
					colIndA,
					valA,
					&A,
					queue[0]);

	DEBUG_LOG("call magma_s_mtransfer");
	magma_d_mtransfer (	A,
						&d_A,
						Magma_CPU,
						Magma_DEV,
						queue[1 % num_queues]);

	for (int i = 0; i < colsMatrixB; i++)
	{
		DEBUG_LOG("call magma_dvset");
		magma_dvset (	colsMatrixA,
						1,
						valB + i * rowsMatrixA,
						&b[i],
						queue[i]);

		DEBUG_LOG("transfer magma types");
		magma_d_vtransfer (b[i], &d_b[i], Magma_CPU, Magma_DEV, 0);
		magma_d_vinit (	&d_ab[i],
						Magma_DEV,
						d_A.num_cols,
						zero,
						queue[i % num_queues]); //initial guess (0 vector)

		DEBUG_LOG("Initializing solver_par");
		opts[i].solver_par.solver = Magma_GMRES;
		opts[i].solver_par.epsilon = 10e-16;
		opts[i].solver_par.maxiter = 1000;
		opts[i].solver_par.restart = 30;
		opts[i].solver_par.version = 0;
		opts[i].solver_par.verbose = 0;
		opts[i].solver_par.num_eigenvalues = 0;
		opts[i].solver_par.eigenvalues = NULL;
		opts[i].solver_par.eigenvectors = NULL;
		opts[i].precond_par.solver = Magma_ILU;

		DEBUG_LOG("Initialize solver setup");
		magma_dsolverinfo_init (&opts[i].solver_par,
								&opts[i].precond_par,
								queue[i % num_queues]);

		DEBUG_LOG("Running solver...");
		magma_dgmres (	d_A,
						d_b[i],
						&d_ab[i],
						&opts[i].solver_par,
						queue[i % num_queues]);

		DEBUG_device_print_vector_double(d_ab[i].dval, d_ab[i].num_rows, "d_ab[i].val");

		DEBUG_LOG("Get Matrix from GPU");
		ab[i].memory_location = Magma_CPU;
		ab[i].num_rows = d_ab[i].num_rows;
		ab[i].num_cols = d_ab[i].num_cols;
		ab[i].nnz = d_ab[i].nnz;
		ab[i].major = d_ab[i].major;
		ab[i].val = valAB + i * ab[i].num_rows;

		magma_dgetvector(	d_ab[i].num_rows,
							d_ab[i].val,
							1,
							ab[i].val,
							1);
	}

	//prepare Matrix for output
	*out_matrixX = CasADi::Matrix<double> (	v_outData,
											matrixA->size1 (),
											matrixB->size2 ());

	DEBUG_LOG("Free memory");
	//magma_s_mfree(&A, queue);
	DEBUG_LOG("free d_A");
	magma_d_mfree (&d_A, queue[0]);
	//magma_s_vfree(&ab, queue);
	//magma_s_vfree(&b, queue);
	for (int i = 0; i < colsMatrixB; i++)
	{
		magma_d_vfree (&d_ab[i], queue[i % num_queues]);
		magma_d_vfree (&d_b[i], queue[i % num_queues]);
	}

	DEBUG_LOG("Destroy cudaStreams");
	for (int i = 0; i < num_queues; i++)
	{
		cudaStreamDestroy (queue[i]);
	}

	return error;
}

hartwig anzt
Posts: 90
Joined: Tue Sep 02, 2014 5:44 pm

Re: floating point exception using magma_dpgmres

Post by hartwig anzt » Tue Jan 13, 2015 3:39 pm

Noran,

unfortunately, I don't think I can help you this time. The problem is, that running several instances of GMRES requires multiple threads - and MAGMA is in the current version not threadsafe.

I am sorry, but you may have to solve one system after another... How many systems are you actually solving?

Thanks, Hartwig

Noran
Posts: 27
Joined: Fri Sep 26, 2014 3:47 am

Re: floating point exception using magma_dpgmres

Post by Noran » Wed Jan 14, 2015 3:29 am

Thank you Hartwig for you're reply.
We have to solve something between 10 and 50 Systems (depends on the optimizationproblem). Even if we can't solve the problem for now, we know what's the reason. :)

Greetz Noran

Post Reply