- Code: Select all
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "mex.h"
#include "cuda.h"
#include "cuda_runtime_api.h"
#include "cublas.h"
#include "magma.h"
#include "sys/time.h"
void mexFunction( int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[])
{
int L;
int Lc;
int dims0[2];
// INPUT VARIABLES %%%%%%%%%%%%%%%%%%%%%%%%%
// A is dimensioned LXL
float *A;
// OUTPUT VARIABLE, X factorized %%%%%%%%%%%%%%%%%%
float *X;
// CUDA/GPU VARIABLES %%%%%%%%%%%%%%%%%%%%%%%%
float *ga;
float *h_work;
int* ipiv = 0;
int info[1];
if (nrhs != 1) {
mexErrMsgTxt("gpu_sgetrf_magma requires 1 input arguments");
} else if (nlhs != 1) {
mexErrMsgTxt("gpu_sgesv_magma requires 1 output argument");
}
if ( !mxIsSingle(prhs[0]) ) {
mexErrMsgTxt("Input array must be single precision.");
}
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// Single-precision input array */
// Dimensions, and then array data
L = mxGetN(prhs[0]);
// printf("L = %i\n",L);
A = (float*) mxGetData(prhs[0]);
// Left hand side matrix set up (the solution)
dims0[0]=L;
plhs[0] = mxCreateNumericArray(2,dims0,mxSINGLE_CLASS,mxREAL);
X = (float*) mxGetData(plhs[0]);
cuInit( 0 );
cublasInit();
int maxnb = magma_get_sgetrf_nb(L);
int lwork = L*maxnb;
printf("Get maxnb...\n");
cublasAlloc((L+32)*(L+32) + 32*maxnb + lwork+2*maxnb*maxnb,sizeof(float), (void**)&ga);
cudaMemset(ga,0,((L+32)*(L+32) + 32*maxnb + lwork+2*maxnb*maxnb)*4);
Lc = (L/32)*32;
if (Lc<L) Lc+=32;
printf("%i, %i, %i, %i\n",L,Lc,maxnb,lwork);
cublasSetMatrix (L, L, sizeof(float), A, L, ga, Lc);
// cublasSetMatrix (L, L, sizeof(float), A, L, (void *)ga, Lc);
printf("Set A...\n");
ipiv = (int *) mxCalloc (Lc,sizeof (int));
// cudaMallocHost( (void**)&h_work, (lwork+32*maxnb)*sizeof(float) );
h_work = (float *) mxCalloc((lwork+32*maxnb),sizeof(float));
printf("mxCalloc...\n");
// Ready to go...
magma_sgetrf_gpu(&L, &L, ga, &Lc, ipiv, h_work, info);
// magma_sgetrf_gpu(&N, &N, d_A, &lda, ipiv, h_work, info);
// ***** CRASHES AT THIS CALL TO MAGMA *****
printf("sgetrf...\n");
// Get the solution off the GPU
cublasGetMatrix (L, L, sizeof(float), ga, Lc, X, L);
printf("Solution...\n");
// X has the solution we need; now back to matlab after a bit of clean up.
// Print the first three elements of the first row (debugging)
printf("X-top = %e %e %e\n",X[0],X[L],X[L+L]);
// Print the last three elements of the last row (debugging)
printf("X-bottom = %e %e %e\n",X[L*(L-2)-1],X[L*(L-1)-1],X[L*L-1]);
// Clear the variables to avoid GPU memory leak (and GPU crash!)
free(ipiv);
free(h_work);
cublasFree (ga);
cublasShutdown();
}