- Code: Select all
`#include <stdlib.h>`

#include <stdio.h>

#include <string.h>

#include <math.h>

#include "mex.h"

#include "cuda.h"

#include "cuda_runtime_api.h"

#include "cublas.h"

#include "magma.h"

#include "sys/time.h"

void mexFunction( int nlhs, mxArray *plhs[],

int nrhs, const mxArray *prhs[])

{

int L;

int Lc;

int dims0[2];

// INPUT VARIABLES %%%%%%%%%%%%%%%%%%%%%%%%%

// A is dimensioned LXL

float *A;

// OUTPUT VARIABLE, X factorized %%%%%%%%%%%%%%%%%%

float *X;

// CUDA/GPU VARIABLES %%%%%%%%%%%%%%%%%%%%%%%%

float *ga;

float *h_work;

int* ipiv = 0;

int info[1];

if (nrhs != 1) {

mexErrMsgTxt("gpu_sgetrf_magma requires 1 input arguments");

} else if (nlhs != 1) {

mexErrMsgTxt("gpu_sgesv_magma requires 1 output argument");

}

if ( !mxIsSingle(prhs[0]) ) {

mexErrMsgTxt("Input array must be single precision.");

}

// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

// Single-precision input array */

// Dimensions, and then array data

L = mxGetN(prhs[0]);

// printf("L = %i\n",L);

A = (float*) mxGetData(prhs[0]);

// Left hand side matrix set up (the solution)

dims0[0]=L;

plhs[0] = mxCreateNumericArray(2,dims0,mxSINGLE_CLASS,mxREAL);

X = (float*) mxGetData(plhs[0]);

cuInit( 0 );

cublasInit();

int maxnb = magma_get_sgetrf_nb(L);

int lwork = L*maxnb;

printf("Get maxnb...\n");

cublasAlloc((L+32)*(L+32) + 32*maxnb + lwork+2*maxnb*maxnb,sizeof(float), (void**)&ga);

cudaMemset(ga,0,((L+32)*(L+32) + 32*maxnb + lwork+2*maxnb*maxnb)*4);

Lc = (L/32)*32;

if (Lc<L) Lc+=32;

printf("%i, %i, %i, %i\n",L,Lc,maxnb,lwork);

cublasSetMatrix (L, L, sizeof(float), A, L, ga, Lc);

// cublasSetMatrix (L, L, sizeof(float), A, L, (void *)ga, Lc);

printf("Set A...\n");

ipiv = (int *) mxCalloc (Lc,sizeof (int));

// cudaMallocHost( (void**)&h_work, (lwork+32*maxnb)*sizeof(float) );

h_work = (float *) mxCalloc((lwork+32*maxnb),sizeof(float));

printf("mxCalloc...\n");

// Ready to go...

magma_sgetrf_gpu(&L, &L, ga, &Lc, ipiv, h_work, info);

// magma_sgetrf_gpu(&N, &N, d_A, &lda, ipiv, h_work, info);

// ***** CRASHES AT THIS CALL TO MAGMA *****

printf("sgetrf...\n");

// Get the solution off the GPU

cublasGetMatrix (L, L, sizeof(float), ga, Lc, X, L);

printf("Solution...\n");

// X has the solution we need; now back to matlab after a bit of clean up.

// Print the first three elements of the first row (debugging)

printf("X-top = %e %e %e\n",X[0],X[L],X[L+L]);

// Print the last three elements of the last row (debugging)

printf("X-bottom = %e %e %e\n",X[L*(L-2)-1],X[L*(L-1)-1],X[L*L-1]);

// Clear the variables to avoid GPU memory leak (and GPU crash!)

free(ipiv);

free(h_work);

cublasFree (ga);

cublasShutdown();

}