so i have to factorize first with (sgetrf_gpu)

I think everything runs fine until calling

- Code: Select all
`cublasStrsm(MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, n, nrhs, c_one, dA, ldda, dB, lddb );`

in the magma_sgetrs_gpu method

I am a little bit confused, probably the mistake is on my side, so i ask the pros.

here is the funktion. A is an NxN Matrix, size is N, B is an Vector with dim N:

- Code: Select all
`float* LU(float* h_A, int size, float* h_B){`

TESTING_CUDA_INIT();

float *h_R,*h_X;

float *d_A,*d_B;

magma_int_t *ipiv;

/* Matrix size */

magma_int_t M =size, N = size, lda =size, ldda = size, lddb = size;

magma_int_t info, min_mn;

min_mn = min(M, N);

/* Allocate host memory for the matrix */

TESTING_MALLOC(ipiv, magma_int_t, min_mn);

TESTING_MALLOC( h_A, float, M * N );

TESTING_HOSTALLOC( h_R, float, M * N );

TESTING_MALLOC( h_B, float, lda*1 );

TESTING_MALLOC( h_X, float, lda*1 );

TESTING_DEVALLOC( d_A, float, ldda*N );

TESTING_DEVALLOC( d_B, float, lddb*1 );

h_R=h_A;

cublasSetMatrix( M, N, sizeof(float), h_R, lda, d_A, ldda);

cublasSetMatrix( N, 1, sizeof( float ), h_B, N, d_B, lddb );

magma_sgetrf_gpu( M, N, d_A, ldda, ipiv, &info);

magma_sgetrs_gpu('N', M, 1, d_A, ldda, ipiv, h_B, ldda, &info);

//cublasGetMatrix( M, N, sizeof(float), d_A, ldda, h_A, lda);

cublasGetMatrix( N, 1, sizeof( float ), d_B, ldda, h_X, lda );

TESTING_CUDA_FINALIZE();

return h_X ;

}

thanks Tomac