so i have to factorize first with (sgetrf_gpu)

I think everything runs fine until calling

Code: Select all

`cublasStrsm(MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, n, nrhs, c_one, dA, ldda, dB, lddb );`

I am a little bit confused, probably the mistake is on my side, so i ask the pros.

here is the funktion. A is an NxN Matrix, size is N, B is an Vector with dim N:

Code: Select all

```
float* LU(float* h_A, int size, float* h_B){
TESTING_CUDA_INIT();
float *h_R,*h_X;
float *d_A,*d_B;
magma_int_t *ipiv;
/* Matrix size */
magma_int_t M =size, N = size, lda =size, ldda = size, lddb = size;
magma_int_t info, min_mn;
min_mn = min(M, N);
/* Allocate host memory for the matrix */
TESTING_MALLOC(ipiv, magma_int_t, min_mn);
TESTING_MALLOC( h_A, float, M * N );
TESTING_HOSTALLOC( h_R, float, M * N );
TESTING_MALLOC( h_B, float, lda*1 );
TESTING_MALLOC( h_X, float, lda*1 );
TESTING_DEVALLOC( d_A, float, ldda*N );
TESTING_DEVALLOC( d_B, float, lddb*1 );
h_R=h_A;
cublasSetMatrix( M, N, sizeof(float), h_R, lda, d_A, ldda);
cublasSetMatrix( N, 1, sizeof( float ), h_B, N, d_B, lddb );
magma_sgetrf_gpu( M, N, d_A, ldda, ipiv, &info);
magma_sgetrs_gpu('N', M, 1, d_A, ldda, ipiv, h_B, ldda, &info);
//cublasGetMatrix( M, N, sizeof(float), d_A, ldda, h_A, lda);
cublasGetMatrix( N, 1, sizeof( float ), d_B, ldda, h_X, lda );
TESTING_CUDA_FINALIZE();
return h_X ;
}
```