MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
strtri.cpp File Reference
#include "common_magma.h"
Include dependency graph for strtri.cpp:

Go to the source code of this file.

Macros

#define PRECISION_s
#define magma_sgemm   magmablas_sgemm
#define magma_strsm   magmablas_strsm
#define A(i, j)   (a +(j)*lda + (i))
#define dA(i, j)   (work+(j)*ldda + (i))

Functions

magma_int_t magma_strtri (char uplo, char diag, magma_int_t n, float *a, magma_int_t lda, magma_int_t *info)

Macro Definition Documentation

#define A (   i,
 
)    (a +(j)*lda + (i))

Definition at line 28 of file strtri.cpp.

#define dA (   i,
 
)    (work+(j)*ldda + (i))

Definition at line 29 of file strtri.cpp.

#define magma_sgemm   magmablas_sgemm

Definition at line 16 of file strtri.cpp.

#define magma_strsm   magmablas_strsm

Definition at line 17 of file strtri.cpp.

#define PRECISION_s

Definition at line 14 of file strtri.cpp.


Function Documentation

magma_int_t magma_strtri ( char  uplo,
char  diag,
magma_int_t  n,
float *  a,
magma_int_t  lda,
magma_int_t info 
)

Definition at line 33 of file strtri.cpp.

References __func__, A, dA, diag, lapackf77_lsame, lapackf77_strtri(), MAGMA_ERR_DEVICE_ALLOC, magma_free(), magma_get_spotrf_nb(), magma_queue_create(), magma_queue_destroy(), magma_queue_sync(), MAGMA_S_NEG_ONE, MAGMA_S_ONE, magma_sgetmatrix_async(), magma_smalloc(), magma_ssetmatrix(), magma_strmm(), magma_strsm, MAGMA_SUCCESS, magma_xerbla(), MagmaLeft, MagmaLower, MagmaLowerStr, MagmaNonUnit, MagmaNoTrans, MagmaRight, MagmaUpper, MagmaUpperStr, max, min, uplo, and codegen::work.

{
/* -- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
Purpose
=======
DTRTRI computes the inverse of a real upper or lower triangular
matrix A.
This is the Level 3 BLAS version of the algorithm.
Arguments
=========
UPLO (input) CHARACTER*1
= 'U': A is upper triangular;
= 'L': A is lower triangular.
DIAG (input) CHARACTER*1
= 'N': A is non-unit triangular;
= 'U': A is unit triangular.
N (input) INTEGER
The order of the matrix A. N >= 0.
A (input/output) REAL array, dimension (LDA,N)
On entry, the triangular matrix A. If UPLO = 'U', the
leading N-by-N upper triangular part of the array A contains
the upper triangular matrix, and the strictly lower
triangular part of A is not referenced. If UPLO = 'L', the
leading N-by-N lower triangular part of the array A contains
the lower triangular matrix, and the strictly upper
triangular part of A is not referenced. If DIAG = 'U', the
diagonal elements of A are also not referenced and are
assumed to be 1.
On exit, the (triangular) inverse of the original matrix, in
the same storage format.
LDA (input) INTEGER
The leading dimension of the array A. LDA >= max(1,N).
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
> 0: if INFO = i, A(i,i) is exactly zero. The triangular
matrix is singular and its inverse can not be computed.
===================================================================== */
/* Local variables */
char uplo_[2] = {uplo, 0};
char diag_[2] = {diag, 0};
magma_int_t ldda, nb, nn;
static magma_int_t j, jb;
float c_one = MAGMA_S_ONE;
float c_neg_one = MAGMA_S_NEG_ONE;
float *work;
long int upper = lapackf77_lsame(uplo_, "U");
long int nounit = lapackf77_lsame(diag_, "N");
*info = 0;
if ((! upper) && (! lapackf77_lsame(uplo_, "L")))
*info = -1;
else if ((! nounit) && (! lapackf77_lsame(diag_, "U")))
*info = -2;
else if (n < 0)
*info = -3;
else if (lda < max(1,n))
*info = -5;
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
/* Quick return */
if ( n == 0 )
return *info;
/* Check for singularity if non-unit */
if (nounit)
{
for (*info=0; *info < n; *info=*info+1)
{
if(A(*info,*info)==0)
return *info;
}
*info=0;
}
/* Determine the block size for this environment */
ldda = ((n+31)/32)*32;
if (MAGMA_SUCCESS != magma_smalloc( &work, (n)*ldda )) {
return *info;
}
static cudaStream_t stream[2];
magma_queue_create( &stream[0] );
magma_queue_create( &stream[1] );
if (nb <= 1 || nb >= n)
lapackf77_strtri(uplo_, diag_, &n, a, &lda, info);
else
{
if (upper)
{
/* Compute inverse of upper triangular matrix */
for (j=0; j<n; j =j+ nb)
{
jb = min(nb, (n-j));
magma_ssetmatrix( jb, (n-j),
A(j, j), lda,
dA(j, j), ldda );
/* Compute rows 1:j-1 of current block column */
c_one, dA(0,0), ldda, dA(0, j),ldda);
c_neg_one, dA(j,j), ldda, dA(0, j),ldda);
//cublasGetMatrix(j ,jb, sizeof( float),
//dA(0, j), ldda, A(0, j), lda);
dA(j, j), ldda,
A(j, j), lda, stream[1] );
dA(0, j), ldda,
A(0, j), lda, stream[0] );
magma_queue_sync( stream[1] );
/* Compute inverse of current diagonal block */
lapackf77_strtri(MagmaUpperStr, diag_, &jb, A(j,j), &lda, info);
A(j, j), lda,
dA(j, j), ldda );
}
}
else
{
/* Compute inverse of lower triangular matrix */
nn=((n-1)/nb)*nb+1;
for(j=nn-1; j>=0; j=j-nb)
{
jb=min(nb,(n-j));
if((j+jb) < n)
{
magma_ssetmatrix( (n-j), jb,
A(j, j), lda,
dA(j, j), ldda );
/* Compute rows j+jb:n of current block column */
MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda);
MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda);
//cublasGetMatrix((n-j), jb, sizeof( cuDoub
//leComplex),dA(j, j), ldda, A(j, j), lda);
magma_sgetmatrix_async( (n-j-jb), jb,
dA(j+jb, j), ldda,
A(j+jb, j), lda, stream[1] );
dA(j,j), ldda,
A(j,j), lda, stream[0] );
magma_queue_sync( stream[0] );
}
/* Compute inverse of current diagonal block */
lapackf77_strtri(MagmaLowerStr, diag_, &jb, A(j,j), &lda, info);
A(j, j), lda,
dA(j, j), ldda );
}
}
}
magma_queue_destroy( stream[0] );
magma_queue_destroy( stream[1] );
magma_free( work );
return *info;
}

Here is the call graph for this function:

Here is the caller graph for this function: