MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dtrtri_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @generated d Tue Aug 13 16:44:06 2013
9 
10 */
11 #include "common_magma.h"
12 
13 #define dA(i, j) (dA+(j)*ldda + (i))
14 
15 extern "C" magma_int_t
16 magma_dtrtri_gpu(char uplo, char diag, magma_int_t n,
17  double *dA, magma_int_t ldda, magma_int_t *info)
18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  DTRTRI computes the inverse of a real upper or lower triangular
28  matrix dA.
29 
30  This is the Level 3 BLAS version of the algorithm.
31 
32  Arguments
33  =========
34  UPLO (input) CHARACTER*1
35  = 'U': A is upper triangular;
36  = 'L': A is lower triangular.
37 
38  DIAG (input) CHARACTER*1
39  = 'N': A is non-unit triangular;
40  = 'U': A is unit triangular.
41 
42  N (input) INTEGER
43  The order of the matrix A. N >= 0.
44 
45  dA (input/output) DOUBLE_PRECISION array ON THE GPU, dimension (LDDA,N)
46  On entry, the triangular matrix A. If UPLO = 'U', the
47  leading N-by-N upper triangular part of the array dA contains
48  the upper triangular matrix, and the strictly lower
49  triangular part of A is not referenced. If UPLO = 'L', the
50  leading N-by-N lower triangular part of the array dA contains
51  the lower triangular matrix, and the strictly upper
52  triangular part of A is not referenced. If DIAG = 'U', the
53  diagonal elements of A are also not referenced and are
54  assumed to be 1.
55  On exit, the (triangular) inverse of the original matrix, in
56  the same storage format.
57 
58  LDDA (input) INTEGER
59  The leading dimension of the array dA. LDDA >= max(1,N).
60 
61  INFO (output) INTEGER
62  = 0: successful exit
63  < 0: if INFO = -i, the i-th argument had an illegal value
64  > 0: if INFO = i, dA(i,i) is exactly zero. The triangular
65  matrix is singular and its inverse cannot be computed.
66  (Singularity check is currently disabled.)
67 
68  ===================================================================== */
69 
70  /* Local variables */
71  char uplo_[2] = {uplo, 0};
72  char diag_[2] = {diag, 0};
73  magma_int_t nb, nn, j, jb;
74  //double c_zero = MAGMA_D_ZERO;
75  double c_one = MAGMA_D_ONE;
76  double c_neg_one = MAGMA_D_NEG_ONE;
77  double *work;
78 
79  int upper = lapackf77_lsame(uplo_, "U");
80  int nounit = lapackf77_lsame(diag_, "N");
81 
82  *info = 0;
83 
84  if ((! upper) && (! lapackf77_lsame(uplo_, "L")))
85  *info = -1;
86  else if ((! nounit) && (! lapackf77_lsame(diag_, "U")))
87  *info = -2;
88  else if (n < 0)
89  *info = -3;
90  else if (ldda < max(1,n))
91  *info = -5;
92 
93  if (*info != 0) {
94  magma_xerbla( __func__, -(*info) );
95  return *info;
96  }
97 
98  /* Check for singularity if non-unit */
99  /* cannot do here with matrix dA on GPU -- need kernel */
100  /*
101  if (nounit) {
102  for ( j=0; j<n; ++j ) {
103  if ( MAGMA_D_EQUAL( *dA(j,j), c_zero )) {
104  *info = j+1; // Fortran index
105  return *info;
106  }
107  }
108  }
109  */
110 
111  /* Determine the block size for this environment */
112  nb = magma_get_dpotrf_nb(n);
113 
114  if (MAGMA_SUCCESS != magma_dmalloc_pinned( &work, nb*nb )) {
115  *info = MAGMA_ERR_HOST_ALLOC;
116  return *info;
117  }
118 
119  magma_queue_t stream[2];
120  magma_queue_create( &stream[0] );
121  magma_queue_create( &stream[1] );
122 
123  if (nb <= 1 || nb >= n) {
124  magma_dgetmatrix( n, n, dA, ldda, work, n );
125  lapackf77_dtrtri(uplo_, diag_, &n, work, &n, info);
126  magma_dsetmatrix( n, n, work, n, dA, ldda );
127  }
128  else {
129  if (upper) {
130  /* Compute inverse of upper triangular matrix */
131  for (j=0; j < n; j += nb) {
132  jb = min(nb, (n-j));
133 
134  /* Compute rows 1:j-1 of current block column */
136  MagmaNoTrans, MagmaNonUnit, j, jb,
137  c_one, dA(0,0), ldda, dA(0, j),ldda);
138 
140  MagmaNoTrans, MagmaNonUnit, j, jb,
141  c_neg_one, dA(j,j), ldda, dA(0, j),ldda);
142 
143  magma_dgetmatrix_async( jb, jb,
144  dA(j, j), ldda,
145  work, jb, stream[1] );
146 
147  magma_queue_sync( stream[1] );
148 
149  /* Compute inverse of current diagonal block */
150  lapackf77_dtrtri(MagmaUpperStr, diag_, &jb, work, &jb, info);
151 
152  magma_dsetmatrix_async( jb, jb,
153  work, jb,
154  dA(j, j), ldda, stream[0] );
155  }
156  }
157  else {
158  /* Compute inverse of lower triangular matrix */
159  nn=((n-1)/nb)*nb+1;
160 
161  for(j=nn-1; j>=0; j=j-nb) {
162  jb=min(nb,(n-j));
163 
164  if((j+jb) < n) {
165  /* Compute rows j+jb:n of current block column */
167  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
168  c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda);
169 
171  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
172  c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda);
173  }
174 
175  magma_dgetmatrix_async( jb, jb,
176  dA(j, j), ldda,
177  work, jb, stream[1] );
178 
179  magma_queue_sync( stream[1] );
180 
181  /* Compute inverse of current diagonal block */
182  lapackf77_dtrtri(MagmaLowerStr, diag_, &jb, work, &jb, info);
183 
184  magma_dsetmatrix_async( jb, jb,
185  work, jb,
186  dA(j, j), ldda, stream[0] );
187  }
188  }
189  }
190 
191  magma_queue_destroy( stream[0] );
192  magma_queue_destroy( stream[1] );
193  magma_free_pinned( work );
194 
195  return *info;
196 }
static magma_err_t magma_dmalloc_pinned(double **ptrPtr, size_t n)
Definition: magma.h:90
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define magma_dsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_d.h:711
#define dA(i, j)
Definition: dtrtri_gpu.cpp:13
magma_int_t magma_dtrtri_gpu(char uplo, char diag, magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *info)
Definition: dtrtri_gpu.cpp:16
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define MagmaUpper
Definition: magma.h:61
void magma_dtrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_ptr dB, magma_int_t lddb)
magma_int_t magma_get_dpotrf_nb(magma_int_t m)
Definition: get_nb.cpp:47
#define magma_free_pinned(ptr)
Definition: magma.h:60
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_d.h:714
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define magma_queue_destroy(queue)
Definition: magma.h:116
#define MagmaLower
Definition: magma.h:62
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaLowerStr
Definition: magma.h:85
#define MagmaNonUnit
Definition: magma.h:65
void lapackf77_dtrtri(const char *uplo, const char *diag, const magma_int_t *n, double *A, const magma_int_t *lda, magma_int_t *info)
#define MAGMA_SUCCESS
Definition: magma.h:106
#define MagmaRight
Definition: magma.h:69
#define MAGMA_D_NEG_ONE
Definition: magma.h:178
void magma_dtrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_ptr dB, magma_int_t lddb)
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702
#define magma_queue_sync(queue)
Definition: magma.h:119