MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
strtri.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated s Thu May 10 22:27:06 2012
9 
10 */
11 #include "common_magma.h"
12 
13 // === Define what BLAS to use ============================================
14  #define PRECISION_s
15  #if (defined(PRECISION_s) || defined(PRECISION_d))
16  #define magma_sgemm magmablas_sgemm
17  #define magma_strsm magmablas_strsm
18  #endif
19 
20  #if (GPUSHMEM >= 200)
21  #if (defined(PRECISION_s))
22  #undef magma_sgemm
23  #define magma_sgemm magmablas_sgemm_fermi80
24  #endif
25  #endif
26 // === End defining what BLAS to use ======================================
27 
28  #define A(i, j) (a +(j)*lda + (i))
29  #define dA(i, j) (work+(j)*ldda + (i))
30 
31 
32 extern "C" magma_int_t
34  float *a, magma_int_t lda, magma_int_t *info)
35 {
36 /* -- MAGMA (version 1.2.0) --
37  Univ. of Tennessee, Knoxville
38  Univ. of California, Berkeley
39  Univ. of Colorado, Denver
40  May 2012
41 
42  Purpose
43  =======
44 
45  DTRTRI computes the inverse of a real upper or lower triangular
46  matrix A.
47 
48  This is the Level 3 BLAS version of the algorithm.
49 
50  Arguments
51  =========
52 
53  UPLO (input) CHARACTER*1
54  = 'U': A is upper triangular;
55  = 'L': A is lower triangular.
56 
57  DIAG (input) CHARACTER*1
58  = 'N': A is non-unit triangular;
59  = 'U': A is unit triangular.
60 
61  N (input) INTEGER
62  The order of the matrix A. N >= 0.
63 
64  A (input/output) REAL array, dimension (LDA,N)
65  On entry, the triangular matrix A. If UPLO = 'U', the
66  leading N-by-N upper triangular part of the array A contains
67  the upper triangular matrix, and the strictly lower
68  triangular part of A is not referenced. If UPLO = 'L', the
69  leading N-by-N lower triangular part of the array A contains
70  the lower triangular matrix, and the strictly upper
71  triangular part of A is not referenced. If DIAG = 'U', the
72  diagonal elements of A are also not referenced and are
73  assumed to be 1.
74  On exit, the (triangular) inverse of the original matrix, in
75  the same storage format.
76 
77  LDA (input) INTEGER
78  The leading dimension of the array A. LDA >= max(1,N).
79  INFO (output) INTEGER
80  = 0: successful exit
81  < 0: if INFO = -i, the i-th argument had an illegal value
82  > 0: if INFO = i, A(i,i) is exactly zero. The triangular
83  matrix is singular and its inverse can not be computed.
84 
85  ===================================================================== */
86 
87 
88  /* Local variables */
89  char uplo_[2] = {uplo, 0};
90  char diag_[2] = {diag, 0};
91  magma_int_t ldda, nb, nn;
92  static magma_int_t j, jb;
93  float c_one = MAGMA_S_ONE;
94  float c_neg_one = MAGMA_S_NEG_ONE;
95  float *work;
96 
97  long int upper = lapackf77_lsame(uplo_, "U");
98  long int nounit = lapackf77_lsame(diag_, "N");
99 
100  *info = 0;
101 
102  if ((! upper) && (! lapackf77_lsame(uplo_, "L")))
103  *info = -1;
104  else if ((! nounit) && (! lapackf77_lsame(diag_, "U")))
105  *info = -2;
106  else if (n < 0)
107  *info = -3;
108  else if (lda < max(1,n))
109  *info = -5;
110 
111  if (*info != 0) {
112  magma_xerbla( __func__, -(*info) );
113  return *info;
114  }
115 
116  /* Quick return */
117  if ( n == 0 )
118  return *info;
119 
120 
121  /* Check for singularity if non-unit */
122  if (nounit)
123  {
124  for (*info=0; *info < n; *info=*info+1)
125  {
126  if(A(*info,*info)==0)
127  return *info;
128  }
129  *info=0;
130  }
131 
132 
133  /* Determine the block size for this environment */
134  ldda = ((n+31)/32)*32;
135 
136  if (MAGMA_SUCCESS != magma_smalloc( &work, (n)*ldda )) {
137  *info = MAGMA_ERR_DEVICE_ALLOC;
138  return *info;
139  }
140 
141  static cudaStream_t stream[2];
142  magma_queue_create( &stream[0] );
143  magma_queue_create( &stream[1] );
144 
145  nb = magma_get_spotrf_nb(n);
146 
147  if (nb <= 1 || nb >= n)
148  lapackf77_strtri(uplo_, diag_, &n, a, &lda, info);
149  else
150  {
151  if (upper)
152  {
153  /* Compute inverse of upper triangular matrix */
154  for (j=0; j<n; j =j+ nb)
155  {
156  jb = min(nb, (n-j));
157  magma_ssetmatrix( jb, (n-j),
158  A(j, j), lda,
159  dA(j, j), ldda );
160 
161  /* Compute rows 1:j-1 of current block column */
163  MagmaNoTrans, MagmaNonUnit, j, jb,
164  c_one, dA(0,0), ldda, dA(0, j),ldda);
165 
167  MagmaNoTrans, MagmaNonUnit, j, jb,
168  c_neg_one, dA(j,j), ldda, dA(0, j),ldda);
169 
170  //cublasGetMatrix(j ,jb, sizeof( float),
171  //dA(0, j), ldda, A(0, j), lda);
172 
173  magma_sgetmatrix_async( jb, jb,
174  dA(j, j), ldda,
175  A(j, j), lda, stream[1] );
176 
177 
178  magma_sgetmatrix_async( j, jb,
179  dA(0, j), ldda,
180  A(0, j), lda, stream[0] );
181 
182  magma_queue_sync( stream[1] );
183 
184  /* Compute inverse of current diagonal block */
185  lapackf77_strtri(MagmaUpperStr, diag_, &jb, A(j,j), &lda, info);
186 
187  magma_ssetmatrix( jb, jb,
188  A(j, j), lda,
189  dA(j, j), ldda );
190  }
191 
192  }
193  else
194  {
195  /* Compute inverse of lower triangular matrix */
196  nn=((n-1)/nb)*nb+1;
197 
198  for(j=nn-1; j>=0; j=j-nb)
199  {
200 
201  jb=min(nb,(n-j));
202 
203  if((j+jb) < n)
204  {
205  magma_ssetmatrix( (n-j), jb,
206  A(j, j), lda,
207  dA(j, j), ldda );
208 
209  /* Compute rows j+jb:n of current block column */
211  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
212  c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda);
213 
215  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
216  c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda);
217 
218  //cublasGetMatrix((n-j), jb, sizeof( cuDoub
219  //leComplex),dA(j, j), ldda, A(j, j), lda);
220 
221  magma_sgetmatrix_async( (n-j-jb), jb,
222  dA(j+jb, j), ldda,
223  A(j+jb, j), lda, stream[1] );
224 
225  magma_sgetmatrix_async( jb, jb,
226  dA(j,j), ldda,
227  A(j,j), lda, stream[0] );
228 
229  magma_queue_sync( stream[0] );
230  }
231 
232  /* Compute inverse of current diagonal block */
233  lapackf77_strtri(MagmaLowerStr, diag_, &jb, A(j,j), &lda, info);
234 
235  magma_ssetmatrix( jb, jb,
236  A(j, j), lda,
237  dA(j, j), ldda );
238  }
239  }
240  }
241 
242  magma_queue_destroy( stream[0] );
243  magma_queue_destroy( stream[1] );
244 
245  magma_free( work );
246 
247  return *info;
248 }