MAGMA  1.2.0 MatrixAlgebraonGPUandMulticoreArchitectures
strtri.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  May 2012
7
8  @generated s Thu May 10 22:27:06 2012
9
10 */
11 #include "common_magma.h"
12
13 // === Define what BLAS to use ============================================
14  #define PRECISION_s
15  #if (defined(PRECISION_s) || defined(PRECISION_d))
16  #define magma_sgemm magmablas_sgemm
17  #define magma_strsm magmablas_strsm
18  #endif
19
20  #if (GPUSHMEM >= 200)
21  #if (defined(PRECISION_s))
22  #undef magma_sgemm
23  #define magma_sgemm magmablas_sgemm_fermi80
24  #endif
25  #endif
26 // === End defining what BLAS to use ======================================
27
28  #define A(i, j) (a +(j)*lda + (i))
29  #define dA(i, j) (work+(j)*ldda + (i))
30
31
32 extern "C" magma_int_t
34  float *a, magma_int_t lda, magma_int_t *info)
35 {
36 /* -- MAGMA (version 1.2.0) --
37  Univ. of Tennessee, Knoxville
38  Univ. of California, Berkeley
40  May 2012
41
42  Purpose
43  =======
44
45  DTRTRI computes the inverse of a real upper or lower triangular
46  matrix A.
47
48  This is the Level 3 BLAS version of the algorithm.
49
50  Arguments
51  =========
52
53  UPLO (input) CHARACTER*1
54  = 'U': A is upper triangular;
55  = 'L': A is lower triangular.
56
57  DIAG (input) CHARACTER*1
58  = 'N': A is non-unit triangular;
59  = 'U': A is unit triangular.
60
61  N (input) INTEGER
62  The order of the matrix A. N >= 0.
63
64  A (input/output) REAL array, dimension (LDA,N)
65  On entry, the triangular matrix A. If UPLO = 'U', the
66  leading N-by-N upper triangular part of the array A contains
67  the upper triangular matrix, and the strictly lower
68  triangular part of A is not referenced. If UPLO = 'L', the
69  leading N-by-N lower triangular part of the array A contains
70  the lower triangular matrix, and the strictly upper
71  triangular part of A is not referenced. If DIAG = 'U', the
72  diagonal elements of A are also not referenced and are
73  assumed to be 1.
74  On exit, the (triangular) inverse of the original matrix, in
75  the same storage format.
76
77  LDA (input) INTEGER
78  The leading dimension of the array A. LDA >= max(1,N).
79  INFO (output) INTEGER
80  = 0: successful exit
81  < 0: if INFO = -i, the i-th argument had an illegal value
82  > 0: if INFO = i, A(i,i) is exactly zero. The triangular
83  matrix is singular and its inverse can not be computed.
84
85  ===================================================================== */
86
87
88  /* Local variables */
89  char uplo_[2] = {uplo, 0};
90  char diag_[2] = {diag, 0};
91  magma_int_t ldda, nb, nn;
92  static magma_int_t j, jb;
93  float c_one = MAGMA_S_ONE;
94  float c_neg_one = MAGMA_S_NEG_ONE;
95  float *work;
96
97  long int upper = lapackf77_lsame(uplo_, "U");
98  long int nounit = lapackf77_lsame(diag_, "N");
99
100  *info = 0;
101
102  if ((! upper) && (! lapackf77_lsame(uplo_, "L")))
103  *info = -1;
104  else if ((! nounit) && (! lapackf77_lsame(diag_, "U")))
105  *info = -2;
106  else if (n < 0)
107  *info = -3;
108  else if (lda < max(1,n))
109  *info = -5;
110
111  if (*info != 0) {
112  magma_xerbla( __func__, -(*info) );
113  return *info;
114  }
115
116  /* Quick return */
117  if ( n == 0 )
118  return *info;
119
120
121  /* Check for singularity if non-unit */
122  if (nounit)
123  {
124  for (*info=0; *info < n; *info=*info+1)
125  {
126  if(A(*info,*info)==0)
127  return *info;
128  }
129  *info=0;
130  }
131
132
133  /* Determine the block size for this environment */
134  ldda = ((n+31)/32)*32;
135
136  if (MAGMA_SUCCESS != magma_smalloc( &work, (n)*ldda )) {
137  *info = MAGMA_ERR_DEVICE_ALLOC;
138  return *info;
139  }
140
141  static cudaStream_t stream[2];
142  magma_queue_create( &stream[0] );
143  magma_queue_create( &stream[1] );
144
145  nb = magma_get_spotrf_nb(n);
146
147  if (nb <= 1 || nb >= n)
148  lapackf77_strtri(uplo_, diag_, &n, a, &lda, info);
149  else
150  {
151  if (upper)
152  {
153  /* Compute inverse of upper triangular matrix */
154  for (j=0; j<n; j =j+ nb)
155  {
156  jb = min(nb, (n-j));
157  magma_ssetmatrix( jb, (n-j),
158  A(j, j), lda,
159  dA(j, j), ldda );
160
161  /* Compute rows 1:j-1 of current block column */
163  MagmaNoTrans, MagmaNonUnit, j, jb,
164  c_one, dA(0,0), ldda, dA(0, j),ldda);
165
167  MagmaNoTrans, MagmaNonUnit, j, jb,
168  c_neg_one, dA(j,j), ldda, dA(0, j),ldda);
169
170  //cublasGetMatrix(j ,jb, sizeof( float),
171  //dA(0, j), ldda, A(0, j), lda);
172
173  magma_sgetmatrix_async( jb, jb,
174  dA(j, j), ldda,
175  A(j, j), lda, stream[1] );
176
177
178  magma_sgetmatrix_async( j, jb,
179  dA(0, j), ldda,
180  A(0, j), lda, stream[0] );
181
182  magma_queue_sync( stream[1] );
183
184  /* Compute inverse of current diagonal block */
185  lapackf77_strtri(MagmaUpperStr, diag_, &jb, A(j,j), &lda, info);
186
187  magma_ssetmatrix( jb, jb,
188  A(j, j), lda,
189  dA(j, j), ldda );
190  }
191
192  }
193  else
194  {
195  /* Compute inverse of lower triangular matrix */
196  nn=((n-1)/nb)*nb+1;
197
198  for(j=nn-1; j>=0; j=j-nb)
199  {
200
201  jb=min(nb,(n-j));
202
203  if((j+jb) < n)
204  {
205  magma_ssetmatrix( (n-j), jb,
206  A(j, j), lda,
207  dA(j, j), ldda );
208
209  /* Compute rows j+jb:n of current block column */
211  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
212  c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda);
213
215  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
216  c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda);
217
218  //cublasGetMatrix((n-j), jb, sizeof( cuDoub
219  //leComplex),dA(j, j), ldda, A(j, j), lda);
220
221  magma_sgetmatrix_async( (n-j-jb), jb,
222  dA(j+jb, j), ldda,
223  A(j+jb, j), lda, stream[1] );
224
225  magma_sgetmatrix_async( jb, jb,
226  dA(j,j), ldda,
227  A(j,j), lda, stream[0] );
228
229  magma_queue_sync( stream[0] );
230  }
231
232  /* Compute inverse of current diagonal block */
233  lapackf77_strtri(MagmaLowerStr, diag_, &jb, A(j,j), &lda, info);
234
235  magma_ssetmatrix( jb, jb,
236  A(j, j), lda,
237  dA(j, j), ldda );
238  }
239  }
240  }
241
242  magma_queue_destroy( stream[0] );
243  magma_queue_destroy( stream[1] );
244
245  magma_free( work );
246
247  return *info;
248 }