MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dtrtri_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated d Thu May 10 22:27:06 2012
9 
10 */
11 #include "common_magma.h"
12 
13 // === Define what BLAS to use ============================================
14 #define PRECISION_d
15 #if (defined(PRECISION_s) || defined(PRECISION_d))
16  #define magma_dgemm magmablas_dgemm
17  #define magma_dtrsm magmablas_dtrsm
18 #endif
19 
20 #if (GPUSHMEM >= 200)
21  #if (defined(PRECISION_s))
22  #undef magma_sgemm
23  #define magma_sgemm magmablas_sgemm_fermi80
24  #endif
25 #endif
26 // === End defining what BLAS to use ======================================
27 
28 #define dA(i, j) (dA+(j)*ldda + (i))
29 
30 extern "C" magma_int_t
32  double *dA, magma_int_t ldda, magma_int_t *info)
33 {
34 /* -- MAGMA (version 1.2.0) --
35  Univ. of Tennessee, Knoxville
36  Univ. of California, Berkeley
37  Univ. of Colorado, Denver
38  May 2012
39 
40  Purpose
41  =======
42 
43  DTRTRI computes the inverse of a real upper or lower triangular
44  matrix dA.
45 
46  This is the Level 3 BLAS version of the algorithm.
47 
48  Arguments
49  =========
50 
51  UPLO (input) CHARACTER*1
52  = 'U': A is upper triangular;
53  = 'L': A is lower triangular.
54 
55  DIAG (input) CHARACTER*1
56  = 'N': A is non-unit triangular;
57  = 'U': A is unit triangular.
58 
59  N (input) INTEGER
60  The order of the matrix A. N >= 0.
61 
62  dA (input/output) DOUBLE PRECISION array ON THE GPU, dimension (LDDA,N)
63  On entry, the triangular matrix A. If UPLO = 'U', the
64  leading N-by-N upper triangular part of the array dA contains
65  the upper triangular matrix, and the strictly lower
66  triangular part of A is not referenced. If UPLO = 'L', the
67  leading N-by-N lower triangular part of the array dA contains
68  the lower triangular matrix, and the strictly upper
69  triangular part of A is not referenced. If DIAG = 'U', the
70  diagonal elements of A are also not referenced and are
71  assumed to be 1.
72  On exit, the (triangular) inverse of the original matrix, in
73  the same storage format.
74 
75  LDDA (input) INTEGER
76  The leading dimension of the array dA. LDDA >= max(1,N).
77  INFO (output) INTEGER
78  = 0: successful exit
79  < 0: if INFO = -i, the i-th argument had an illegal value
80  > 0: if INFO = i, dA(i,i) is exactly zero. The triangular
81  matrix is singular and its inverse can not be computed.
82 
83  ===================================================================== */
84 
85 
86  /* Local variables */
87  char uplo_[2] = {uplo, 0};
88  char diag_[2] = {diag, 0};
89  magma_int_t nb, nn, j, jb;
90  double c_one = MAGMA_D_ONE;
91  double c_neg_one = MAGMA_D_NEG_ONE;
92  double *work;
93 
94  long int upper = lapackf77_lsame(uplo_, "U");
95  long int nounit = lapackf77_lsame(diag_, "N");
96 
97  *info = 0;
98 
99  if ((! upper) && (! lapackf77_lsame(uplo_, "L")))
100  *info = -1;
101  else if ((! nounit) && (! lapackf77_lsame(diag_, "U")))
102  *info = -2;
103  else if (n < 0)
104  *info = -3;
105  else if (ldda < max(1,n))
106  *info = -5;
107 
108  if (*info != 0) {
109  magma_xerbla( __func__, -(*info) );
110  return *info;
111  }
112 
113 
114  /* Check for singularity if non-unit */
115  if (nounit)
116  {
117  for (*info=0; *info < n; *info=*info+1)
118  {
119  if(dA(*info,*info)==0)
120  return *info;
121  }
122  *info=0;
123  }
124 
125  nb = magma_get_dpotrf_nb(n);
126 
127  if (MAGMA_SUCCESS != magma_dmalloc_host( &work, nb*nb )) {
128  *info = MAGMA_ERR_HOST_ALLOC;
129  return *info;
130  }
131 
132  static cudaStream_t stream[2];
133  magma_queue_create( &stream[0] );
134  magma_queue_create( &stream[1] );
135 
136 
137  if (nb <= 1 || nb >= n)
138  {
139  magma_dgetmatrix( n, n, dA, ldda, work, n );
140  lapackf77_dtrtri(uplo_, diag_, &n, work, &n, info);
141  magma_dsetmatrix( n, n, work, n, dA, ldda );
142  }
143  else
144  {
145  if (upper)
146  {
147  /* Compute inverse of upper triangular matrix */
148  for (j=0; j<n; j =j+ nb)
149  {
150  jb = min(nb, (n-j));
151 
152  /* Compute rows 1:j-1 of current block column */
154  MagmaNoTrans, MagmaNonUnit, j, jb,
155  c_one, dA(0,0), ldda, dA(0, j),ldda);
156 
158  MagmaNoTrans, MagmaNonUnit, j, jb,
159  c_neg_one, dA(j,j), ldda, dA(0, j),ldda);
160 
161 
162  //cublasGetMatrix(jb ,jb, sizeof(double),
163  // dA(j, j), ldda, work, jb);
164 
165  magma_dgetmatrix_async( jb, jb,
166  dA(j, j), ldda,
167  work, jb, stream[1] );
168 
169  magma_queue_sync( stream[1] );
170 
171  /* Compute inverse of current diagonal block */
172  lapackf77_dtrtri(MagmaUpperStr, diag_, &jb, work, &jb, info);
173 
174  //cublasSetMatrix(jb, jb, sizeof(double),
175  // work, jb, dA(j, j), ldda);
176 
177  magma_dsetmatrix_async( jb, jb,
178  work, jb,
179  dA(j, j), ldda, stream[0] );
180  }
181 
182  }
183  else
184  {
185  /* Compute inverse of lower triangular matrix */
186  nn=((n-1)/nb)*nb+1;
187 
188  for(j=nn-1; j>=0; j=j-nb)
189  {
190  jb=min(nb,(n-j));
191 
192  if((j+jb) < n)
193  {
194 
195  /* Compute rows j+jb:n of current block column */
197  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
198  c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda);
199 
201  MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
202  c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda);
203  }
204 
205  //cublasGetMatrix(jb, jb, sizeof(double),
206  // dA(j, j), ldda, work, jb);
207 
208  magma_dgetmatrix_async( jb, jb,
209  dA(j, j), ldda,
210  work, jb, stream[1] );
211 
212  magma_queue_sync( stream[1] );
213 
214  /* Compute inverse of current diagonal block */
215  lapackf77_dtrtri(MagmaLowerStr, diag_, &jb, work, &jb, info);
216 
217  //cublasSetMatrix(jb, jb, sizeof(double),
218  // work, jb, dA(j, j), ldda);
219 
220  magma_dsetmatrix_async( jb, jb,
221  work, jb,
222  dA(j, j), ldda, stream[0] );
223  }
224  }
225  }
226 
227  magma_queue_destroy( stream[0] );
228  magma_queue_destroy( stream[1] );
229 
230  magma_free_host( work );
231 
232  return *info;
233 }