MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zcposv_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for zcposv_gpu.cpp:

Go to the source code of this file.

Macros

#define BWDMAX   1.0
 
#define ITERMAX   30
 
#define dB(i, j)   (dB + (i) + (j)*lddb)
 
#define dX(i, j)   (dX + (i) + (j)*lddx)
 
#define dR(i, j)   (dR + (i) + (j)*lddr)
 
#define dSX(i, j)   (dSX + (i) + (j)*lddsx)
 

Functions

magma_int_t magma_zcposv_gpu (char uplo, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *dX, magma_int_t lddx, magmaDoubleComplex *dworkd, magmaFloatComplex *dworks, magma_int_t *iter, magma_int_t *info)
 

Macro Definition Documentation

#define BWDMAX   1.0

Definition at line 13 of file zcposv_gpu.cpp.

#define dB (   i,
 
)    (dB + (i) + (j)*lddb)
#define dR (   i,
 
)    (dR + (i) + (j)*lddr)
#define dSX (   i,
 
)    (dSX + (i) + (j)*lddsx)
#define dX (   i,
 
)    (dX + (i) + (j)*lddx)
#define ITERMAX   30

Definition at line 14 of file zcposv_gpu.cpp.

Function Documentation

magma_int_t magma_zcposv_gpu ( char  uplo,
magma_int_t  n,
magma_int_t  nrhs,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  dB,
magma_int_t  lddb,
magmaDoubleComplex *  dX,
magma_int_t  lddx,
magmaDoubleComplex *  dworkd,
magmaFloatComplex *  dworks,
magma_int_t iter,
magma_int_t info 
)

Definition at line 17 of file zcposv_gpu.cpp.

References __func__, BWDMAX, dB, dR, dSX, dX, ITERMAX, lapackf77_dlamch, lapackf77_zlange, magma_cpotrf_gpu(), magma_cpotrs_gpu(), magma_izamax(), magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, magma_zgetmatrix, magma_zhemm(), magma_zhemv(), magma_zpotrf_gpu(), magma_zpotrs_gpu(), magmablas_clag2z(), magmablas_zcaxpycp(), magmablas_zlacpy(), magmablas_zlag2c(), magmablas_zlanhe(), magmablas_zlat2c(), MagmaLeft, MagmaUpperLower, and max.

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  ZCPOSV computes the solution to a complex system of linear equations
33  A * X = B,
34  where A is an N-by-N Hermitian positive definite matrix and X and B
35  are N-by-NRHS matrices.
36 
37  ZCPOSV first attempts to factorize the matrix in complex SINGLE PRECISION
38  and use this factorization within an iterative refinement procedure
39  to produce a solution with complex DOUBLE PRECISION norm-wise backward error
40  quality (see below). If the approach fails the method switches to a
41  complex DOUBLE PRECISION factorization and solve.
42 
43  The iterative refinement is not going to be a winning strategy if
44  the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION
45  performance is too small. A reasonable strategy should take the
46  number of right-hand sides and the size of the matrix into account.
47  This might be done with a call to ILAENV in the future. Up to now, we
48  always try iterative refinement.
49 
50  The iterative refinement process is stopped if
51  ITER > ITERMAX
52  or for all the RHS we have:
53  RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
54  where
55  o ITER is the number of the current iteration in the iterative
56  refinement process
57  o RNRM is the infinity-norm of the residual
58  o XNRM is the infinity-norm of the solution
59  o ANRM is the infinity-operator-norm of the matrix A
60  o EPS is the machine epsilon returned by DLAMCH('Epsilon')
61  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.
62 
63  Arguments
64  =========
65  UPLO (input) CHARACTER
66  = 'U': Upper triangle of A is stored;
67  = 'L': Lower triangle of A is stored.
68 
69  N (input) INTEGER
70  The number of linear equations, i.e., the order of the
71  matrix A. N >= 0.
72 
73  NRHS (input) INTEGER
74  The number of right hand sides, i.e., the number of columns
75  of the matrix B. NRHS >= 0.
76 
77  dA (input or input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
78  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
79  N-by-N upper triangular part of A contains the upper
80  triangular part of the matrix A, and the strictly lower
81  triangular part of A is not referenced. If UPLO = 'L', the
82  leading N-by-N lower triangular part of A contains the lower
83  triangular part of the matrix A, and the strictly upper
84  triangular part of A is not referenced.
85  On exit, if iterative refinement has been successfully used
86  (INFO.EQ.0 and ITER.GE.0, see description below), then A is
87  unchanged, if double factorization has been used
88  (INFO.EQ.0 and ITER.LT.0, see description below), then the
89  array dA contains the factor U or L from the Cholesky
90  factorization A = U**T*U or A = L*L**T.
91 
92  LDDA (input) INTEGER
93  The leading dimension of the array dA. LDDA >= max(1,N).
94 
95  dB (input) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS)
96  The N-by-NRHS right hand side matrix B.
97 
98  LDDB (input) INTEGER
99  The leading dimension of the array dB. LDDB >= max(1,N).
100 
101  dX (output) COMPLEX_16 array on the GPU, dimension (LDDX,NRHS)
102  If INFO = 0, the N-by-NRHS solution matrix X.
103 
104  LDDX (input) INTEGER
105  The leading dimension of the array dX. LDDX >= max(1,N).
106 
107  dworkd (workspace) COMPLEX_16 array on the GPU, dimension (N*NRHS)
108  This array is used to hold the residual vectors.
109 
110  dworks (workspace) COMPLEX array on the GPU, dimension (N*(N+NRHS))
111  This array is used to store the complex single precision matrix
112  and the right-hand sides or solutions in single precision.
113 
114  ITER (output) INTEGER
115  < 0: iterative refinement has failed, double precision
116  factorization has been performed
117  -1 : the routine fell back to full precision for
118  implementation- or machine-specific reasons
119  -2 : narrowing the precision induced an overflow,
120  the routine fell back to full precision
121  -3 : failure of SPOTRF
122  -31: stop the iterative refinement after the 30th iteration
123  > 0: iterative refinement has been successfully used.
124  Returns the number of iterations
125 
126  INFO (output) INTEGER
127  = 0: successful exit
128  < 0: if INFO = -i, the i-th argument had an illegal value
129  > 0: if INFO = i, the leading minor of order i of (DOUBLE
130  PRECISION) A is not positive definite, so the
131  factorization could not be completed, and the solution
132  has not been computed.
133 
134  ===================================================================== */
135 
136  #define dB(i,j) (dB + (i) + (j)*lddb)
137  #define dX(i,j) (dX + (i) + (j)*lddx)
138  #define dR(i,j) (dR + (i) + (j)*lddr)
139  #define dSX(i,j) (dSX + (i) + (j)*lddsx)
140 
141  magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
142  magmaDoubleComplex c_one = MAGMA_Z_ONE;
143  magma_int_t ione = 1;
144  magmaDoubleComplex *dR;
145  magmaFloatComplex *dSA, *dSX;
146  magmaDoubleComplex Xnrmv, Rnrmv;
147  double Anrm, Xnrm, Rnrm, cte, eps;
148  magma_int_t i, j, iiter, lddsa, lddsx, lddr;
149 
150  /* Check arguments */
151  *iter = 0;
152  *info = 0;
153  if ( n < 0 )
154  *info = -1;
155  else if ( nrhs < 0 )
156  *info = -2;
157  else if ( ldda < max(1,n))
158  *info = -4;
159  else if ( lddb < max(1,n))
160  *info = -7;
161  else if ( lddx < max(1,n))
162  *info = -9;
163 
164  if (*info != 0) {
165  magma_xerbla( __func__, -(*info) );
166  return *info;
167  }
168 
169  if ( n == 0 || nrhs == 0 )
170  return *info;
171 
172  lddsa = n;
173  lddsx = n;
174  lddr = n;
175 
176  dSA = dworks;
177  dSX = dSA + lddsa*n;
178  dR = dworkd;
179 
180  eps = lapackf77_dlamch("Epsilon");
181  Anrm = magmablas_zlanhe('I', uplo, n, dA, ldda, (double*)dworkd );
182  cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX;
183 
184  /*
185  * Convert to single precision
186  */
187  magmablas_zlag2c( n, nrhs, dB, lddb, dSX, lddsx, info );
188  if (*info != 0) {
189  *iter = -2;
190  goto FALLBACK;
191  }
192 
193  magmablas_zlat2c( uplo, n, dA, ldda, dSA, lddsa, info );
194  if (*info != 0) {
195  *iter = -2;
196  goto FALLBACK;
197  }
198 
199  // factor dSA in single precision
200  magma_cpotrf_gpu( uplo, n, dSA, lddsa, info );
201  if (*info != 0) {
202  *iter = -3;
203  goto FALLBACK;
204  }
205 
206  // solve dSA*dSX = dB in single precision
207  magma_cpotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info );
208 
209  // residual dR = dB - dA*dX in double precision
210  magmablas_clag2z( n, nrhs, dSX, lddsx, dX, lddx, info );
211  magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dR, lddr );
212  if ( nrhs == 1 ) {
213  magma_zhemv( uplo, n,
214  c_neg_one, dA, ldda,
215  dX, 1,
216  c_one, dR, 1 );
217  }
218  else {
219  magma_zhemm( MagmaLeft, uplo, n, nrhs,
220  c_neg_one, dA, ldda,
221  dX, lddx,
222  c_one, dR, lddr );
223  }
224 
225  // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange?
226  for( j=0; j < nrhs; j++ ) {
227  i = magma_izamax( n, dX(0,j), 1) - 1;
228  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
229  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
230 
231  i = magma_izamax ( n, dR(0,j), 1 ) - 1;
232  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
233  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
234 
235  if ( Rnrm > Xnrm*cte ) {
236  goto REFINEMENT;
237  }
238  }
239 
240  *iter = 0;
241  return *info;
242 
243 REFINEMENT:
244  for( iiter=1; iiter < ITERMAX; ) {
245  *info = 0;
246  // convert residual dR to single precision dSX
247  magmablas_zlag2c( n, nrhs, dR, lddr, dSX, lddsx, info );
248  if (*info != 0) {
249  *iter = -2;
250  goto FALLBACK;
251  }
252  // solve dSA*dSX = R in single precision
253  magma_cpotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info );
254 
255  // Add correction and setup residual
256  // dX += dSX [including conversion] --and--
257  // dR = dB
258  for( j=0; j < nrhs; j++ ) {
259  magmablas_zcaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j) );
260  }
261 
262  // residual dR = dB - dA*dX in double precision
263  if ( nrhs == 1 ) {
264  magma_zhemv( uplo, n,
265  c_neg_one, dA, ldda,
266  dX, 1,
267  c_one, dR, 1 );
268  }
269  else {
270  magma_zhemm( MagmaLeft, uplo, n, nrhs,
271  c_neg_one, dA, ldda,
272  dX, lddx,
273  c_one, dR, lddr );
274  }
275 
276  /* Check whether the nrhs normwise backward errors satisfy the
277  * stopping criterion. If yes, set ITER=IITER>0 and return. */
278  for( j=0; j < nrhs; j++ ) {
279  i = magma_izamax( n, dX(0,j), 1) - 1;
280  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
281  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
282 
283  i = magma_izamax ( n, dR(0,j), 1 ) - 1;
284  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
285  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
286 
287  if ( Rnrm > Xnrm*cte ) {
288  goto L20;
289  }
290  }
291 
292  /* If we are here, the nrhs normwise backward errors satisfy
293  * the stopping criterion, we are good to exit. */
294  *iter = iiter;
295  return *info;
296 
297  L20:
298  iiter++;
299  }
300 
301  /* If we are at this place of the code, this is because we have
302  * performed ITER=ITERMAX iterations and never satisified the
303  * stopping criterion. Set up the ITER flag accordingly and follow
304  * up on double precision routine. */
305  *iter = -ITERMAX - 1;
306 
307 FALLBACK:
308  /* Single-precision iterative refinement failed to converge to a
309  * satisfactory solution, so we resort to double precision. */
310  magma_zpotrf_gpu( uplo, n, dA, ldda, info );
311  if (*info == 0) {
312  magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx );
313  magma_zpotrs_gpu( uplo, n, nrhs, dA, ldda, dX, lddx, info );
314  }
315 
316  return *info;
317 }
#define ITERMAX
Definition: zcposv_gpu.cpp:14
#define MagmaUpperLower
Definition: magma.h:63
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
magma_int_t magma_zpotrs_gpu(char uplo, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
void magmablas_clag2z(magma_int_t M, magma_int_t N, cuFloatComplex *SA, magma_int_t ldsa, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
#define dX(i, j)
#define dB(i, j)
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_cpotrf_gpu(char uplo, magma_int_t n, magmaFloatComplex *dA, magma_int_t ldda, magma_int_t *info)
Definition: cpotrf_gpu.cpp:24
#define lapackf77_dlamch
Definition: magma_lapack.h:27
void magmablas_zcaxpycp(cuFloatComplex *, cuDoubleComplex *, magma_int_t, cuDoubleComplex *, cuDoubleComplex *)
void magmablas_zlacpy(char uplo, magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *B, magma_int_t ldb)
magma_int_t ldda
magma_int_t magma_zpotrf_gpu(char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
#define BWDMAX
Definition: zcposv_gpu.cpp:13
magma_int_t magma_cpotrs_gpu(char uplo, magma_int_t n, magma_int_t nrhs, magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *dB, magma_int_t lddb, magma_int_t *info)
Definition: cpotrs_gpu.cpp:14
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_izamax(magma_int_t n, magmaDoubleComplex_const_ptr dx, magma_int_t incx)
void magma_zhemm(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
double magmablas_zlanhe(char norm, char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, double *WORK)
void magmablas_zlag2c(magma_int_t M, magma_int_t N, const cuDoubleComplex *A, magma_int_t lda, cuFloatComplex *SA, magma_int_t ldsa, magma_int_t *info)
void magma_zhemv(magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dx, magma_int_t incx, magmaDoubleComplex beta, magmaDoubleComplex_ptr dy, magma_int_t incy)
#define dR(i, j)
void magmablas_zlat2c(char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuFloatComplex *SA, magma_int_t ldsa, magma_int_t *info)
#define lapackf77_zlange
Definition: magma_zlapack.h:75
#define MAGMA_Z_ONE
Definition: magma.h:132
#define dSX(i, j)
#define max(a, b)
Definition: common_magma.h:82
#define dA(dev, i, j)

Here is the call graph for this function: