MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
magma_zc.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

magma_int_t magma_zcgesv_gpu (char trans, magma_int_t N, magma_int_t NRHS, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *IPIV, magma_int_t *dIPIV, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *dX, magma_int_t lddx, magmaDoubleComplex *dworkd, magmaFloatComplex *dworks, magma_int_t *iter, magma_int_t *info)
 
magma_int_t magma_zcgetrs_gpu (char trans, magma_int_t n, magma_int_t nrhs, magmaFloatComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *dX, magma_int_t lddx, magmaFloatComplex *dSX, magma_int_t *info)
 
magma_int_t magma_zcposv_gpu (char uplo, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *dX, magma_int_t lddx, magmaDoubleComplex *dworkd, magmaFloatComplex *dworks, magma_int_t *iter, magma_int_t *info)
 
magma_int_t magma_zcgeqrsv_gpu (magma_int_t M, magma_int_t N, magma_int_t NRHS, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *dX, magma_int_t lddx, magma_int_t *iter, magma_int_t *info)
 

Function Documentation

magma_int_t magma_zcgeqrsv_gpu ( magma_int_t  M,
magma_int_t  N,
magma_int_t  NRHS,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  dB,
magma_int_t  lddb,
magmaDoubleComplex *  dX,
magma_int_t  lddx,
magma_int_t iter,
magma_int_t info 
)

Definition at line 17 of file zcgeqrsv_gpu.cpp.

References __func__, BWDMAX, dB, dR, dSX, dT, dX, ITERMAX, lapackf77_dlamch, lapackf77_zlange, magma_cgeqrf_gpu(), magma_cgeqrs_gpu(), magma_cmalloc(), magma_cmalloc_cpu(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_cgeqrf_nb(), magma_get_zgeqrf_nb(), magma_izamax(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, magma_zgemm(), magma_zgemv(), magma_zgeqrf_gpu(), magma_zgeqrs_gpu(), magma_zgetmatrix, magma_zmalloc(), magma_zmalloc_cpu(), magmablas_clag2z(), magmablas_zcaxpycp(), magmablas_zlacpy(), magmablas_zlag2c(), magmablas_zlange(), MagmaNoTrans, MagmaUpperLower, max, and min.

22 {
23 /* -- MAGMA (version 1.4.0) --
24  Univ. of Tennessee, Knoxville
25  Univ. of California, Berkeley
26  Univ. of Colorado, Denver
27  August 2013
28 
29  Purpose
30  =======
31  ZCGEQRSV solves the least squares problem
32  min || A*X - B ||,
33  where A is an M-by-N matrix and X and B are M-by-NRHS matrices.
34 
35  ZCGEQRSV first attempts to factorize the matrix in complex SINGLE PRECISION
36  and use this factorization within an iterative refinement procedure
37  to produce a solution with complex DOUBLE PRECISION norm-wise backward error
38  quality (see below). If the approach fails the method switches to a
39  complex DOUBLE PRECISION factorization and solve.
40 
41  The iterative refinement is not going to be a winning strategy if
42  the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION
43  performance is too small. A reasonable strategy should take the
44  number of right-hand sides and the size of the matrix into account.
45  This might be done with a call to ILAENV in the future. Up to now, we
46  always try iterative refinement.
47 
48  The iterative refinement process is stopped if
49  ITER > ITERMAX
50  or for all the RHS we have:
51  RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
52  where
53  o ITER is the number of the current iteration in the iterative
54  refinement process
55  o RNRM is the infinity-norm of the residual
56  o XNRM is the infinity-norm of the solution
57  o ANRM is the infinity-operator-norm of the matrix A
58  o EPS is the machine epsilon returned by DLAMCH('Epsilon')
59  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.
60 
61  Arguments
62  =========
63  M (input) INTEGER
64  The number of rows of the matrix A. M >= 0.
65 
66  N (input) INTEGER
67  The number of columns of the matrix A. M >= N >= 0.
68 
69  NRHS (input) INTEGER
70  The number of right hand sides, i.e., the number of columns
71  of the matrix B. NRHS >= 0.
72 
73  dA (input or input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
74  On entry, the M-by-N coefficient matrix A.
75  On exit, if iterative refinement has been successfully used
76  (info.EQ.0 and ITER.GE.0, see description below), A is
77  unchanged. If double precision factorization has been used
78  (info.EQ.0 and ITER.LT.0, see description below), then the
79  array dA contains the QR factorization of A as returned by
80  function DGEQRF_GPU.
81 
82  LDDA (input) INTEGER
83  The leading dimension of the array dA. LDDA >= max(1,M).
84 
85  dB (input or input/output) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS)
86  The M-by-NRHS right hand side matrix B.
87  May be overwritten (e.g., if refinement fails).
88 
89  LDDB (input) INTEGER
90  The leading dimension of the array dB. LDDB >= max(1,M).
91 
92  dX (output) COMPLEX_16 array on the GPU, dimension (LDDX,NRHS)
93  If info = 0, the N-by-NRHS solution matrix X.
94 
95  LDDX (input) INTEGER
96  The leading dimension of the array dX. LDDX >= max(1,N).
97 
98  ITER (output) INTEGER
99  < 0: iterative refinement has failed, double precision
100  factorization has been performed
101  -1 : the routine fell back to full precision for
102  implementation- or machine-specific reasons
103  -2 : narrowing the precision induced an overflow,
104  the routine fell back to full precision
105  -3 : failure of SGEQRF
106  -31: stop the iterative refinement after the 30th iteration
107  > 0: iterative refinement has been successfully used.
108  Returns the number of iterations
109 
110  INFO (output) INTEGER
111  = 0: successful exit
112  < 0: if info = -i, the i-th argument had an illegal value
113 
114  ===================================================================== */
115 
116  #define dB(i,j) (dB + (i) + (j)*lddb)
117  #define dX(i,j) (dX + (i) + (j)*lddx)
118  #define dR(i,j) (dR + (i) + (j)*lddr)
119  #define dSX(i,j) (dSX + (i) + (j)*lddsx)
120 
121  magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
122  magmaDoubleComplex c_one = MAGMA_Z_ONE;
123  magma_int_t ione = 1;
124  magmaDoubleComplex *dworkd, *hworkd;
125  magmaFloatComplex *dworks, *hworks;
126  magmaDoubleComplex *dR, *tau, *dT;
127  magmaFloatComplex *dSA, *dSX, *dST, *stau;
128  magmaDoubleComplex Xnrmv, Rnrmv;
129  double Anrm, Xnrm, Rnrm, cte, eps;
130  magma_int_t i, j, iiter, lddsa, lddsx, lddr, nb, lhwork, minmn, size, ldworkd;
131 
132  /* Check arguments */
133  *iter = 0;
134  *info = 0;
135  if ( m < 0 )
136  *info = -1;
137  else if ( n < 0 || n > m )
138  *info = -2;
139  else if ( nrhs < 0 )
140  *info = -3;
141  else if ( ldda < max(1,m))
142  *info = -5;
143  else if ( lddb < max(1,m))
144  *info = -7;
145  else if ( lddx < max(1,n))
146  *info = -9;
147 
148  if (*info != 0) {
149  magma_xerbla( __func__, -(*info) );
150  return *info;
151  }
152 
153  if ( m == 0 || n == 0 || nrhs == 0 )
154  return *info;
155 
156  nb = magma_get_cgeqrf_nb(m);
157  minmn= min(m, n);
158 
159  /* dSX contains both B and X, so must be max(m or lddb,n). */
160  lddsa = ldda;
161  lddsx = max(lddb,n);
162  lddr = lddb;
163 
164  /*
165  * Allocate temporary buffers
166  */
167  /* dworks(dSA + dSX + dST) */
168  size = lddsa*n + lddsx*nrhs + ( 2*minmn + ((n+31)/32)*32 )*nb;
169  if (MAGMA_SUCCESS != magma_cmalloc( &dworks, size )) {
170  fprintf(stderr, "Allocation of dworks failed (%d)\n", (int) size);
171  *info = MAGMA_ERR_DEVICE_ALLOC;
172  return *info;
173  }
174  dSA = dworks;
175  dSX = dSA + lddsa*n;
176  dST = dSX + lddsx*nrhs;
177 
178  /* dworkd(dR) = lddr*nrhs */
179  ldworkd = lddr*nrhs;
180  if (MAGMA_SUCCESS != magma_zmalloc( &dworkd, ldworkd )) {
181  magma_free( dworks );
182  fprintf(stderr, "Allocation of dworkd failed\n");
183  *info = MAGMA_ERR_DEVICE_ALLOC;
184  return *info;
185  }
186  dR = dworkd;
187 
188  /* hworks(workspace for cgeqrs + stau) = min(m,n) + lhworks */
189  lhwork = (m - n + nb)*(nrhs + nb) + nrhs*nb;
190  size = lhwork + minmn;
191  magma_cmalloc_cpu( &hworks, size );
192  if ( hworks == NULL ) {
193  magma_free( dworks );
194  magma_free( dworkd );
195  fprintf(stderr, "Allocation of hworks failed\n");
196  *info = MAGMA_ERR_HOST_ALLOC;
197  return *info;
198  }
199  stau = hworks + lhwork;
200 
201  eps = lapackf77_dlamch("Epsilon");
202  Anrm = magmablas_zlange('I', m, n, dA, ldda, (double*)dworkd );
203  cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX;
204 
205  /*
206  * Convert to single precision
207  */
208  magmablas_zlag2c( m, nrhs, dB, lddb, dSX, lddsx, info );
209  if (*info != 0) {
210  *iter = -2;
211  goto FALLBACK;
212  }
213 
214  magmablas_zlag2c( m, n, dA, ldda, dSA, lddsa, info );
215  if (*info != 0) {
216  *iter = -2;
217  goto FALLBACK;
218  }
219 
220  // factor dSA in single precision
221  magma_cgeqrf_gpu( m, n, dSA, lddsa, stau, dST, info );
222  if (*info != 0) {
223  *iter = -3;
224  goto FALLBACK;
225  }
226 
227  // solve dSA*dSX = dB in single precision
228  magma_cgeqrs_gpu( m, n, nrhs, dSA, lddsa, stau, dST, dSX, lddsx, hworks, lhwork, info );
229  if (*info != 0) {
230  *iter = -3;
231  goto FALLBACK;
232  }
233 
234  // residual dR = dB - dA*dX in double precision
235  magmablas_clag2z( n, nrhs, dSX, lddsx, dX, lddx, info );
236  magmablas_zlacpy( MagmaUpperLower, m, nrhs, dB, lddb, dR, lddr );
237  if ( nrhs == 1 ) {
238  magma_zgemv( MagmaNoTrans, m, n,
239  c_neg_one, dA, ldda,
240  dX, 1,
241  c_one, dR, 1 );
242  }
243  else {
244  magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, nrhs, n,
245  c_neg_one, dA, ldda,
246  dX, lddx,
247  c_one, dR, lddr );
248  }
249 
250  // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange?
251  for( j=0; j < nrhs; j++ ) {
252  i = magma_izamax( n, dX(0,j), 1) - 1;
253  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
254  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
255 
256  i = magma_izamax ( m, dR(0,j), 1 ) - 1;
257  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
258  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
259 
260  if ( Rnrm > Xnrm*cte ) {
261  goto REFINEMENT;
262  }
263  }
264 
265  *iter = 0;
266 
267  /* Free workspaces */
268  magma_free( dworks );
269  magma_free( dworkd );
270  magma_free_cpu( hworks );
271  return *info;
272 
273 REFINEMENT:
274  /* TODO: this iterative refinement algorithm works only for compatibile
275  * systems (B in colspan of A).
276  * See Matrix Computations (3rd ed) p. 267 for correct algorithm. */
277  for( iiter=1; iiter < ITERMAX; ) {
278  *info = 0;
279  // convert residual dR to single precision dSX
280  magmablas_zlag2c( m, nrhs, dR, lddr, dSX, lddsx, info );
281  if (*info != 0) {
282  *iter = -2;
283  goto FALLBACK;
284  }
285  // solve dSA*dSX = R in single precision
286  magma_cgeqrs_gpu( m, n, nrhs, dSA, lddsa, stau, dST, dSX, lddsx, hworks, lhwork, info );
287  if (*info != 0) {
288  *iter = -3;
289  goto FALLBACK;
290  }
291 
292  // Add correction and setup residual
293  // dX += dSX [including conversion] --and--
294  // dR[1:n] = dB[1:n] (only n rows, not whole m rows! -- useless if m > n)
295  for( j=0; j < nrhs; j++ ) {
296  magmablas_zcaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j) );
297  }
298  // dR = dB (whole m rows)
299  magmablas_zlacpy( MagmaUpperLower, m, nrhs, dB, lddb, dR, lddr );
300 
301  // residual dR = dB - dA*dX in double precision
302  if ( nrhs == 1 ) {
303  magma_zgemv( MagmaNoTrans, m, n,
304  c_neg_one, dA, ldda,
305  dX, 1,
306  c_one, dR, 1 );
307  }
308  else {
309  magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, nrhs, n,
310  c_neg_one, dA, ldda,
311  dX, lddx,
312  c_one, dR, lddr );
313  }
314 
315  /* Check whether the nrhs normwise backward errors satisfy the
316  * stopping criterion. If yes, set ITER=IITER>0 and return. */
317  for( j=0; j < nrhs; j++ ) {
318  i = magma_izamax( n, dX(0,j), 1) - 1;
319  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
320  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
321 
322  i = magma_izamax ( m, dR(0,j), 1 ) - 1;
323  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
324  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
325 
326  if ( Rnrm > Xnrm*cte ) {
327  goto L20;
328  }
329  }
330 
331  /* If we are here, the nrhs normwise backward errors satisfy
332  * the stopping criterion, we are good to exit. */
333  *iter = iiter;
334 
335  /* Free workspaces */
336  magma_free( dworks );
337  magma_free( dworkd );
338  magma_free_cpu( hworks );
339  return *info;
340 
341  L20:
342  iiter++;
343  }
344 
345  /* If we are at this place of the code, this is because we have
346  * performed ITER=ITERMAX iterations and never satisified the
347  * stopping criterion. Set up the ITER flag accordingly and follow
348  * up on double precision routine. */
349  *iter = -ITERMAX - 1;
350 
351 FALLBACK:
352  /* Single-precision iterative refinement failed to converge to a
353  * satisfactory solution, so we resort to double precision. */
354  magma_free( dworks );
355  magma_free_cpu( hworks );
356 
357  /*
358  * Allocate temporary buffers
359  */
360  /* dworkd = dT for zgeqrf */
361  nb = magma_get_zgeqrf_nb( m );
362  size = (2*min(m, n) + (n+31)/32*32 )*nb;
363  if ( size > ldworkd ) {
364  magma_free( dworkd );
365  if (MAGMA_SUCCESS != magma_zmalloc( &dworkd, size )) {
366  fprintf(stderr, "Allocation of dworkd2 failed\n");
367  *info = MAGMA_ERR_DEVICE_ALLOC;
368  return *info;
369  }
370  }
371  dT = dworkd;
372 
373  /* hworkd(dtau + workspace for zgeqrs) = min(m,n) + lhwork */
374  size = lhwork + minmn;
375  magma_zmalloc_cpu( &hworkd, size );
376  if ( hworkd == NULL ) {
377  magma_free( dworkd );
378  fprintf(stderr, "Allocation of hworkd2 failed\n");
379  *info = MAGMA_ERR_HOST_ALLOC;
380  return *info;
381  }
382  tau = hworkd + lhwork;
383 
384  magma_zgeqrf_gpu( m, n, dA, ldda, tau, dT, info );
385  if (*info == 0) {
386  // if m > n, then dB won't fit in dX, so solve with dB and copy n rows to dX
387  magma_zgeqrs_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hworkd, lhwork, info );
388  magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx );
389  }
390 
391  magma_free( dworkd );
392  magma_free_cpu( hworkd );
393  return *info;
394 }
#define dR(i, j)
#define MagmaUpperLower
Definition: magma.h:63
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
static magma_err_t magma_zmalloc(magmaDoubleComplex_ptr *ptrPtr, size_t n)
Definition: magma.h:80
static magma_err_t magma_cmalloc(magmaFloatComplex_ptr *ptrPtr, size_t n)
Definition: magma.h:79
void magmablas_clag2z(magma_int_t M, magma_int_t N, cuFloatComplex *SA, magma_int_t ldsa, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define ITERMAX
#define magma_free(ptr)
Definition: magma.h:57
double magmablas_zlange(char norm, magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, double *WORK)
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
#define dSX(i, j)
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_zgeqrf_gpu(magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dT, magma_int_t *info)
#define dB(i, j)
#define lapackf77_dlamch
Definition: magma_lapack.h:27
void magmablas_zcaxpycp(cuFloatComplex *, cuDoubleComplex *, magma_int_t, cuDoubleComplex *, cuDoubleComplex *)
void magma_zgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
void magmablas_zlacpy(char uplo, magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *B, magma_int_t ldb)
magma_int_t magma_cgeqrf_gpu(magma_int_t m, magma_int_t n, magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *tau, magmaFloatComplex *dT, magma_int_t *info)
Definition: cgeqrf_gpu.cpp:43
#define BWDMAX
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_zgeqrs_gpu(magma_int_t m, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dT, cuDoubleComplex *dB, magma_int_t lddb, cuDoubleComplex *hwork, magma_int_t lhwork, magma_int_t *info)
magma_int_t magma_izamax(magma_int_t n, magmaDoubleComplex_const_ptr dx, magma_int_t incx)
magma_int_t ldda
void magmablas_zlag2c(magma_int_t M, magma_int_t N, const cuDoubleComplex *A, magma_int_t lda, cuFloatComplex *SA, magma_int_t ldsa, magma_int_t *info)
magma_int_t magma_get_zgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:169
#define MAGMA_SUCCESS
Definition: magma.h:106
magma_int_t magma_get_cgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:155
void magma_zgemv(magma_trans_t transA, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dx, magma_int_t incx, magmaDoubleComplex beta, magmaDoubleComplex_ptr dy, magma_int_t incy)
static magma_err_t magma_cmalloc_cpu(magmaFloatComplex **ptrPtr, size_t n)
Definition: magma.h:85
static magma_err_t magma_zmalloc_cpu(magmaDoubleComplex **ptrPtr, size_t n)
Definition: magma.h:86
#define lapackf77_zlange
Definition: magma_zlapack.h:75
#define dX(i, j)
magma_int_t magma_cgeqrs_gpu(magma_int_t m, magma_int_t n, magma_int_t nrhs, magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *tau, magmaFloatComplex *dT, magmaFloatComplex *dB, magma_int_t lddb, magmaFloatComplex *hwork, magma_int_t lhwork, magma_int_t *info)
Definition: cgeqrs_gpu.cpp:14
#define MAGMA_Z_ONE
Definition: magma.h:132
#define dT(m)
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define dA(dev, i, j)

Here is the call graph for this function:

magma_int_t magma_zcgesv_gpu ( char  trans,
magma_int_t  N,
magma_int_t  NRHS,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t IPIV,
magma_int_t dIPIV,
magmaDoubleComplex *  dB,
magma_int_t  lddb,
magmaDoubleComplex *  dX,
magma_int_t  lddx,
magmaDoubleComplex *  dworkd,
magmaFloatComplex *  dworks,
magma_int_t iter,
magma_int_t info 
)

Definition at line 17 of file zcgesv_gpu.cpp.

References __func__, BWDMAX, dB, dR, dSX, dX, ITERMAX, lapackf77_dlamch, lapackf77_zlange, magma_cgetrf_gpu(), magma_free_cpu(), magma_imalloc_cpu(), magma_izamax(), magma_setvector, magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, magma_zcgetrs_gpu(), magma_zgemm(), magma_zgemv(), magma_zgetmatrix, magma_zgetrf_gpu(), magma_zgetrs_gpu(), magmablas_zaxpycp(), magmablas_zlacpy(), magmablas_zlag2c(), magmablas_zlange(), MagmaNoTrans, MagmaUpperLower, max, and swp2pswp().

24 {
25 /* -- MAGMA (version 1.4.0) --
26  Univ. of Tennessee, Knoxville
27  Univ. of California, Berkeley
28  Univ. of Colorado, Denver
29  August 2013
30 
31  Purpose
32  =======
33  ZCGESV computes the solution to a complex system of linear equations
34  A * X = B or A' * X = B
35  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
36 
37  ZCGESV first attempts to factorize the matrix in complex SINGLE PRECISION
38  and use this factorization within an iterative refinement procedure
39  to produce a solution with complex DOUBLE PRECISION norm-wise backward error
40  quality (see below). If the approach fails the method switches to a
41  complex DOUBLE PRECISION factorization and solve.
42 
43  The iterative refinement is not going to be a winning strategy if
44  the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION
45  performance is too small. A reasonable strategy should take the
46  number of right-hand sides and the size of the matrix into account.
47  This might be done with a call to ILAENV in the future. Up to now, we
48  always try iterative refinement.
49 
50  The iterative refinement process is stopped if
51  ITER > ITERMAX
52  or for all the RHS we have:
53  RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
54  where
55  o ITER is the number of the current iteration in the iterative
56  refinement process
57  o RNRM is the infinity-norm of the residual
58  o XNRM is the infinity-norm of the solution
59  o ANRM is the infinity-operator-norm of the matrix A
60  o EPS is the machine epsilon returned by DLAMCH('Epsilon')
61  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.
62 
63  Arguments
64  =========
65  TRANS (input) CHARACTER*1
66  Specifies the form of the system of equations:
67  = 'N': A * X = B (No transpose)
68  = 'T': A'* X = B (Transpose)
69  = 'C': A'* X = B (Conjugate transpose = Transpose)
70 
71  N (input) INTEGER
72  The number of linear equations, i.e., the order of the
73  matrix A. N >= 0.
74 
75  NRHS (input) INTEGER
76  The number of right hand sides, i.e., the number of columns
77  of the matrix B. NRHS >= 0.
78 
79  dA (input or input/output) COMPLEX_16 array on the GPU, dimension (ldda,N)
80  On entry, the N-by-N coefficient matrix A.
81  On exit, if iterative refinement has been successfully used
82  (info.EQ.0 and ITER.GE.0, see description below), A is
83  unchanged. If double precision factorization has been used
84  (info.EQ.0 and ITER.LT.0, see description below), then the
85  array dA contains the factors L and U from the factorization
86  A = P*L*U; the unit diagonal elements of L are not stored.
87 
88  ldda (input) INTEGER
89  The leading dimension of the array dA. ldda >= max(1,N).
90 
91  IPIV (output) INTEGER array, dimension (N)
92  The pivot indices that define the permutation matrix P;
93  row i of the matrix was interchanged with row IPIV(i).
94  Corresponzc either to the single precision factorization
95  (if info.EQ.0 and ITER.GE.0) or the double precision
96  factorization (if info.EQ.0 and ITER.LT.0).
97 
98  dIPIV (output) INTEGER array on the GPU, dimension (min(M,N))
99  The pivot indices; for 1 <= i <= min(M,N), row i of the
100  matrix was moved to row IPIV(i).
101 
102  dB (input) COMPLEX_16 array on the GPU, dimension (lddb,NRHS)
103  The N-by-NRHS right hand side matrix B.
104 
105  lddb (input) INTEGER
106  The leading dimension of the array dB. lddb >= max(1,N).
107 
108  dX (output) COMPLEX_16 array on the GPU, dimension (lddx,NRHS)
109  If info = 0, the N-by-NRHS solution matrix X.
110 
111  lddx (input) INTEGER
112  The leading dimension of the array dX. lddx >= max(1,N).
113 
114  dworkd (workspace) COMPLEX_16 array on the GPU, dimension (N*NRHS)
115  This array is used to hold the residual vectors.
116 
117  dworks (workspace) COMPLEX array on the GPU, dimension (N*(N+NRHS))
118  This array is used to store the complex single precision matrix
119  and the right-hand sides or solutions in single precision.
120 
121  iter (output) INTEGER
122  < 0: iterative refinement has failed, double precision
123  factorization has been performed
124  -1 : the routine fell back to full precision for
125  implementation- or machine-specific reasons
126  -2 : narrowing the precision induced an overflow,
127  the routine fell back to full precision
128  -3 : failure of SGETRF
129  -31: stop the iterative refinement after the 30th iteration
130  > 0: iterative refinement has been successfully used.
131  Returns the number of iterations
132 
133  info (output) INTEGER
134  = 0: successful exit
135  < 0: if info = -i, the i-th argument had an illegal value
136  > 0: if info = i, U(i,i) computed in DOUBLE PRECISION is
137  exactly zero. The factorization has been completed,
138  but the factor U is exactly singular, so the solution
139  could not be computed.
140  ===================================================================== */
141 
142  #define dB(i,j) (dB + (i) + (j)*lddb)
143  #define dX(i,j) (dX + (i) + (j)*lddx)
144  #define dR(i,j) (dR + (i) + (j)*lddr)
145 
146  magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
147  magmaDoubleComplex c_one = MAGMA_Z_ONE;
148  magma_int_t ione = 1;
149  magmaDoubleComplex *dR;
150  magmaFloatComplex *dSA, *dSX;
151  magmaDoubleComplex Xnrmv, Rnrmv;
152  double Anrm, Xnrm, Rnrm, cte, eps;
153  magma_int_t i, j, iiter, lddsa, lddr;
154 
155  /* Check arguments */
156  *iter = 0;
157  *info = 0;
158  if ( n < 0 )
159  *info = -1;
160  else if ( nrhs < 0 )
161  *info = -2;
162  else if ( ldda < max(1,n))
163  *info = -4;
164  else if ( lddb < max(1,n))
165  *info = -8;
166  else if ( lddx < max(1,n))
167  *info = -10;
168 
169  if (*info != 0) {
170  magma_xerbla( __func__, -(*info) );
171  return *info;
172  }
173 
174  if ( n == 0 || nrhs == 0 )
175  return *info;
176 
177  lddsa = n;
178  lddr = n;
179 
180  dSA = dworks;
181  dSX = dSA + lddsa*n;
182  dR = dworkd;
183 
184  eps = lapackf77_dlamch("Epsilon");
185  Anrm = magmablas_zlange('I', n, n, dA, ldda, (double*)dworkd );
186  cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX;
187 
188  /*
189  * Convert to single precision
190  */
191  //magmablas_zlag2c( n, nrhs, dB, lddb, dSX, lddsx, info ); // done inside zcgetrs with pivots
192  if (*info != 0) {
193  *iter = -2;
194  goto FALLBACK;
195  }
196 
197  magmablas_zlag2c( n, n, dA, ldda, dSA, lddsa, info );
198  if (*info != 0) {
199  *iter = -2;
200  goto FALLBACK;
201  }
202 
203  // factor dSA in single precision
204  magma_cgetrf_gpu( n, n, dSA, lddsa, ipiv, info );
205  if (*info != 0) {
206  *iter = -3;
207  goto FALLBACK;
208  }
209 
210  // Generate parallel pivots
211  {
212  magma_int_t *newipiv;
213  magma_imalloc_cpu( &newipiv, n );
214  if ( newipiv == NULL ) {
215  *iter = -3;
216  goto FALLBACK;
217  }
218  swp2pswp( trans, n, ipiv, newipiv );
219  magma_setvector( n, sizeof(magma_int_t), newipiv, 1, dipiv, 1 );
220  magma_free_cpu( newipiv );
221  }
222 
223  // solve dSA*dSX = dB in single precision
224  // converts dB to dSX and applies pivots, solves, then converts result back to dX
225  magma_zcgetrs_gpu( trans, n, nrhs, dSA, lddsa, dipiv, dB, lddb, dX, lddx, dSX, info );
226 
227  // residual dR = dB - dA*dX in double precision
228  magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dR, lddr );
229  if ( nrhs == 1 ) {
230  magma_zgemv( trans, n, n,
231  c_neg_one, dA, ldda,
232  dX, 1,
233  c_one, dR, 1 );
234  }
235  else {
236  magma_zgemm( trans, MagmaNoTrans, n, nrhs, n,
237  c_neg_one, dA, ldda,
238  dX, lddx,
239  c_one, dR, lddr );
240  }
241 
242  // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange?
243  for( j=0; j < nrhs; j++ ) {
244  i = magma_izamax( n, dX(0,j), 1) - 1;
245  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
246  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
247 
248  i = magma_izamax ( n, dR(0,j), 1 ) - 1;
249  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
250  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
251 
252  if ( Rnrm > Xnrm*cte ) {
253  goto REFINEMENT;
254  }
255  }
256 
257  *iter = 0;
258  return *info;
259 
260 REFINEMENT:
261  for( iiter=1; iiter < ITERMAX; ) {
262  *info = 0;
263  // convert residual dR to single precision dSX
264  // solve dSA*dSX = R in single precision
265  // convert result back to double precision dR
266  // it's okay that dR is used for both dB input and dX output.
267  magma_zcgetrs_gpu( trans, n, nrhs, dSA, lddsa, dipiv, dR, lddr, dR, lddr, dSX, info );
268  if (*info != 0) {
269  *iter = -3;
270  goto FALLBACK;
271  }
272 
273  // Add correction and setup residual
274  // dX += dR --and--
275  // dR = dB
276  // This saves going through dR a second time (if done with one more kernel).
277  // -- not really: first time is read, second time is write.
278  for( j=0; j < nrhs; j++ ) {
279  magmablas_zaxpycp( n, dR(0,j), dX(0,j), dB(0,j) );
280  }
281 
282  // residual dR = dB - dA*dX in double precision
283  if ( nrhs == 1 ) {
284  magma_zgemv( trans, n, n,
285  c_neg_one, dA, ldda,
286  dX, 1,
287  c_one, dR, 1 );
288  }
289  else {
290  magma_zgemm( trans, MagmaNoTrans, n, nrhs, n,
291  c_neg_one, dA, ldda,
292  dX, lddx,
293  c_one, dR, lddr );
294  }
295 
296  /* Check whether the nrhs normwise backward errors satisfy the
297  * stopping criterion. If yes, set ITER=IITER>0 and return. */
298  for( j=0; j < nrhs; j++ ) {
299  i = magma_izamax( n, dX(0,j), 1) - 1;
300  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
301  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
302 
303  i = magma_izamax ( n, dR(0,j), 1 ) - 1;
304  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
305  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
306 
307  if ( Rnrm > Xnrm*cte ) {
308  goto L20;
309  }
310  }
311 
312  /* If we are here, the nrhs normwise backward errors satisfy
313  * the stopping criterion, we are good to exit. */
314  *iter = iiter;
315  return *info;
316 
317  L20:
318  iiter++;
319  }
320 
321  /* If we are at this place of the code, this is because we have
322  * performed ITER=ITERMAX iterations and never satisified the
323  * stopping criterion. Set up the ITER flag accordingly and follow
324  * up on double precision routine. */
325  *iter = -ITERMAX - 1;
326 
327 FALLBACK:
328  /* Single-precision iterative refinement failed to converge to a
329  * satisfactory solution, so we resort to double precision. */
330  magma_zgetrf_gpu( n, n, dA, ldda, ipiv, info );
331  if (*info == 0) {
332  magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx );
333  magma_zgetrs_gpu( trans, n, nrhs, dA, ldda, ipiv, dX, lddx, info );
334  }
335 
336  return *info;
337 }
#define MagmaUpperLower
Definition: magma.h:63
#define __func__
Definition: common_magma.h:65
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
double magmablas_zlange(char norm, magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, double *WORK)
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
int magma_int_t
Definition: magmablas.h:12
#define dB(i, j)
magma_int_t magma_zcgetrs_gpu(char trans, magma_int_t n, magma_int_t nrhs, cuFloatComplex *dA, magma_int_t ldda, magma_int_t *ipiv, cuDoubleComplex *dB, magma_int_t lddb, cuDoubleComplex *dX, magma_int_t lddx, cuFloatComplex *dSX, magma_int_t *info)
#define magma_setvector(n, elemSize, hx_src, incx, dy_dst, incy)
Definition: magmablas.h:42
#define lapackf77_dlamch
Definition: magma_lapack.h:27
#define ITERMAX
Definition: zcgesv_gpu.cpp:14
void magma_zgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
void magmablas_zlacpy(char uplo, magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *B, magma_int_t ldb)
void swp2pswp(magma_trans_t trans, magma_int_t n, magma_int_t *ipiv, magma_int_t *newipiv)
Definition: auxiliary.cpp:117
void magmablas_zaxpycp(cuDoubleComplex *, cuDoubleComplex *, magma_int_t, cuDoubleComplex *)
magma_int_t magma_zgetrf_gpu(magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
#define dSX(i, j)
magma_int_t magma_zgetrs_gpu(char trans, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, cuDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_izamax(magma_int_t n, magmaDoubleComplex_const_ptr dx, magma_int_t incx)
magma_int_t ldda
void magmablas_zlag2c(magma_int_t M, magma_int_t N, const cuDoubleComplex *A, magma_int_t lda, cuFloatComplex *SA, magma_int_t ldsa, magma_int_t *info)
#define dR(i, j)
magma_int_t magma_cgetrf_gpu(magma_int_t m, magma_int_t n, magmaFloatComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
Definition: cgetrf_gpu.cpp:14
static magma_err_t magma_imalloc_cpu(magma_int_t **ptrPtr, size_t n)
Definition: magma.h:82
void magma_zgemv(magma_trans_t transA, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dx, magma_int_t incx, magmaDoubleComplex beta, magmaDoubleComplex_ptr dy, magma_int_t incy)
#define lapackf77_zlange
Definition: magma_zlapack.h:75
#define MAGMA_Z_ONE
Definition: magma.h:132
#define MagmaNoTrans
Definition: magma.h:57
#define dX(i, j)
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define dA(dev, i, j)
#define BWDMAX
Definition: zcgesv_gpu.cpp:13

Here is the call graph for this function:

magma_int_t magma_zcgetrs_gpu ( char  trans,
magma_int_t  n,
magma_int_t  nrhs,
magmaFloatComplex *  dA,
magma_int_t  ldda,
magma_int_t ipiv,
magmaDoubleComplex *  dB,
magma_int_t  lddb,
magmaDoubleComplex *  dX,
magma_int_t  lddx,
magmaFloatComplex *  dSX,
magma_int_t info 
)

Definition at line 14 of file zcgetrs_gpu.cpp.

References __func__, lapackf77_lsame, MAGMA_C_ONE, magma_ctrsm(), magma_xerbla(), magmablas_clag2z(), magmablas_zclaswp(), magmablas_zlag2c(), MagmaConjTrans, MagmaLeft, MagmaLower, MagmaNonUnit, MagmaNoTrans, MagmaUnit, and MagmaUpper.

21 {
22 /* -- MAGMA (version 1.4.0) --
23  Univ. of Tennessee, Knoxville
24  Univ. of California, Berkeley
25  Univ. of Colorado, Denver
26  August 2013
27 
28  Purpose
29  =======
30  ZCGETRS solves a system of linear equations
31  A * X = B or A' * X = B
32  with a general N-by-N matrix A using the LU factorization computed
33  by MAGMA_CGETRF_GPU. B and X are in COMPLEX_16, and A is in COMPLEX.
34  This routine is used in the mixed precision iterative solver
35  magma_zcgesv.
36 
37  Arguments
38  =========
39  TRANS (input) CHARACTER*1
40  Specifies the form of the system of equations:
41  = 'N': A * X = B (No transpose)
42  = 'T': A'* X = B (Transpose)
43  = 'C': A'* X = B (Conjugate transpose = Transpose)
44 
45  N (input) INTEGER
46  The order of the matrix A. N >= 0.
47 
48  NRHS (input) INTEGER
49  The number of right hand sides, i.e., the number of columns
50  of the matrix B. NRHS >= 0.
51 
52  dA (input) COMPLEX array on the GPU, dimension (LDDA,N)
53  The factors L and U from the factorization A = P*L*U
54  as computed by CGETRF_GPU.
55 
56  LDDA (input) INTEGER
57  The leading dimension of the array dA. LDDA >= max(1,N).
58 
59  IPIV (input) INTEGER array on the GPU, dimension (N)
60  The pivot indices from CGETRF_GPU; Row i of the
61  matrix was moved to row IPIV(i).
62 
63  dB (input) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS)
64  On entry, the right hand side matrix B.
65 
66  LDDB (input) INTEGER
67  The leading dimension of the arrays X and B. LDDB >= max(1,N).
68 
69  dX (output) COMPLEX_16 array on the GPU, dimension (LDDX, NRHS)
70  On exit, the solution matrix dX.
71 
72  LDDX (input) INTEGER
73  The leading dimension of the array dX, LDDX >= max(1,N).
74 
75  dSX (workspace) COMPLEX array on the GPU used as workspace,
76  dimension (N, NRHS)
77 
78  INFO (output) INTEGER
79  = 0: successful exit
80  < 0: if INFO = -i, the i-th argument had an illegal value
81  ===================================================================== */
82 
83  magmaFloatComplex c_one = MAGMA_C_ONE;
84  char trans_[2] = {trans, 0};
85  int notran = lapackf77_lsame(trans_, "N");
86  magma_int_t inc;
87 
88  *info = 0;
89  if ( (! notran) &&
90  (! lapackf77_lsame(trans_, "T")) &&
91  (! lapackf77_lsame(trans_, "C")) ) {
92  *info = -1;
93  } else if (n < 0) {
94  *info = -2;
95  } else if (nrhs < 0) {
96  *info = -3;
97  } else if (ldda < n) {
98  *info = -5;
99  } else if (lddb < n) {
100  *info = -8;
101  } else if (lddx < n) {
102  *info = -10;
103  } else if (lddx != lddb) { /* TODO: remove it when zclaswp will have the correct interface */
104  *info = -10;
105  }
106  if (*info != 0) {
107  magma_xerbla( __func__, -(*info) );
108  return *info;
109  }
110 
111  /* Quick return if possible */
112  if (n == 0 || nrhs == 0) {
113  return *info;
114  }
115 
116  if (notran) {
117  inc = 1;
118 
119  /* Get X by row applying interchanges to B and cast to single */
120  /*
121  * TODO: clean zclaswp interface to have interface closer to zlaswp
122  */
123  //magmablas_zclaswp(nrhs, dB, lddb, dSX, lddbx, 1, n, ipiv);
124  magmablas_zclaswp(nrhs, dB, lddb, dSX, n, ipiv, inc);
125 
126  /* Solve L*X = B, overwriting B with SX. */
128  n, nrhs, c_one, dA, ldda, dSX, n);
129 
130  /* Solve U*X = B, overwriting B with X. */
132  n, nrhs, c_one, dA, ldda, dSX, n);
133 
134  magmablas_clag2z( n, nrhs, dSX, n, dX, lddx, info );
135  }
136  else {
137  inc = -1;
138 
139  /* Cast the COMPLEX_16 RHS to COMPLEX */
140  magmablas_zlag2c( n, nrhs, dB, lddb, dSX, n, info );
141 
142  /* Solve A' * X = B. */
144  n, nrhs, c_one, dA, ldda, dSX, n );
146  n, nrhs, c_one, dA, ldda, dSX, n );
147 
148  magmablas_zclaswp( nrhs, dX, lddx, dSX, n, ipiv, inc );
149  }
150 
151  return *info;
152 } /* magma_zcgetrs */
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
#define MagmaUpper
Definition: magma.h:61
void magmablas_clag2z(magma_int_t M, magma_int_t N, cuFloatComplex *SA, magma_int_t ldsa, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
int magma_int_t
Definition: magmablas.h:12
#define dB(dev, i, j)
void magma_ctrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
void magmablas_zclaswp(magma_int_t, cuDoubleComplex *, magma_int_t, cuFloatComplex *, magma_int_t, magma_int_t *, magma_int_t)
#define dX(i, j)
#define MagmaLower
Definition: magma.h:62
#define dSX(i, j)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
magma_int_t ldda
#define MAGMA_C_ONE
Definition: magma.h:154
void magmablas_zlag2c(magma_int_t M, magma_int_t N, const cuDoubleComplex *A, magma_int_t lda, cuFloatComplex *SA, magma_int_t ldsa, magma_int_t *info)
#define MagmaNonUnit
Definition: magma.h:65
#define MagmaUnit
Definition: magma.h:66
#define MagmaNoTrans
Definition: magma.h:57
#define dA(dev, i, j)

Here is the call graph for this function:

magma_int_t magma_zcposv_gpu ( char  uplo,
magma_int_t  n,
magma_int_t  nrhs,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  dB,
magma_int_t  lddb,
magmaDoubleComplex *  dX,
magma_int_t  lddx,
magmaDoubleComplex *  dworkd,
magmaFloatComplex *  dworks,
magma_int_t iter,
magma_int_t info 
)

Definition at line 17 of file zcposv_gpu.cpp.

References __func__, BWDMAX, dB, dR, dSX, dX, ITERMAX, lapackf77_dlamch, lapackf77_zlange, magma_cpotrf_gpu(), magma_cpotrs_gpu(), magma_izamax(), magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, magma_zgetmatrix, magma_zhemm(), magma_zhemv(), magma_zpotrf_gpu(), magma_zpotrs_gpu(), magmablas_clag2z(), magmablas_zcaxpycp(), magmablas_zlacpy(), magmablas_zlag2c(), magmablas_zlanhe(), magmablas_zlat2c(), MagmaLeft, MagmaUpperLower, and max.

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  ZCPOSV computes the solution to a complex system of linear equations
33  A * X = B,
34  where A is an N-by-N Hermitian positive definite matrix and X and B
35  are N-by-NRHS matrices.
36 
37  ZCPOSV first attempts to factorize the matrix in complex SINGLE PRECISION
38  and use this factorization within an iterative refinement procedure
39  to produce a solution with complex DOUBLE PRECISION norm-wise backward error
40  quality (see below). If the approach fails the method switches to a
41  complex DOUBLE PRECISION factorization and solve.
42 
43  The iterative refinement is not going to be a winning strategy if
44  the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION
45  performance is too small. A reasonable strategy should take the
46  number of right-hand sides and the size of the matrix into account.
47  This might be done with a call to ILAENV in the future. Up to now, we
48  always try iterative refinement.
49 
50  The iterative refinement process is stopped if
51  ITER > ITERMAX
52  or for all the RHS we have:
53  RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
54  where
55  o ITER is the number of the current iteration in the iterative
56  refinement process
57  o RNRM is the infinity-norm of the residual
58  o XNRM is the infinity-norm of the solution
59  o ANRM is the infinity-operator-norm of the matrix A
60  o EPS is the machine epsilon returned by DLAMCH('Epsilon')
61  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.
62 
63  Arguments
64  =========
65  UPLO (input) CHARACTER
66  = 'U': Upper triangle of A is stored;
67  = 'L': Lower triangle of A is stored.
68 
69  N (input) INTEGER
70  The number of linear equations, i.e., the order of the
71  matrix A. N >= 0.
72 
73  NRHS (input) INTEGER
74  The number of right hand sides, i.e., the number of columns
75  of the matrix B. NRHS >= 0.
76 
77  dA (input or input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
78  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
79  N-by-N upper triangular part of A contains the upper
80  triangular part of the matrix A, and the strictly lower
81  triangular part of A is not referenced. If UPLO = 'L', the
82  leading N-by-N lower triangular part of A contains the lower
83  triangular part of the matrix A, and the strictly upper
84  triangular part of A is not referenced.
85  On exit, if iterative refinement has been successfully used
86  (INFO.EQ.0 and ITER.GE.0, see description below), then A is
87  unchanged, if double factorization has been used
88  (INFO.EQ.0 and ITER.LT.0, see description below), then the
89  array dA contains the factor U or L from the Cholesky
90  factorization A = U**T*U or A = L*L**T.
91 
92  LDDA (input) INTEGER
93  The leading dimension of the array dA. LDDA >= max(1,N).
94 
95  dB (input) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS)
96  The N-by-NRHS right hand side matrix B.
97 
98  LDDB (input) INTEGER
99  The leading dimension of the array dB. LDDB >= max(1,N).
100 
101  dX (output) COMPLEX_16 array on the GPU, dimension (LDDX,NRHS)
102  If INFO = 0, the N-by-NRHS solution matrix X.
103 
104  LDDX (input) INTEGER
105  The leading dimension of the array dX. LDDX >= max(1,N).
106 
107  dworkd (workspace) COMPLEX_16 array on the GPU, dimension (N*NRHS)
108  This array is used to hold the residual vectors.
109 
110  dworks (workspace) COMPLEX array on the GPU, dimension (N*(N+NRHS))
111  This array is used to store the complex single precision matrix
112  and the right-hand sides or solutions in single precision.
113 
114  ITER (output) INTEGER
115  < 0: iterative refinement has failed, double precision
116  factorization has been performed
117  -1 : the routine fell back to full precision for
118  implementation- or machine-specific reasons
119  -2 : narrowing the precision induced an overflow,
120  the routine fell back to full precision
121  -3 : failure of SPOTRF
122  -31: stop the iterative refinement after the 30th iteration
123  > 0: iterative refinement has been successfully used.
124  Returns the number of iterations
125 
126  INFO (output) INTEGER
127  = 0: successful exit
128  < 0: if INFO = -i, the i-th argument had an illegal value
129  > 0: if INFO = i, the leading minor of order i of (DOUBLE
130  PRECISION) A is not positive definite, so the
131  factorization could not be completed, and the solution
132  has not been computed.
133 
134  ===================================================================== */
135 
136  #define dB(i,j) (dB + (i) + (j)*lddb)
137  #define dX(i,j) (dX + (i) + (j)*lddx)
138  #define dR(i,j) (dR + (i) + (j)*lddr)
139  #define dSX(i,j) (dSX + (i) + (j)*lddsx)
140 
141  magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
142  magmaDoubleComplex c_one = MAGMA_Z_ONE;
143  magma_int_t ione = 1;
144  magmaDoubleComplex *dR;
145  magmaFloatComplex *dSA, *dSX;
146  magmaDoubleComplex Xnrmv, Rnrmv;
147  double Anrm, Xnrm, Rnrm, cte, eps;
148  magma_int_t i, j, iiter, lddsa, lddsx, lddr;
149 
150  /* Check arguments */
151  *iter = 0;
152  *info = 0;
153  if ( n < 0 )
154  *info = -1;
155  else if ( nrhs < 0 )
156  *info = -2;
157  else if ( ldda < max(1,n))
158  *info = -4;
159  else if ( lddb < max(1,n))
160  *info = -7;
161  else if ( lddx < max(1,n))
162  *info = -9;
163 
164  if (*info != 0) {
165  magma_xerbla( __func__, -(*info) );
166  return *info;
167  }
168 
169  if ( n == 0 || nrhs == 0 )
170  return *info;
171 
172  lddsa = n;
173  lddsx = n;
174  lddr = n;
175 
176  dSA = dworks;
177  dSX = dSA + lddsa*n;
178  dR = dworkd;
179 
180  eps = lapackf77_dlamch("Epsilon");
181  Anrm = magmablas_zlanhe('I', uplo, n, dA, ldda, (double*)dworkd );
182  cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX;
183 
184  /*
185  * Convert to single precision
186  */
187  magmablas_zlag2c( n, nrhs, dB, lddb, dSX, lddsx, info );
188  if (*info != 0) {
189  *iter = -2;
190  goto FALLBACK;
191  }
192 
193  magmablas_zlat2c( uplo, n, dA, ldda, dSA, lddsa, info );
194  if (*info != 0) {
195  *iter = -2;
196  goto FALLBACK;
197  }
198 
199  // factor dSA in single precision
200  magma_cpotrf_gpu( uplo, n, dSA, lddsa, info );
201  if (*info != 0) {
202  *iter = -3;
203  goto FALLBACK;
204  }
205 
206  // solve dSA*dSX = dB in single precision
207  magma_cpotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info );
208 
209  // residual dR = dB - dA*dX in double precision
210  magmablas_clag2z( n, nrhs, dSX, lddsx, dX, lddx, info );
211  magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dR, lddr );
212  if ( nrhs == 1 ) {
213  magma_zhemv( uplo, n,
214  c_neg_one, dA, ldda,
215  dX, 1,
216  c_one, dR, 1 );
217  }
218  else {
219  magma_zhemm( MagmaLeft, uplo, n, nrhs,
220  c_neg_one, dA, ldda,
221  dX, lddx,
222  c_one, dR, lddr );
223  }
224 
225  // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange?
226  for( j=0; j < nrhs; j++ ) {
227  i = magma_izamax( n, dX(0,j), 1) - 1;
228  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
229  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
230 
231  i = magma_izamax ( n, dR(0,j), 1 ) - 1;
232  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
233  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
234 
235  if ( Rnrm > Xnrm*cte ) {
236  goto REFINEMENT;
237  }
238  }
239 
240  *iter = 0;
241  return *info;
242 
243 REFINEMENT:
244  for( iiter=1; iiter < ITERMAX; ) {
245  *info = 0;
246  // convert residual dR to single precision dSX
247  magmablas_zlag2c( n, nrhs, dR, lddr, dSX, lddsx, info );
248  if (*info != 0) {
249  *iter = -2;
250  goto FALLBACK;
251  }
252  // solve dSA*dSX = R in single precision
253  magma_cpotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info );
254 
255  // Add correction and setup residual
256  // dX += dSX [including conversion] --and--
257  // dR = dB
258  for( j=0; j < nrhs; j++ ) {
259  magmablas_zcaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j) );
260  }
261 
262  // residual dR = dB - dA*dX in double precision
263  if ( nrhs == 1 ) {
264  magma_zhemv( uplo, n,
265  c_neg_one, dA, ldda,
266  dX, 1,
267  c_one, dR, 1 );
268  }
269  else {
270  magma_zhemm( MagmaLeft, uplo, n, nrhs,
271  c_neg_one, dA, ldda,
272  dX, lddx,
273  c_one, dR, lddr );
274  }
275 
276  /* Check whether the nrhs normwise backward errors satisfy the
277  * stopping criterion. If yes, set ITER=IITER>0 and return. */
278  for( j=0; j < nrhs; j++ ) {
279  i = magma_izamax( n, dX(0,j), 1) - 1;
280  magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
281  Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );
282 
283  i = magma_izamax ( n, dR(0,j), 1 ) - 1;
284  magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
285  Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );
286 
287  if ( Rnrm > Xnrm*cte ) {
288  goto L20;
289  }
290  }
291 
292  /* If we are here, the nrhs normwise backward errors satisfy
293  * the stopping criterion, we are good to exit. */
294  *iter = iiter;
295  return *info;
296 
297  L20:
298  iiter++;
299  }
300 
301  /* If we are at this place of the code, this is because we have
302  * performed ITER=ITERMAX iterations and never satisified the
303  * stopping criterion. Set up the ITER flag accordingly and follow
304  * up on double precision routine. */
305  *iter = -ITERMAX - 1;
306 
307 FALLBACK:
308  /* Single-precision iterative refinement failed to converge to a
309  * satisfactory solution, so we resort to double precision. */
310  magma_zpotrf_gpu( uplo, n, dA, ldda, info );
311  if (*info == 0) {
312  magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx );
313  magma_zpotrs_gpu( uplo, n, nrhs, dA, ldda, dX, lddx, info );
314  }
315 
316  return *info;
317 }
#define ITERMAX
Definition: zcposv_gpu.cpp:14
#define MagmaUpperLower
Definition: magma.h:63
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
magma_int_t magma_zpotrs_gpu(char uplo, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
void magmablas_clag2z(magma_int_t M, magma_int_t N, cuFloatComplex *SA, magma_int_t ldsa, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
#define dX(i, j)
#define dB(i, j)
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_cpotrf_gpu(char uplo, magma_int_t n, magmaFloatComplex *dA, magma_int_t ldda, magma_int_t *info)
Definition: cpotrf_gpu.cpp:24
#define lapackf77_dlamch
Definition: magma_lapack.h:27
void magmablas_zcaxpycp(cuFloatComplex *, cuDoubleComplex *, magma_int_t, cuDoubleComplex *, cuDoubleComplex *)
void magmablas_zlacpy(char uplo, magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *B, magma_int_t ldb)
magma_int_t magma_zpotrf_gpu(char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
#define BWDMAX
Definition: zcposv_gpu.cpp:13
magma_int_t magma_cpotrs_gpu(char uplo, magma_int_t n, magma_int_t nrhs, magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *dB, magma_int_t lddb, magma_int_t *info)
Definition: cpotrs_gpu.cpp:14
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_izamax(magma_int_t n, magmaDoubleComplex_const_ptr dx, magma_int_t incx)
void magma_zhemm(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
magma_int_t ldda
double magmablas_zlanhe(char norm, char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, double *WORK)
void magmablas_zlag2c(magma_int_t M, magma_int_t N, const cuDoubleComplex *A, magma_int_t lda, cuFloatComplex *SA, magma_int_t ldsa, magma_int_t *info)
void magma_zhemv(magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dx, magma_int_t incx, magmaDoubleComplex beta, magmaDoubleComplex_ptr dy, magma_int_t incy)
#define dR(i, j)
void magmablas_zlat2c(char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuFloatComplex *SA, magma_int_t ldsa, magma_int_t *info)
#define lapackf77_zlange
Definition: magma_zlapack.h:75
#define MAGMA_Z_ONE
Definition: magma.h:132
#define dSX(i, j)
#define max(a, b)
Definition: common_magma.h:82
#define dA(dev, i, j)

Here is the call graph for this function: