001: /* ///////////////////////////////////////////////////////////////////////////////////////
002:  *  -- PLASMA --
003:  *     University of Tennessee
004:  */
005: #include <stdlib.h>
006: #include "common.h"
007: #include <math.h>
008: #include "lapack.h"
009: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
010: /* ///                    PLASMA computational routine (version 2.1.0)                       ///
011:  * ///                    Release Date: November, 15th 2009                                  ///
012:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
013:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
014: 
015: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
016: // PLASMA_zcgesv - Computes the solution to a system of linear equations A * X = B,
017: // where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
018: //
019: // PLASMA_zcgesv first attempts to factorize the matrix in COMPLEX and use this
020: // factorization within an iterative refinement procedure to produce a
021: // solution with COMPLEX*16 normwise backward error quality (see below).
022: // If the approach fails the method switches to a COMPLEX*16
023: // factorization and solve.
024: //
025: // The iterative refinement is not going to be a winning strategy if
026: // the ratio COMPLEX performance over COMPLEX*16 performance is too
027: // small. A reasonable strategy should take the number of right-hand
028: // sides and the size of the matrix into account. This might be done
029: // with a call to ILAENV in the future. Up to now, we always try
030: // iterative refinement.
031: //
032: // The iterative refinement process is stopped if ITER > ITERMAX or
033: // for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX
034: // where
035: //
036: // - ITER is the number of the current iteration in the iterative refinement process
037: // - RNRM is the infinity-norm of the residual
038: // - XNRM is the infinity-norm of the solution
039: // - ANRM is the infinity-operator-norm of the matrix A
040: // - EPS is the machine epsilon returned by DLAMCH('Epsilon').
041: //
042: // Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed.
043: //
044: // The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.
045: 
046: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
047: // N        int (IN)
048: //          The number of linear equations, i.e., the order of the matrix A. N >= 0.
049: //
050: // NRHS     int (IN)
051: //          The number of right hand sides, i.e., the number of columns of the matrix B.
052: //          NRHS >= 0.
053: //
054: // A        PLASMA_Complex64_t* (IN)
055: //          The N-by-N coefficient matrix A. This matrix is not modified.
056: //
057: // LDA      int (IN)
058: //          The leading dimension of the array A. LDA >= max(1,N).
059: //
060: // B        PLASMA_Complex64_t* (IN)
061: //          The N-by-NRHS matrix of right hand side matrix B.
062: //
063: // LDB      int (IN)
064: //          The leading dimension of the array B. LDB >= max(1,N).
065: //
066: // X        PLASMA_Complex64_t* (OUT)
067: //          If return value = 0, the N-by-NRHS solution matrix X.
068: //
069: // LDX      int (IN)
070: //          The leading dimension of the array B. LDX >= max(1,N).
071: //
072: // ITER     int* (OUT)is the number of the current iteration in the iterative refinement process
073: 
074: 
075: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
076: //          = 0: successful exit
077: //          < 0: if -i, the i-th argument had an illegal value
078: //          > 0: if i, U(i,i) is exactly zero. The factorization has been completed,
079: //               but the factor U is exactly singular, so the solution could not be computed.
080: 
081: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
082: 
083: #define PLASMA_zlag2c(_descA, _descB) plasma_parallel_call_2(plasma_pzlag2c, PLASMA_desc, _descA, PLASMA_desc, _descB)
084: #define PLASMA_clag2z(_descA, _descB) plasma_parallel_call_2(plasma_pclag2z, PLASMA_desc, _descA, PLASMA_desc, _descB)
085: #define PLASMA_zlange(_norm, _descA, _result, _work, _counter) _result = 0;     \
086:                                                      plasma_parallel_call_3(plasma_pzlange, char, _norm, PLASMA_desc, _descA, double*, _work);\
087:                                                      for (_counter = 0; _counter < PLASMA_SIZE; _counter++){if (((double *)_work)[_counter] > _result) _result = ((double *)_work)[_counter];}
088: 
089: #define PLASMA_zlacpy(_descA, _descB) plasma_parallel_call_2(plasma_pzlacpy, PLASMA_desc, _descA, PLASMA_desc, _descB)
090: #define PLASMA_zaxpy(_alpha, _descA, _descB) plasma_parallel_call_3(plasma_pzaxpy, PLASMA_Complex64_t, _alpha, PLASMA_desc, _descA, PLASMA_desc, _descB)
091: 
092: int PLASMA_zcgesv(int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER)
093: {
094:     int NB, NT, NTRHS;
095:     int status;
096:     PLASMA_Complex64_t *Abdl;
097:     PLASMA_Complex64_t *Lbdl;
098:     PLASMA_Complex64_t *Bbdl;
099:     PLASMA_Complex64_t *Xbdl;
100:     plasma_context_t *plasma;
101:     PLASMA_Complex64_t *L;
102:     int *IPIV;
103: 
104:     plasma = plasma_context_self();
105:     if (plasma == NULL) {
106:         plasma_fatal_error("PLASMA_zcgesv", "PLASMA not initialized");
107:         return PLASMA_ERR_NOT_INITIALIZED;
108:     }
109:     /* Check input arguments */
110:     if (N < 0) {
111:         plasma_error("PLASMA_zcgesv", "illegal value of N");
112:         return -1;
113:     }
114:     if (NRHS < 0) {
115:         plasma_error("PLASMA_zcgesv", "illegal value of NRHS");
116:         return -2;
117:     }
118:     if (LDA < max(1, N)) {
119:         plasma_error("PLASMA_zcgesv", "illegal value of LDA");
120:         return -4;
121:     }
122:     if (LDB < max(1, N)) {
123:         plasma_error("PLASMA_zcgesv", "illegal value of LDB");
124:         return -8;
125:     }
126:     if (LDX < max(1, N)) {
127:         plasma_error("PLASMA_zcgesv", "illegal value of LDX");
128:         return -10;
129:     }
130:     /* Quick return */
131:     if (min(N, NRHS) == 0)
132:         return PLASMA_SUCCESS;
133: 
134:     /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
135:     status = plasma_tune(PLASMA_TUNE_ZCGESV, N, N, NRHS);
136:     if (status != PLASMA_SUCCESS) {
137:         plasma_error("PLASMA_zcgesv", "plasma_tune() failed");
138:         return status;
139:     }
140: 
141:     /* Set NT & NTRHS */
142:     NB = PLASMA_NB;
143:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
144:     NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
145: 
146:     /* DOUBLE PRECISION INITIALIZATION */
147:     /* Allocate memory for double precision matrices in block layout */
148:     Abdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, NT*NT*PLASMA_NBNBSIZE, PlasmaComplexDouble);
149:     Lbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, NT*NT*PLASMA_IBNBSIZE, PlasmaComplexDouble);
150:     Bbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaComplexDouble);
151:     Xbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaComplexDouble);
152:     if (Abdl == NULL || Lbdl == NULL || Xbdl == NULL) {
153:         plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
154:         plasma_shared_free(plasma, Abdl);
155:         plasma_shared_free(plasma, Lbdl);
156:         plasma_shared_free(plasma, Bbdl);
157:         plasma_shared_free(plasma, Xbdl);
158:         return PLASMA_ERR_OUT_OF_RESOURCES;
159:     }
160: 
161:     PLASMA_desc descA = plasma_desc_init(
162:         Abdl, PlasmaComplexDouble,
163:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
164:         N, N, 0, 0, N, N);
165: 
166:     PLASMA_desc descL = plasma_desc_init(
167:         Lbdl, PlasmaComplexDouble,
168:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
169:         N, N, 0, 0, N, N);
170: 
171:     PLASMA_desc descB = plasma_desc_init(
172:         Bbdl, PlasmaComplexDouble,
173:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
174:         N, NRHS, 0, 0, N, NRHS);
175: 
176:     PLASMA_desc descX = plasma_desc_init(
177:         Xbdl, PlasmaComplexDouble,
178:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
179:         N, NRHS, 0, 0, N, NRHS);
180: 
181:     plasma_parallel_call_3(plasma_lapack_to_tile,
182:         PLASMA_Complex64_t*, A,
183:         int, LDA,
184:         PLASMA_desc, descA);
185: 
186:     plasma_parallel_call_3(plasma_lapack_to_tile,
187:         PLASMA_Complex64_t*, B,
188:         int, LDB,
189:         PLASMA_desc, descB);
190: 
191:     plasma_parallel_call_3(plasma_lapack_to_tile,
192:         PLASMA_Complex64_t*, X,
193:         int, LDX,
194:         PLASMA_desc, descX);
195: 
196:     /* Allocate workspace */
197:     PLASMA_Alloc_Workspace_zgesv(N, &L, &IPIV);
198: 
199:     /* Call the native interface */
200:     status = PLASMA_zcgesv_Tile(&descA, &descL, IPIV, &descB, &descX, ITER);
201: 
202:     //plasma_parallel_call_3(plasma_tile_to_lapack,
203:     //    PLASMA_desc, descA,
204:     //    PLASMA_Complex64_t*, A,
205:     //    int, LDA);
206: 
207:     plasma_parallel_call_3(plasma_tile_to_lapack,
208:         PLASMA_desc, descX,
209:         PLASMA_Complex64_t*, X,
210:         int, LDX);
211: 
212:     plasma_shared_free(plasma, Abdl);
213:     plasma_shared_free(plasma, Lbdl);
214:     plasma_shared_free(plasma, Bbdl);
215:     plasma_shared_free(plasma, Xbdl);
216:     free(L);
217:     free(IPIV);
218:     return PLASMA_INFO;
219: }
220: 
221: 
222: 
223: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
224: // PLASMA_zcgesv_Tile - Computes the solution to a system of linear equations A * X = B,
225: // where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
226: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
227: //
228: // PLASMA_zcgesv_Tile first attempts to factorize the matrix in COMPLEX and use this
229: // factorization within an iterative refinement procedure to produce a
230: // solution with COMPLEX*16 normwise backward error quality (see below).
231: // If the approach fails the method switches to a COMPLEX*16
232: // factorization and solve.
233: //
234: // The iterative refinement is not going to be a winning strategy if
235: // the ratio COMPLEX performance over COMPLEX*16 performance is too
236: // small. A reasonable strategy should take the number of right-hand
237: // sides and the size of the matrix into account. This might be done
238: // with a call to ILAENV in the future. Up to now, we always try
239: // iterative refinement.
240: //
241: // The iterative refinement process is stopped if ITER > ITERMAX or
242: // for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX
243: // where
244: //
245: // - ITER is the number of the current iteration in the iterative refinement process
246: // - RNRM is the infinity-norm of the residual
247: // - XNRM is the infinity-norm of the solution
248: // - ANRM is the infinity-operator-norm of the matrix A
249: // - EPS is the machine epsilon returned by DLAMCH('Epsilon').
250: //
251: // Actually, in his current state (PLASMA 2.1.0), the test is slightly relaxed.
252: //
253: // The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.
254: 
255: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
256: // A        PLASMA_Complex64_t* (In or INOUT)
257: //          On entry, the N-by-N coefficient matrix A.
258: //          - if the iterative refinement converged, A is not modified;
259: //          - otherwise, it falled backed to double precision solution,
260: //          and then A contains the tile L and U factors from the factorization (not equivalent to LAPACK).
261: //
262: // L        PLASMA_Complex64_t* (NODEP or OUT)
263: //          On exit:
264: //          - if the iterative refinement converged, L is not modified;
265: //          - otherwise, it falled backed to double precision solution,
266: //          and then L is an auxiliary factorization data, related to the tile L factor,
267: //          necessary to solve the system of equations (not equivalent to LAPACK).
268: //
269: // IPIV     int* (OUT)
270: //          On exit, the pivot indices that define the permutations (not equivalent to LAPACK).
271: //
272: // B        PLASMA_Complex64_t* (INOUT)
273: //          On entry, the N-by-NRHS matrix of right hand side matrix B.
274: //          On exit, if return value = 0, the N-by-NRHS solution matrix X.
275: 
276: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
277: //          = 0: successful exit
278: //          > 0: if i, U(i,i) is exactly zero. The factorization has been completed,
279: //               but the factor U is exactly singular, so the solution could not be computed.
280: 
281: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
282: int PLASMA_zcgesv_Tile(PLASMA_desc *A, PLASMA_desc *L, int *IPIV, PLASMA_desc *B, PLASMA_desc *X, int *ITER)
283: {
284:     int N, NRHS, NB, NT, NTRHS;
285:     PLASMA_desc descA = *A;
286:     PLASMA_desc descL = *L;
287:     PLASMA_desc descB = *B;
288:     PLASMA_desc descX = *X;
289:     PLASMA_Complex32_t *SAbdl;
290:     PLASMA_Complex32_t *SLbdl;
291:     PLASMA_Complex32_t *SXbdl;
292:     PLASMA_Complex64_t *Rbdl;
293:     plasma_context_t *plasma;
294:     int counter;
295:     double *work;
296: 
297:     const int itermax = 30;
298:     const double bwdmax = 1.0;
299:     const PLASMA_Complex64_t negone = -1.0;
300:     const PLASMA_Complex64_t one = 1.0;
301:     char norm='I';
302:     int iiter;
303:     double Anorm, cte, eps, Rnorm, Xnorm;
304:     *ITER=0;
305: 
306:     plasma = plasma_context_self();
307:     if (plasma == NULL) {
308:         plasma_fatal_error("PLASMA_zcgesv_Tile", "PLASMA not initialized");
309:         return PLASMA_ERR_NOT_INITIALIZED;
310:     }
311:     /* Check descriptors for correctness */
312:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
313:         plasma_error("PLASMA_zcgesv_Tile", "invalid first descriptor");
314:         return PLASMA_ERR_ILLEGAL_VALUE;
315:     }
316:     if (plasma_desc_check(&descL) != PLASMA_SUCCESS) {
317:         plasma_error("PLASMA_zcgesv_Tile", "invalid second descriptor");
318:         return PLASMA_ERR_ILLEGAL_VALUE;
319:     }
320:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
321:         plasma_error("PLASMA_zcgesv_Tile", "invalid third descriptor");
322:         return PLASMA_ERR_ILLEGAL_VALUE;
323:     }
324:     if (plasma_desc_check(&descX) != PLASMA_SUCCESS) {
325:         plasma_error("PLASMA_zcgesv_Tile", "invalid fourth descriptor");
326:         return PLASMA_ERR_ILLEGAL_VALUE;
327:     }
328:     /* Check input arguments */
329:     if (descA.nb != descA.mb || descB.nb != descB.mb || descX.nb != descX.mb) {
330:         plasma_error("PLASMA_zcgesv_Tile", "only square tiles supported");
331:         return PLASMA_ERR_ILLEGAL_VALUE;
332:     }
333: 
334:     /* Set N, NRHS, NT & NTRHS */
335:     N = descA.lm;
336:     NRHS = descB.ln;
337:     NB = PLASMA_NB;
338:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
339:     NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
340: 
341:     work = (double *)plasma_shared_alloc(plasma, PLASMA_SIZE, PlasmaRealDouble);
342:     if (work == NULL) {
343:         plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
344:         plasma_shared_free(plasma, work);
345:         return PLASMA_ERR_OUT_OF_RESOURCES;
346:     }
347: 
348:     Rbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaComplexDouble);
349:     if (Rbdl == NULL) {
350:         plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
351:         plasma_shared_free(plasma, Rbdl);
352:         return PLASMA_ERR_OUT_OF_RESOURCES;
353:     }
354: 
355:     PLASMA_desc descR = plasma_desc_init(
356:         Rbdl, PlasmaComplexDouble,
357:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
358:         N, NRHS, 0, 0, N, NRHS);
359: 
360:     /* Allocate memory for single precision matrices in block layout */
361:     SAbdl = (PLASMA_Complex32_t *)plasma_shared_alloc(plasma, NT*NT*PLASMA_NBNBSIZE, PlasmaComplexFloat);
362:     SLbdl = (PLASMA_Complex32_t *)plasma_shared_alloc(plasma, NT*NT*PLASMA_IBNBSIZE, PlasmaComplexFloat);
363:     SXbdl = (PLASMA_Complex32_t *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaComplexFloat);
364:     if (SAbdl == NULL || SLbdl == NULL || SXbdl == NULL) {
365:         plasma_error("PLASMA_zcgesv", "plasma_shared_alloc() failed");
366:         plasma_shared_free(plasma, SAbdl);
367:         plasma_shared_free(plasma, SLbdl);
368:         plasma_shared_free(plasma, SXbdl);
369:         return PLASMA_ERR_OUT_OF_RESOURCES;
370:     }
371: 
372:     PLASMA_desc descSA = plasma_desc_init(
373:         SAbdl, PlasmaComplexFloat,
374:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
375:         N, N, 0, 0, N, N);
376: 
377:     PLASMA_desc descSL = plasma_desc_init(
378:         SLbdl, PlasmaComplexFloat,
379:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
380:         N, N, 0, 0, N, N);
381: 
382:     PLASMA_desc descSX = plasma_desc_init(
383:         SXbdl, PlasmaComplexFloat,
384:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
385:         N, NRHS, 0, 0, N, NRHS);
386: 
387:     /* Compute some constants */
388:     PLASMA_zlange(norm, descA, Anorm, work, counter);
389:     eps = dlamch("Epsilon");
390:     cte = Anorm*eps*((double) N)*bwdmax;
391: 
392:     /* Convert B from double precision to single precision and store
393:        the result in SX. */
394: 
395:     PLASMA_zlag2c(descB, descSX);
396: 
397:     /* Convert A from double precision to single precision and store
398:        the result in SA. */
399: 
400:     PLASMA_zlag2c(descA, descSA);
401: 
402:     /* Clear IPIV and Lbdl */
403:     plasma_memzero(IPIV, NT*NT*PLASMA_NB, PlasmaInteger);
404:     plasma_memzero(SLbdl, NT*NT*PLASMA_IBNBSIZE, PlasmaComplexFloat);
405: 
406:     /* Set INFO to ZERO */
407:     PLASMA_INFO = PLASMA_SUCCESS;
408: 
409:     /* Compute the LU factorization of SA */
410:     plasma_parallel_call_3(plasma_pcgetrf,
411:         PLASMA_desc, descSA,
412:         PLASMA_desc, descSL,
413:         int*, IPIV);
414: 
415:     if (PLASMA_INFO == PLASMA_SUCCESS) {
416: 
417:         /* Solve the system SA*SX = SB */
418: 
419:         /* Forward substitution */
420:         plasma_parallel_call_4(plasma_pctrsmpl,
421:             PLASMA_desc, descSA,
422:             PLASMA_desc, descSX,
423:             PLASMA_desc, descSL,
424:             int*, IPIV);
425: 
426:         /* Backward substitution */
427:         plasma_parallel_call_7(plasma_pctrsm,
428:             PLASMA_enum, PlasmaLeft,
429:             PLASMA_enum, PlasmaUpper,
430:             PLASMA_enum, PlasmaNoTrans,
431:             PLASMA_enum, PlasmaNonUnit,
432:             PLASMA_Complex32_t, 1.0,
433:             PLASMA_desc, descSA,
434:             PLASMA_desc, descSX);
435: 
436:     } else {
437:         plasma_shared_free(plasma, SAbdl);
438:         plasma_shared_free(plasma, SLbdl);
439:         plasma_shared_free(plasma, SXbdl);
440:         plasma_shared_free(plasma, Rbdl);
441:         return PLASMA_INFO;
442:     }
443: 
444:     /* Convert SX back to double precision */
445:     PLASMA_clag2z(descSX, descX);
446: 
447:     /* Compute R = B - AX. */
448:     PLASMA_zlacpy(descB,descR);
449:     plasma_parallel_call_7(plasma_pzgemm,
450:         PLASMA_enum, PlasmaNoTrans,
451:         PLASMA_enum, PlasmaNoTrans,
452:         PLASMA_Complex64_t, negone,
453:         PLASMA_desc, descA,
454:         PLASMA_desc, descX,
455:         PLASMA_Complex64_t, one,
456:         PLASMA_desc, descR);
457: 
458:     /* Check whether the NRHS normwise backward error satisfies the
459:        stopping criterion. If yes return. Note that ITER=0 (already set). */
460:     PLASMA_zlange(norm, descX, Xnorm, work, counter);
461:     PLASMA_zlange(norm, descR, Rnorm, work, counter);
462: 
463:     if (Rnorm < Xnorm * cte){
464:       /* The NRHS normwise backward errors satisfy the
465:          stopping criterion. We are good to exit. */
466:       plasma_shared_free(plasma, SAbdl);
467:       plasma_shared_free(plasma, SLbdl);
468:       plasma_shared_free(plasma, SXbdl);
469:       plasma_shared_free(plasma, Rbdl);
470:       return PLASMA_INFO;
471:     }
472: 
473:     /* Iterative refinement */
474:     for (iiter = 0; iiter < itermax; iiter++){
475: 
476:       /* Convert R from double precision to single precision
477:          and store the result in SX. */
478:       PLASMA_zlag2c(descR, descSX);
479: 
480:       /* Solve the system SA*SX = SR */
481: 
482:       /* Forward substitution */
483:       plasma_parallel_call_4(plasma_pctrsmpl,
484:             PLASMA_desc, descSA,
485:             PLASMA_desc, descSX,
486:             PLASMA_desc, descSL,
487:             int*, IPIV);
488: 
489:       /* Backward substitution */
490:       plasma_parallel_call_7(plasma_pctrsm,
491:             PLASMA_enum, PlasmaLeft,
492:             PLASMA_enum, PlasmaUpper,
493:             PLASMA_enum, PlasmaNoTrans,
494:             PLASMA_enum, PlasmaNonUnit,
495:             PLASMA_Complex32_t, 1.0,
496:             PLASMA_desc, descSA,
497:             PLASMA_desc, descSX);
498: 
499:       /* Convert SX back to double precision and update the current
500:          iterate. */
501:       PLASMA_clag2z(descSX, descR);
502:       PLASMA_zaxpy(one, descR, descX);
503: 
504: 
505:       /* Compute R = B - AX. */
506:       PLASMA_zlacpy(descB,descR);
507:       plasma_parallel_call_7(plasma_pzgemm,
508:         PLASMA_enum, PlasmaNoTrans,
509:         PLASMA_enum, PlasmaNoTrans,
510:         PLASMA_Complex64_t, negone,
511:         PLASMA_desc, descA,
512:         PLASMA_desc, descX,
513:         PLASMA_Complex64_t, one,
514:         PLASMA_desc, descR);
515: 
516:       /* Check whether the NRHS normwise backward errors satisfy the
517:          stopping criterion. If yes, set ITER=IITER>0 and return. */
518:       PLASMA_zlange(norm, descX, Xnorm, work, counter);
519:       PLASMA_zlange(norm, descR, Rnorm, work, counter);
520: 
521:       if (Rnorm < Xnorm * cte){
522:         /* The NRHS normwise backward errors satisfy the
523:            stopping criterion. We are good to exit. */
524:         *ITER = iiter;
525: 
526:         plasma_shared_free(plasma, SAbdl);
527:         plasma_shared_free(plasma, SLbdl);
528:         plasma_shared_free(plasma, SXbdl);
529:         plasma_shared_free(plasma, Rbdl);
530:         return PLASMA_INFO;
531:       }
532: 
533:     }
534: 
535:     /* We have performed ITER=itermax iterations and never satisified
536:        the stopping criterion, set up the ITER flag accordingly and
537:        follow up on double precision routine. */
538: 
539:     *ITER = -itermax - 1;
540: 
541:     plasma_shared_free(plasma, SAbdl);
542:     plasma_shared_free(plasma, SLbdl);
543:     plasma_shared_free(plasma, SXbdl);
544:     plasma_shared_free(plasma, Rbdl);
545: 
546:     /* Single-precision iterative refinement failed to converge to a
547:        satisfactory solution, so we resort to double precision. */
548: 
549:     /* Clear IPIV and Lbdl */
550:     plasma_memzero(IPIV, NT*NT*PLASMA_NB, PlasmaInteger);
551:     plasma_memzero(((PLASMA_Complex64_t *)(descL.mat)), NT*NT*PLASMA_IBNBSIZE, PlasmaComplexDouble);
552: 
553:     /* Set INFO to ZERO */
554:     PLASMA_INFO = PLASMA_SUCCESS;
555: 
556:     plasma_parallel_call_3(plasma_pzgetrf,
557:         PLASMA_desc, descA,
558:         PLASMA_desc, descL,
559:         int*, IPIV);
560: 
561:     if (PLASMA_INFO == PLASMA_SUCCESS)
562:     {
563:         plasma_parallel_call_4(plasma_pztrsmpl,
564:             PLASMA_desc, descA,
565:             PLASMA_desc, descX,
566:             PLASMA_desc, descL,
567:             int*, IPIV);
568: 
569:         plasma_parallel_call_7(plasma_pztrsm,
570:             PLASMA_enum, PlasmaLeft,
571:             PLASMA_enum, PlasmaUpper,
572:             PLASMA_enum, PlasmaNoTrans,
573:             PLASMA_enum, PlasmaNonUnit,
574:             PLASMA_Complex64_t, 1.0,
575:             PLASMA_desc, descA,
576:             PLASMA_desc, descX);
577:     }
578: 
579:     return PLASMA_INFO;
580: }
581: