001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_zposv - Computes the solution to a system of linear equations A * X = B,
012: // where A is an N-by-N symmetric positive definite (or Hermitian positive definite
013: // in the complex case) matrix and X and B are N-by-NRHS matrices.
014: // The Cholesky decomposition is used to factor A as
015: //
016: //   A = U**H * U, if uplo = PlasmaUpper, or
017: //   A = L * L**H, if uplo =  PlasmaLower,
018: //
019: // where U is an upper triangular matrix and  L is a lower triangular matrix.
020: // The factored form of A is then used to solve the system of equations A * X = B.
021: 
022: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
023: // uplo     PLASMA_enum (IN)
024: //          Specifies whether the matrix A is upper triangular or lower triangular:
025: //          = PlasmaUpper: Upper triangle of A is stored;
026: //          = PlasmaLower: Lower triangle of A is stored.
027: //
028: // N        int (IN)
029: //          The number of linear equations, i.e., the order of the matrix A. N >= 0.
030: //
031: // NRHS     int (IN)
032: //          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
033: //
034: // A        PLASMA_Complex64_t* (INOUT)
035: //          On entry, the symmetric positive definite (or Hermitian) matrix A.
036: //          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A
037: //          contains the upper triangular part of the matrix A, and the strictly lower triangular
038: //          part of A is not referenced.
039: //          If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower
040: //          triangular part of the matrix A, and the strictly upper triangular part of A is not
041: //          referenced.
042: //          On exit, if return value = 0, the factor U or L from the Cholesky factorization
043: //          A = U**H*U or A = L*L**H.
044: //
045: // LDA      int (IN)
046: //          The leading dimension of the array A. LDA >= max(1,N).
047: //
048: // B        PLASMA_Complex64_t* (INOUT)
049: //          On entry, the N-by-NRHS right hand side matrix B.
050: //          On exit, if return value = 0, the N-by-NRHS solution matrix X.
051: //
052: // LDB      int (IN)
053: //          The leading dimension of the array B. LDB >= max(1,N).
054: 
055: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
056: //          = 0: successful exit
057: //          < 0: if -i, the i-th argument had an illegal value
058: //          > 0: if i, the leading minor of order i of A is not positive definite, so the
059: //               factorization could not be completed, and the solution has not been computed.
060: 
061: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
062: int PLASMA_zposv(PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA,
063:                  PLASMA_Complex64_t *B, int LDB)
064: {
065:     int NB, NT, NTRHS;
066:     int status;
067:     PLASMA_Complex64_t *Abdl;
068:     PLASMA_Complex64_t *Bbdl;
069:     plasma_context_t *plasma;
070: 
071:     plasma = plasma_context_self();
072:     if (plasma == NULL) {
073:         plasma_fatal_error("PLASMA_zposv", "PLASMA not initialized");
074:         return PLASMA_ERR_NOT_INITIALIZED;
075:     }
076:     /* Check input arguments */
077:     if (uplo != PlasmaUpper && uplo != PlasmaLower) {
078:         plasma_error("PLASMA_zposv", "illegal value of uplo");
079:         return -1;
080:     }
081:     if (N < 0) {
082:         plasma_error("PLASMA_zposv", "illegal value of N");
083:         return -2;
084:     }
085:     if (NRHS < 0) {
086:         plasma_error("PLASMA_zposv", "illegal value of NRHS");
087:         return -3;
088:     }
089:     if (LDA < max(1, N)) {
090:         plasma_error("PLASMA_zposv", "illegal value of LDA");
091:         return -5;
092:     }
093:     if (LDB < max(1, N)) {
094:         plasma_error("PLASMA_zposv", "illegal value of LDB");
095:         return -7;
096:     }
097:     /* Quick return - currently NOT equivalent to LAPACK's
098:      * LAPACK does not have such check for DPOSV */
099:     if (min(N, NRHS) == 0)
100:         return PLASMA_SUCCESS;
101: 
102:     /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
103:     status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
104:     if (status != PLASMA_SUCCESS) {
105:         plasma_error("PLASMA_zposv", "plasma_tune() failed");
106:         return status;
107:     }
108: 
109:     /* Set NT & NTRHS */
110:     NB = PLASMA_NB;
111:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
112:     NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
113: 
114:     /* Allocate memory for matrices in block layout */
115:     Abdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, NT*NT*PLASMA_NBNBSIZE, PlasmaComplexDouble);
116:     Bbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaComplexDouble);
117:     if (Abdl == NULL || Bbdl == NULL) {
118:         plasma_error("PLASMA_zposv", "plasma_shared_alloc() failed");
119:         plasma_shared_free(plasma, Abdl);
120:         plasma_shared_free(plasma, Bbdl);
121:         return PLASMA_ERR_OUT_OF_RESOURCES;
122:     }
123: 
124:     PLASMA_desc descA = plasma_desc_init(
125:         Abdl, PlasmaComplexDouble,
126:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
127:         N, N, 0, 0, N, N);
128: 
129:     PLASMA_desc descB = plasma_desc_init(
130:         Bbdl, PlasmaComplexDouble,
131:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
132:         N, NRHS, 0, 0, N, NRHS);
133: 
134:     plasma_parallel_call_3(plasma_lapack_to_tile,
135:         PLASMA_Complex64_t*, A,
136:         int, LDA,
137:         PLASMA_desc, descA);
138: 
139:     plasma_parallel_call_3(plasma_lapack_to_tile,
140:         PLASMA_Complex64_t*, B,
141:         int, LDB,
142:         PLASMA_desc, descB);
143: 
144:     /* Call the native interface */
145:     status = PLASMA_zposv_Tile(uplo, &descA, &descB);
146: 
147:     if (status == PLASMA_SUCCESS) {
148:         plasma_parallel_call_3(plasma_tile_to_lapack,
149:             PLASMA_desc, descA,
150:             PLASMA_Complex64_t*, A,
151:             int, LDA);
152: 
153:         plasma_parallel_call_3(plasma_tile_to_lapack,
154:             PLASMA_desc, descB,
155:             PLASMA_Complex64_t*, B,
156:             int, LDB);
157:     }
158:     plasma_shared_free(plasma, Abdl);
159:     plasma_shared_free(plasma, Bbdl);
160:     return status;
161: }
162: 
163: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
164: // PLASMA_zposv_Tile - Computes the solution to a system of linear equations A * X = B,
165: // where A is an N-by-N symmetric positive definite (or Hermitian positive definite
166: // in the complex case) matrix and X and B are N-by-NRHS matrices.
167: // The Cholesky decomposition is used to factor A as
168: //
169: //   A = U**H * U, if uplo = PlasmaUpper, or
170: //   A = L * L**H, if uplo =  PlasmaLower,
171: //
172: // where U is an upper triangular matrix and  L is a lower triangular matrix.
173: // The factored form of A is then used to solve the system of equations A * X = B.
174: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
175: 
176: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
177: // uplo     PLASMA_enum (IN)
178: //          Specifies whether the matrix A is upper triangular or lower triangular:
179: //          = PlasmaUpper: Upper triangle of A is stored;
180: //          = PlasmaLower: Lower triangle of A is stored.
181: //
182: // A        PLASMA_Complex64_t* (INOUT)
183: //          On entry, the symmetric positive definite (or Hermitian) matrix A.
184: //          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A
185: //          contains the upper triangular part of the matrix A, and the strictly lower triangular
186: //          part of A is not referenced.
187: //          If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower
188: //          triangular part of the matrix A, and the strictly upper triangular part of A is not
189: //          referenced.
190: //          On exit, if return value = 0, the factor U or L from the Cholesky factorization
191: //          A = U**H*U or A = L*L**H.
192: //
193: // B        PLASMA_Complex64_t* (INOUT)
194: //          On entry, the N-by-NRHS right hand side matrix B.
195: //          On exit, if return value = 0, the N-by-NRHS solution matrix X.
196: 
197: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
198: //          = 0: successful exit
199: //          > 0: if i, the leading minor of order i of A is not positive definite, so the
200: //               factorization could not be completed, and the solution has not been computed.
201: 
202: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
203: int PLASMA_zposv_Tile(PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B)
204: {
205:     PLASMA_desc descA = *A;
206:     PLASMA_desc descB = *B;
207:     plasma_context_t *plasma;
208: 
209:     plasma = plasma_context_self();
210:     if (plasma == NULL) {
211:         plasma_fatal_error("PLASMA_zposv_Tile", "PLASMA not initialized");
212:         return PLASMA_ERR_NOT_INITIALIZED;
213:     }
214:     /* Check descriptors for correctness */
215:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
216:         plasma_error("PLASMA_zposv_Tile", "invalid first descriptor");
217:         return PLASMA_ERR_ILLEGAL_VALUE;
218:     }
219:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
220:         plasma_error("PLASMA_zposv_Tile", "invalid second descriptor");
221:         return PLASMA_ERR_ILLEGAL_VALUE;
222:     }
223:     /* Check input arguments */
224:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
225:         plasma_error("PLASMA_zposv_Tile", "only square tiles supported");
226:         return PLASMA_ERR_ILLEGAL_VALUE;
227:     }
228:     if (uplo != PlasmaUpper && uplo != PlasmaLower) {
229:         plasma_error("PLASMA_zposv_Tile", "illegal value of uplo");
230:         return -1;
231:     }
232:     /* Quick return - currently NOT equivalent to LAPACK's
233:      * LAPACK does not have such check for DPOSV */
234: /*
235:     if (min(N, NRHS) == 0)
236:         return PLASMA_SUCCESS;
237: */
238:     plasma_parallel_call_2(plasma_pzpotrf,
239:         PLASMA_enum, uplo,
240:         PLASMA_desc, descA);
241: 
242:     if (PLASMA_INFO == PLASMA_SUCCESS) {
243:         plasma_parallel_call_7(plasma_pztrsm,
244:             PLASMA_enum, PlasmaLeft,
245:             PLASMA_enum, uplo,
246:             PLASMA_enum, uplo == PlasmaUpper ? PlasmaConjTrans : PlasmaNoTrans,
247:             PLASMA_enum, PlasmaNonUnit,
248:             PLASMA_Complex64_t, 1.0,
249:             PLASMA_desc, descA,
250:             PLASMA_desc, descB);
251: 
252:         plasma_parallel_call_7(plasma_pztrsm,
253:             PLASMA_enum, PlasmaLeft,
254:             PLASMA_enum, uplo,
255:             PLASMA_enum, uplo == PlasmaUpper ? PlasmaNoTrans : PlasmaConjTrans,
256:             PLASMA_enum, PlasmaNonUnit,
257:             PLASMA_Complex64_t, 1.0,
258:             PLASMA_desc, descA,
259:             PLASMA_desc, descB);
260:     }
261:     return PLASMA_INFO;
262: }
263: