001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_spotrs - Solves a system of linear equations A * X = B with a symmetric positive
012: // definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky
013: // factorization A = U**T*U or A = L*L**T computed by PLASMA_spotrf.
014: 
015: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
016: // uplo     PLASMA_enum (IN)
017: //          = PlasmaUpper: Upper triangle of A is stored;
018: //          = PlasmaLower: Lower triangle of A is stored.
019: //
020: // N        int (IN)
021: //          The order of the matrix A. N >= 0.
022: //
023: // NRHS     int (IN)
024: //          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
025: //
026: // A        float* (IN)
027: //          The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T,
028: //          computed by PLASMA_spotrf.
029: //
030: // LDA      int (IN)
031: //          The leading dimension of the array A. LDA >= max(1,N).
032: //
033: // B        float* (INOUT)
034: //          On entry, the N-by-NRHS right hand side matrix B.
035: //          On exit, if return value = 0, the N-by-NRHS solution matrix X.
036: //
037: // LDB      int (IN)
038: //          The leading dimension of the array B. LDB >= max(1,N).
039: 
040: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
041: //          = 0: successful exit
042: //          < 0: if -i, the i-th argument had an illegal value
043: 
044: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
045: int PLASMA_spotrs(PLASMA_enum uplo, int N, int NRHS, float *A, int LDA,
046:                   float *B, int LDB)
047: {
048:     int NB, NT, NTRHS;
049:     int status;
050:     float *Abdl;
051:     float *Bbdl;
052:     plasma_context_t *plasma;
053: 
054:     plasma = plasma_context_self();
055:     if (plasma == NULL) {
056:         plasma_fatal_error("PLASMA_spotrs", "PLASMA not initialized");
057:         return PLASMA_ERR_NOT_INITIALIZED;
058:     }
059:     /* Check input arguments */
060:     if (uplo != PlasmaUpper && uplo != PlasmaLower) {
061:         plasma_error("PLASMA_spotrs", "illegal value of uplo");
062:         return -1;
063:     }
064:     if (N < 0) {
065:         plasma_error("PLASMA_spotrs", "illegal value of N");
066:         return -2;
067:     }
068:     if (NRHS < 0) {
069:         plasma_error("PLASMA_spotrs", "illegal value of NRHS");
070:         return -3;
071:     }
072:     if (LDA < max(1, N)) {
073:         plasma_error("PLASMA_spotrs", "illegal value of LDA");
074:         return -5;
075:     }
076:     if (LDB < max(1, N)) {
077:         plasma_error("PLASMA_spotrs", "illegal value of LDB");
078:         return -7;
079:     }
080:     /* Quick return */
081:     if (min(N, NRHS) == 0)
082:         return PLASMA_SUCCESS;
083: 
084:     /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
085:     status = plasma_tune(PLASMA_FUNC_SPOSV, N, N, NRHS);
086:     if (status != PLASMA_SUCCESS) {
087:         plasma_error("PLASMA_spotrs", "plasma_tune() failed");
088:         return status;
089:     }
090: 
091:     /* Set NT & NTRHS */
092:     NB = PLASMA_NB;
093:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
094:     NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
095: 
096:     /* Allocate memory for matrices in block layout */
097:     Abdl = (float *)plasma_shared_alloc(plasma, NT*NT*PLASMA_NBNBSIZE, PlasmaRealFloat);
098:     Bbdl = (float *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaRealFloat);
099:     if (Abdl == NULL || Bbdl == NULL) {
100:         plasma_error("PLASMA_spotrs", "plasma_shared_alloc() failed");
101:         plasma_shared_free(plasma, Abdl);
102:         plasma_shared_free(plasma, Bbdl);
103:         return PLASMA_ERR_OUT_OF_RESOURCES;
104:     }
105: 
106:     PLASMA_desc descA = plasma_desc_init(
107:         Abdl, PlasmaRealFloat,
108:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
109:         N, N, 0, 0, N, N);
110: 
111:     PLASMA_desc descB = plasma_desc_init(
112:         Bbdl, PlasmaRealFloat,
113:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
114:         N, NRHS, 0, 0, N, NRHS);
115: 
116:     plasma_parallel_call_3(plasma_lapack_to_tile,
117:         float*, A,
118:         int, LDA,
119:         PLASMA_desc, descA);
120: 
121:     plasma_parallel_call_3(plasma_lapack_to_tile,
122:         float*, B,
123:         int, LDB,
124:         PLASMA_desc, descB);
125: 
126:     /* Call the native interface */
127:     status = PLASMA_spotrs_Tile(uplo, &descA, &descB);
128: 
129:     if (status == PLASMA_SUCCESS)
130:         plasma_parallel_call_3(plasma_tile_to_lapack,
131:             PLASMA_desc, descB,
132:             float*, B,
133:             int, LDB);
134: 
135:     plasma_shared_free(plasma, Abdl);
136:     plasma_shared_free(plasma, Bbdl);
137:     return status;
138: }
139: 
140: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
141: // PLASMA_spotrs_Tile - Solves a system of linear equations A * X = B with a symmetric positive
142: // definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky
143: // factorization A = U**T*U or A = L*L**T computed by PLASMA_spotrf.
144: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
145: 
146: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
147: // uplo     PLASMA_enum (IN)
148: //          = PlasmaUpper: Upper triangle of A is stored;
149: //          = PlasmaLower: Lower triangle of A is stored.
150: //
151: // A        float* (IN)
152: //          The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T,
153: //          computed by PLASMA_spotrf.
154: //
155: // B        float* (INOUT)
156: //          On entry, the N-by-NRHS right hand side matrix B.
157: //          On exit, if return value = 0, the N-by-NRHS solution matrix X.
158: 
159: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
160: //          = 0: successful exit
161: 
162: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
163: int PLASMA_spotrs_Tile(PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B)
164: {
165:     PLASMA_desc descA = *A;
166:     PLASMA_desc descB = *B;
167:     plasma_context_t *plasma;
168: 
169:     plasma = plasma_context_self();
170:     if (plasma == NULL) {
171:         plasma_fatal_error("PLASMA_spotrs_Tile", "PLASMA not initialized");
172:         return PLASMA_ERR_NOT_INITIALIZED;
173:     }
174:     /* Check descriptors for correctness */
175:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
176:         plasma_error("PLASMA_spotrs_Tile", "invalid first descriptor");
177:         return PLASMA_ERR_ILLEGAL_VALUE;
178:     }
179:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
180:         plasma_error("PLASMA_spotrs_Tile", "invalid second descriptor");
181:         return PLASMA_ERR_ILLEGAL_VALUE;
182:     }
183:     /* Check input arguments */
184:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
185:         plasma_error("PLASMA_spotrs_Tile", "only square tiles supported");
186:         return PLASMA_ERR_ILLEGAL_VALUE;
187:     }
188:     if (uplo != PlasmaUpper && uplo != PlasmaLower) {
189:         plasma_error("PLASMA_spotrs_Tile", "illegal value of uplo");
190:         return -1;
191:     }
192:     /* Quick return */
193: /*
194:     if (min(N, NRHS) == 0)
195:         return PLASMA_SUCCESS;
196: */
197:     plasma_parallel_call_7(plasma_pstrsm,
198:         PLASMA_enum, PlasmaLeft,
199:         PLASMA_enum, uplo,
200:         PLASMA_enum, uplo == PlasmaUpper ? PlasmaTrans : PlasmaNoTrans,
201:         PLASMA_enum, PlasmaNonUnit,
202:         float, 1.0,
203:         PLASMA_desc, descA,
204:         PLASMA_desc, descB);
205: 
206:     plasma_parallel_call_7(plasma_pstrsm,
207:         PLASMA_enum, PlasmaLeft,
208:         PLASMA_enum, uplo,
209:         PLASMA_enum, uplo == PlasmaUpper ? PlasmaNoTrans : PlasmaTrans,
210:         PLASMA_enum, PlasmaNonUnit,
211:         float, 1.0,
212:         PLASMA_desc, descA,
213:         PLASMA_desc, descB);
214: 
215:     return PLASMA_SUCCESS;
216: }
217: