001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_sgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization
012: // A = L*Q computed by PLASMA_sgelqf.
013: 
014: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
015: // M        int (IN)
016: //          The number of rows of the matrix A. M >= 0.
017: //
018: // N        int (IN)
019: //          The number of columns of the matrix A. N >= M >= 0.
020: //
021: // NRHS     int (IN)
022: //          The number of columns of B. NRHS >= 0.
023: //
024: // A        float* (IN)
025: //          Details of the LQ factorization of the original matrix A as returned by PLASMA_sgelqf.
026: //
027: // LDA      int (IN)
028: //          The leading dimension of the array A. LDA >= M.
029: //
030: // T        float* (IN)
031: //          Auxiliary factorization data, computed by PLASMA_sgelqf.
032: //
033: // B        float* (INOUT)
034: //          On entry, the M-by-NRHS right hand side matrix B.
035: //          On exit, the N-by-NRHS solution matrix X.
036: //
037: // LDB      int (IN)
038: //          The leading dimension of the array B. LDB >= N.
039: 
040: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
041: //          = 0: successful exit
042: //          < 0: if -i, the i-th argument had an illegal value
043: 
044: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
045: int PLASMA_sgelqs(int M, int N, int NRHS, float *A, int LDA, float *T,
046:                   float *B, int LDB)
047: {
048:     int i,j;
049:     int NB, MT, NT, NTRHS;
050:     int status;
051:     float *Abdl;
052:     float *Bbdl;
053:     float *Tbdl;
054:     plasma_context_t *plasma;
055: 
056:     plasma = plasma_context_self();
057:     if (plasma == NULL) {
058:         plasma_fatal_error("PLASMA_sgelqs", "PLASMA not initialized");
059:         return PLASMA_ERR_NOT_INITIALIZED;
060:     }
061:     /* Check input arguments */
062:     if (M < 0) {
063:         plasma_error("PLASMA_sgelqs", "illegal value of M");
064:         return -1;
065:     }
066:     if (N < 0 || M > N) {
067:         plasma_error("PLASMA_sgelqs", "illegal value of N");
068:         return -2;
069:     }
070:     if (NRHS < 0) {
071:         plasma_error("PLASMA_sgelqs", "illegal value of N");
072:         return -3;
073:     }
074:     if (LDA < max(1, M)) {
075:         plasma_error("PLASMA_sgelqs", "illegal value of LDA");
076:         return -5;
077:     }
078:     if (LDB < max(1, max(1, N))) {
079:         plasma_error("PLASMA_sgelqs", "illegal value of LDB");
080:         return -8;
081:     }
082:     /* Quick return */
083:     if (min(M, min(N, NRHS)) == 0) {
084:         return PLASMA_SUCCESS;
085:     }
086: 
087:     /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
088:     status = plasma_tune(PLASMA_FUNC_SGELS, M, N, NRHS);
089:     if (status != PLASMA_SUCCESS) {
090:         plasma_error("PLASMA_sgelqs", "plasma_tune() failed");
091:         return status;
092:     }
093: 
094:     /* Set MT, NT & NTRHS */
095:     NB = PLASMA_NB;
096:     MT = (M%NB==0) ? (M/NB) : (M/NB+1);
097:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
098:     NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
099: 
100:     /* Allocate memory for matrices in block layout */
101:     Abdl = (float *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaRealFloat);
102:     Tbdl = (float *)plasma_shared_alloc(plasma, MT*NT*PLASMA_IBNBSIZE, PlasmaRealFloat);
103:     Bbdl = (float *)plasma_shared_alloc(plasma, max(MT, NT)*NTRHS*PLASMA_NBNBSIZE, PlasmaRealFloat);
104:     if (Abdl == NULL || Tbdl == NULL || Bbdl == NULL) {
105:         plasma_error("PLASMA_sgelqs", "plasma_shared_alloc() failed");
106:         plasma_shared_free(plasma, Abdl);
107:         plasma_shared_free(plasma, Tbdl);
108:         plasma_shared_free(plasma, Bbdl);
109:         return PLASMA_ERR_OUT_OF_RESOURCES;
110:     }
111: 
112:     PLASMA_desc descA = plasma_desc_init(
113:         Abdl, PlasmaRealFloat,
114:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
115:         M, N, 0, 0, M, N);
116: 
117:     PLASMA_desc descB = plasma_desc_init(
118:         Bbdl, PlasmaRealFloat,
119:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
120:         N, NRHS, 0, 0, N, NRHS);
121: 
122:     PLASMA_desc descT = plasma_desc_init(
123:         Tbdl, PlasmaRealFloat,
124:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
125:         M, N, 0, 0, M, N);
126: 
127:     plasma_parallel_call_3(plasma_lapack_to_tile,
128:         float*, A,
129:         int, LDA,
130:         PLASMA_desc, descA);
131: 
132:     plasma_parallel_call_3(plasma_lapack_to_tile,
133:         float*, B,
134:         int, LDB,
135:         PLASMA_desc, descB);
136: 
137:     /* Receive T from the user */
138:     plasma_memcpy(Tbdl, T, MT*NT*PLASMA_IBNBSIZE, PlasmaRealFloat);
139: 
140:     /* Call the native interface */
141:     status = PLASMA_sgelqs_Tile(&descA, &descT, &descB);
142: 
143:     if (status == PLASMA_SUCCESS) {
144:         plasma_parallel_call_3(plasma_tile_to_lapack,
145:             PLASMA_desc, descA,
146:             float*, A,
147:             int, LDA);
148: 
149:         plasma_parallel_call_3(plasma_tile_to_lapack,
150:             PLASMA_desc, descB,
151:             float*, B,
152:             int, LDB);
153:     }
154:     plasma_shared_free(plasma, Abdl);
155:     plasma_shared_free(plasma, Tbdl);
156:     plasma_shared_free(plasma, Bbdl);
157:     return status;
158: }
159: 
160: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
161: // PLASMA_sgelqs_Tile - Compute a minimum-norm solution min || A*X - B || using the LQ
162: // factorization A = L*Q computed by PLASMA_sgelqf_Tile.
163: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
164: 
165: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
166: // A        float* (IN)
167: //          Details of the LQ factorization of the original matrix A as returned by PLASMA_sgelqf.
168: //
169: // T        float* (IN)
170: //          Auxiliary factorization data, computed by PLASMA_sgelqf.
171: //
172: // B        float* (INOUT)
173: //          On entry, the M-by-NRHS right hand side matrix B.
174: //          On exit, the N-by-NRHS solution matrix X.
175: 
176: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
177: //          = 0: successful exit
178: 
179: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
180: int PLASMA_sgelqs_Tile(PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B)
181: {
182:     PLASMA_desc descA = *A;
183:     PLASMA_desc descT = *T;
184:     PLASMA_desc descB = *B;
185:     plasma_context_t *plasma;
186: 
187:     plasma = plasma_context_self();
188:     if (plasma == NULL) {
189:         plasma_fatal_error("PLASMA_sgelqs_Tile", "PLASMA not initialized");
190:         return PLASMA_ERR_NOT_INITIALIZED;
191:     }
192:     /* Check descriptors for correctness */
193:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
194:         plasma_error("PLASMA_sgelqs_Tile", "invalid first descriptor");
195:         return PLASMA_ERR_ILLEGAL_VALUE;
196:     }
197:     if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
198:         plasma_error("PLASMA_sgelqs_Tile", "invalid second descriptor");
199:         return PLASMA_ERR_ILLEGAL_VALUE;
200:     }
201:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
202:         plasma_error("PLASMA_sgelqs_Tile", "invalid third descriptor");
203:         return PLASMA_ERR_ILLEGAL_VALUE;
204:     }
205:     /* Check input arguments */
206:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
207:         plasma_error("PLASMA_sgelqs_Tile", "only square tiles supported");
208:         return PLASMA_ERR_ILLEGAL_VALUE;
209:     }
210:     /* Quick return */
211: /*
212:     if (min(M, min(N, NRHS)) == 0) {
213:         return PLASMA_SUCCESS;
214:     }
215: */
216:     plasma_parallel_call_1(plasma_tile_zero,
217:         PLASMA_desc, plasma_desc_submatrix(descB, descA.m, 0, descA.n-descA.m, descB.n));
218: 
219:     plasma_parallel_call_7(plasma_pstrsm,
220:         PLASMA_enum, PlasmaLeft,
221:         PLASMA_enum, PlasmaLower,
222:         PLASMA_enum, PlasmaNoTrans,
223:         PLASMA_enum, PlasmaNonUnit,
224:         float, 1.0,
225:         PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
226:         PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.m, descB.n));
227: 
228:     plasma_parallel_call_3(plasma_psormlq,
229:         PLASMA_desc, descA,
230:         PLASMA_desc, descB,
231:         PLASMA_desc, descT);
232: 
233:     return PLASMA_SUCCESS;
234: }
235: