001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Hatem Ltaief, Jakub Kurzak                                 ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_dormqr - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal
012: // matrix (unitary in the complex case) defined as the product of elementary reflectors returned
013: // by PLASMA_dgeqrf. Q is of order M.
014: 
015: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
016: // side     PLASMA_enum (IN)
017: //          Intended usage:
018: //          = PlasmaLeft:  apply Q or Q**T from the left;
019: //          = PlasmaRight: apply Q or Q**T from the right.
020: //          Currently only PlasmaLeft is supported.
021: //
022: // trans    PLASMA_enum (IN)
023: //          Intended usage:
024: //          = PlasmaNoTrans:   no transpose, apply Q;
025: //          = PlasmaTrans: conjugate transpose, apply Q**T.
026: //          Currently only PlasmaTrans is supported.
027: //
028: // M        int (IN)
029: //          The number of rows of the matrix C. M >= 0.
030: //
031: // N        int (IN)
032: //          The number of columns of the matrix C. N >= 0.
033: //
034: // K        int (IN)
035: //          The number of columns of elementary tile reflectors whose product defines the matrix Q.
036: //          M >= K >= 0.
037: //
038: // A        double* (IN)
039: //          Details of the QR factorization of the original matrix A as returned by PLASMA_dgeqrf.
040: //
041: // LDA      int (IN)
042: //          The leading dimension of the array A. LDA >= max(1,M);
043: //
044: // T        double* (IN)
045: //          Auxiliary factorization data, computed by PLASMA_dgeqrf.
046: //
047: // B        double* (INOUT)
048: //          On entry, the M-by-N matrix B.
049: //          On exit, B is overwritten by Q*B or Q**T*B.
050: //
051: // LDB      int (IN)
052: //          The leading dimension of the array C. LDC >= max(1,M).
053: 
054: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
055: //          = 0: successful exit
056: //          < 0: if -i, the i-th argument had an illegal value
057: 
058: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
059: int PLASMA_dormqr(PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, double *A,
060:                   int LDA, double *T, double *B, int LDB)
061: {
062:     int NB, MT, NT, KT;
063:     int status;
064:     double *Abdl;
065:     double *Bbdl;
066:     double *Tbdl;
067:     plasma_context_t *plasma;
068: 
069:     plasma = plasma_context_self();
070:     if (plasma == NULL) {
071:         plasma_fatal_error("PLASMA_dormqr", "PLASMA not initialized");
072:         return PLASMA_ERR_NOT_INITIALIZED;
073:     }
074:     /* Check input arguments */
075:     if (side != PlasmaLeft) {
076:         plasma_error("PLASMA_dormqr", "only PlasmaLeft supported");
077:         return PLASMA_ERR_NOT_SUPPORTED;
078:     }
079:     if (trans != PlasmaTrans) {
080:         plasma_error("PLASMA_dormqr", "only PlasmaTrans supported");
081:         return PLASMA_ERR_NOT_SUPPORTED;
082:     }
083:     if (M < 0) {
084:         plasma_error("PLASMA_dormqr", "illegal value of M");
085:         return -3;
086:     }
087:     if (N < 0) {
088:         plasma_error("PLASMA_dormqr", "illegal value of N");
089:         return -4;
090:     }
091:     if (K < 0) {
092:         plasma_error("PLASMA_dormqr", "illegal value of K");
093:         return -5;
094:     }
095:     if (LDA < max(1, M)) {
096:         plasma_error("PLASMA_dormqr", "illegal value of LDA");
097:         return -7;
098:     }
099:     if (LDB < max(1, M)) {
100:         plasma_error("PLASMA_dormqr", "illegal value of LDB");
101:         return -10;
102:     }
103:     /* Quick return - currently NOT equivalent to LAPACK's:
104:      * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
105:     if (min(M, min(N, K)) == 0)
106:         return PLASMA_SUCCESS;
107: 
108:     /* Tune NB & IB depending on M, K & N; Set NBNBSIZE */
109:     status = plasma_tune(PLASMA_FUNC_DGELS, M, K, N);
110:     if (status != PLASMA_SUCCESS) {
111:         plasma_error("PLASMA_dormqr", "plasma_tune() failed");
112:         return status;
113:     }
114: 
115:     /* Set MT, NT & NTRHS */
116:     NB = PLASMA_NB;
117:     MT = (M%NB==0) ? (M/NB) : (M/NB+1);
118:     KT = (K%NB==0) ? (K/NB) : (K/NB+1);
119:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
120: 
121:     /* Allocate memory for matrices in block layout */
122:     Abdl = (double *)plasma_shared_alloc(plasma, MT*KT*PLASMA_NBNBSIZE, PlasmaRealDouble);
123:     Tbdl = (double *)plasma_shared_alloc(plasma, MT*KT*PLASMA_IBNBSIZE, PlasmaRealDouble);
124:     Bbdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaRealDouble);
125:     if (Abdl == NULL || Tbdl == NULL || Bbdl == NULL) {
126:         plasma_error("PLASMA_dormqr", "plasma_shared_alloc() failed");
127:         plasma_shared_free(plasma, Abdl);
128:         plasma_shared_free(plasma, Tbdl);
129:         plasma_shared_free(plasma, Bbdl);
130:         return PLASMA_ERR_OUT_OF_RESOURCES;
131:     }
132: 
133:     PLASMA_desc descA = plasma_desc_init(
134:         Abdl, PlasmaRealDouble,
135:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
136:         M, K, 0, 0, M, K);
137: 
138:     PLASMA_desc descB = plasma_desc_init(
139:         Bbdl, PlasmaRealDouble,
140:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
141:         M, N, 0, 0, M, N);
142: 
143:     PLASMA_desc descT = plasma_desc_init(
144:         Tbdl, PlasmaRealDouble,
145:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
146:         M, K, 0, 0, M, K);
147: 
148:     plasma_parallel_call_3(plasma_lapack_to_tile,
149:         double*, A,
150:         int, LDA,
151:         PLASMA_desc, descA);
152: 
153:     plasma_parallel_call_3(plasma_lapack_to_tile,
154:         double*, B,
155:         int, LDB,
156:         PLASMA_desc, descB);
157: 
158:     /* Receive T from the user */
159:     plasma_memcpy(Tbdl, T, MT*KT*PLASMA_IBNBSIZE, PlasmaRealDouble);
160: 
161:     /* Call the native interface */
162:     status = PLASMA_dormqr_Tile(PlasmaLeft, PlasmaTrans, &descA, &descT, &descB);
163: 
164:     if (status == PLASMA_SUCCESS)
165:         plasma_parallel_call_3(plasma_tile_to_lapack,
166:             PLASMA_desc, descB,
167:             double*, B,
168:             int, LDB);
169: 
170:     plasma_shared_free(plasma, Abdl);
171:     plasma_shared_free(plasma, Tbdl);
172:     plasma_shared_free(plasma, Bbdl);
173:     return status;
174: }
175: 
176: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
177: // PLASMA_dormqr_Tile - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal
178: // matrix (unitary in the complex case) defined as the product of elementary reflectors returned
179: // by PLASMA_dgeqrf_Tile Q is of order M.
180: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
181: 
182: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
183: // side     PLASMA_enum (IN)
184: //          Intended usage:
185: //          = PlasmaLeft:  apply Q or Q**T from the left;
186: //          = PlasmaRight: apply Q or Q**T from the right.
187: //          Currently only PlasmaLeft is supported.
188: //
189: // trans    PLASMA_enum (IN)
190: //          Intended usage:
191: //          = PlasmaNoTrans:   no transpose, apply Q;
192: //          = PlasmaTrans: conjugate transpose, apply Q**T.
193: //          Currently only PlasmaTrans is supported.
194: //
195: // A        double* (IN)
196: //          Details of the QR factorization of the original matrix A as returned by PLASMA_dgeqrf.
197: //
198: // T        double* (IN)
199: //          Auxiliary factorization data, computed by PLASMA_dgeqrf.
200: //
201: // B        double* (INOUT)
202: //          On entry, the M-by-N matrix B.
203: //          On exit, B is overwritten by Q*B or Q**T*B.
204: 
205: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
206: //          = 0: successful exit
207: 
208: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
209: int PLASMA_dormqr_Tile(PLASMA_enum side, PLASMA_enum trans, PLASMA_desc *A, PLASMA_desc *T,
210:                        PLASMA_desc *B)
211: {
212:     PLASMA_desc descA = *A;
213:     PLASMA_desc descT = *T;
214:     PLASMA_desc descB = *B;
215:     plasma_context_t *plasma;
216: 
217:     plasma = plasma_context_self();
218:     if (plasma == NULL) {
219:         plasma_fatal_error("PLASMA_dormqr_Tile", "PLASMA not initialized");
220:         return PLASMA_ERR_NOT_INITIALIZED;
221:     }
222:     /* Check descriptors for correctness */
223:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
224:         plasma_error("PLASMA_dormqr_Tile", "invalid first descriptor");
225:         return PLASMA_ERR_ILLEGAL_VALUE;
226:     }
227:     if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
228:         plasma_error("PLASMA_dormqr_Tile", "invalid second descriptor");
229:         return PLASMA_ERR_ILLEGAL_VALUE;
230:     }
231:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
232:         plasma_error("PLASMA_dormqr_Tile", "invalid third descriptor");
233:         return PLASMA_ERR_ILLEGAL_VALUE;
234:     }
235:     /* Check input arguments */
236:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
237:         plasma_error("PLASMA_dormqr_Tile", "only square tiles supported");
238:         return PLASMA_ERR_ILLEGAL_VALUE;
239:     }
240:     if (side != PlasmaLeft) {
241:         plasma_error("PLASMA_dormqr_Tile", "only PlasmaLeft supported");
242:         return PLASMA_ERR_NOT_SUPPORTED;
243:     }
244:     if (trans != PlasmaTrans) {
245:         plasma_error("PLASMA_dormqr_Tile", "only PlasmaTrans supported");
246:         return PLASMA_ERR_NOT_SUPPORTED;
247:     }
248:     /* Quick return - currently NOT equivalent to LAPACK's:
249:      * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
250: /*
251:     if (min(M, min(N, K)) == 0)
252:         return PLASMA_SUCCESS;
253: */
254:     plasma_parallel_call_3(plasma_pdormqr,
255:         PLASMA_desc, descA,
256:         PLASMA_desc, descB,
257:         PLASMA_desc, descT);
258: 
259:     return PLASMA_SUCCESS;
260: }
261: