001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Hatem Ltaief, Jakub Kurzak                                 ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_dorglq - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the
012: // first M rows of a product of the elementary reflectors returned by PLASMA_dgelqf.
013: 
014: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
015: // M        int (IN)
016: //          The number of rows of the matrix Q. M >= 0.
017: //
018: // N        int (IN)
019: //          The number of columns of the matrix Q. N >= M.
020: //
021: // K        int (IN)
022: //          The number of rows of elementary tile reflectors whose product defines the matrix Q.
023: //          M >= K >= 0.
024: //
025: // A        double* (IN)
026: //          Details of the LQ factorization of the original matrix A as returned by PLASMA_dgelqf.
027: //
028: // LDA      int (IN)
029: //          The leading dimension of the array A. LDA >= max(1,M).
030: //
031: // T        double* (IN)
032: //          Auxiliary factorization data, computed by PLASMA_dgelqf.
033: //
034: // B        double* (OUT)
035: //          On exit, the M-by-N matrix Q.
036: //
037: // LDA      int (IN)
038: //          The leading dimension of the array B. LDB >= max(1,M).
039: 
040: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
041: //          = 0: successful exit
042: //          < 0: if -i, the i-th argument had an illegal value
043: 
044: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
045: int PLASMA_dorglq(int M, int N, int K, double *A, int LDA, double *T,
046:                   double *B, int LDB)
047: {
048:     int NB, MT, NT, KT;
049:     int status;
050:     double *Abdl;
051:     double *Bbdl;
052:     double *Tbdl;
053:     plasma_context_t *plasma;
054: 
055:     plasma = plasma_context_self();
056:     if (plasma == NULL) {
057:         plasma_fatal_error("PLASMA_dorglq", "PLASMA not initialized");
058:         return PLASMA_ERR_NOT_INITIALIZED;
059:     }
060:     if (M < 0) {
061:         plasma_error("PLASMA_dorglq", "illegal value of M");
062:         return -1;
063:     }
064:     if (N < M) {
065:         plasma_error("PLASMA_dorglq", "illegal value of N");
066:         return -2;
067:     }
068:     if (K < 0 || K > M) {
069:         plasma_error("PLASMA_dorglq", "illegal value of K");
070:         return -3;
071:     }
072:     if (LDA < max(1, M)) {
073:         plasma_error("PLASMA_dorglq", "illegal value of LDA");
074:         return -5;
075:     }
076:     if (LDB < max(1, M)) {
077:         plasma_error("PLASMA_dorglq", "illegal value of LDB");
078:         return -8;
079:     }
080:     /* Quick return - currently NOT equivalent to LAPACK's:
081:      * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
082:     if (min(M, min(N, K)) == 0)
083:         return PLASMA_SUCCESS;
084: 
085:     /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
086:     status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
087:     if (status != PLASMA_SUCCESS) {
088:         plasma_error("PLASMA_dorglq", "plasma_tune() failed");
089:         return status;
090:     }
091: 
092:     /* Set MT & NT */
093:     NB = PLASMA_NB;
094:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
095:     MT = (M%NB==0) ? (M/NB) : (M/NB+1);
096:     KT = (K%NB==0) ? (K/NB) : (K/NB+1);
097: 
098:     /* Allocate memory for matrices in block layout */
099:     Abdl = (double *)plasma_shared_alloc(plasma, KT*NT*PLASMA_NBNBSIZE, PlasmaRealDouble);
100:     Tbdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
101:     Bbdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaRealDouble);
102:     if (Abdl == NULL || Tbdl == NULL || Bbdl == NULL) {
103:         plasma_error("PLASMA_dorglq", "plasma_shared_alloc() failed");
104:         plasma_shared_free(plasma, Abdl);
105:         plasma_shared_free(plasma, Tbdl);
106:         plasma_shared_free(plasma, Bbdl);
107:         return PLASMA_ERR_OUT_OF_RESOURCES;
108:     }
109: 
110:     PLASMA_desc descA = plasma_desc_init(
111:         Abdl, PlasmaRealDouble,
112:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
113:         K, N, 0, 0, K, N);
114: 
115:     PLASMA_desc descB = plasma_desc_init(
116:         Bbdl, PlasmaRealDouble,
117:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
118:         M, N, 0, 0, M, N);
119: 
120:     PLASMA_desc descT = plasma_desc_init(
121:         Tbdl, PlasmaRealDouble,
122:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
123:         M, N, 0, 0, M, N);
124: 
125:     plasma_parallel_call_3(plasma_lapack_to_tile,
126:         double*, A,
127:         int, LDA,
128:         PLASMA_desc, descA);
129: 
130:     plasma_parallel_call_3(plasma_lapack_to_tile,
131:         double*, B,
132:         int, LDB,
133:         PLASMA_desc, descB);
134: 
135:     /* Receive T from the user */
136:     plasma_memcpy(Tbdl, T, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
137: 
138:     /* Call the native interface */
139:     status = PLASMA_dorglq_Tile(&descA, &descT, &descB);
140: 
141:     if (status == PLASMA_SUCCESS)
142:         plasma_parallel_call_3(plasma_tile_to_lapack,
143:             PLASMA_desc, descB,
144:             double*, B,
145:             int, LDB);
146: 
147:     plasma_shared_free(plasma, Abdl);
148:     plasma_shared_free(plasma, Tbdl);
149:     plasma_shared_free(plasma, Bbdl);
150:     return status;
151: }
152: 
153: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
154: // PLASMA_dorglq_Tile - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the
155: // first M rows of a product of the elementary reflectors returned by PLASMA_dgelqf_Tile.
156: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
157: 
158: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
159: // A        double* (IN)
160: //          Details of the LQ factorization of the original matrix A as returned by PLASMA_dgelqf.
161: //
162: // T        double* (IN)
163: //          Auxiliary factorization data, computed by PLASMA_dgelqf.
164: //
165: // B        double* (OUT)
166: //          On exit, the M-by-N matrix Q.
167: 
168: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
169: //          = 0: successful exit
170: 
171: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
172: int PLASMA_dorglq_Tile(PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B)
173: {
174:     PLASMA_desc descA = *A;
175:     PLASMA_desc descT = *T;
176:     PLASMA_desc descB = *B;
177:     plasma_context_t *plasma;
178: 
179:     plasma = plasma_context_self();
180:     if (plasma == NULL) {
181:         plasma_fatal_error("PLASMA_dorglq_Tile", "PLASMA not initialized");
182:         return PLASMA_ERR_NOT_INITIALIZED;
183:     }
184:     /* Check descriptors for correctness */
185:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
186:         plasma_error("PLASMA_dorglq_Tile", "invalid first descriptor");
187:         return PLASMA_ERR_ILLEGAL_VALUE;
188:     }
189:     if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
190:         plasma_error("PLASMA_dorglq_Tile", "invalid second descriptor");
191:         return PLASMA_ERR_ILLEGAL_VALUE;
192:     }
193:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
194:         plasma_error("PLASMA_dorglq_Tile", "invalid third descriptor");
195:         return PLASMA_ERR_ILLEGAL_VALUE;
196:     }
197:     /* Check input arguments */
198:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
199:         plasma_error("PLASMA_dorglq_Tile", "only square tiles supported");
200:         return PLASMA_ERR_ILLEGAL_VALUE;
201:     }
202:     /* Quick return - currently NOT equivalent to LAPACK's:
203:      * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
204: /*
205:     if (min(M, N) == 0)
206:         return PLASMA_SUCCESS;
207: */
208:     plasma_parallel_call_3(plasma_pdorglq,
209:         PLASMA_desc, descA,
210:         PLASMA_desc, descB,
211:         PLASMA_desc, descT);
212: 
213:     return PLASMA_SUCCESS;
214: }
215: