001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_dgeqrf - Computes the tile QR factorization of a complex M-by-N matrix A: A = Q * R.
012: 
013: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
014: // M        int (IN)
015: //          The number of rows of the matrix A. M >= 0.
016: //
017: // N        int (IN)
018: //          The number of columns of the matrix A.  N >= 0.
019: //
020: // A        double* (INOUT)
021: //          On entry, the M-by-N matrix A.
022: //          On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N
023: //          upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the
024: //          diagonal represent the unitary matrix Q as a product of elementary reflectors stored
025: //          by tiles.
026: //
027: // LDA      int (IN)
028: //          The leading dimension of the array A. LDA >= max(1,M).
029: //
030: // T        double* (OUT)
031: //          On exit, auxiliary factorization data, required by PLASMA_dgeqrs to solve the system
032: //          of equations.
033: 
034: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
035: //          = 0: successful exit
036: //          < 0: if -i, the i-th argument had an illegal value
037: 
038: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
039: int PLASMA_dgeqrf(int M, int N, double *A, int LDA, double *T)
040: {
041:     int NB, MT, NT;
042:     int status;
043:     double *Abdl;
044:     double *Tbdl;
045:     plasma_context_t *plasma;
046: 
047:     plasma = plasma_context_self();
048:     if (plasma == NULL) {
049:         plasma_fatal_error("PLASMA_dgeqrf", "PLASMA not initialized");
050:         return PLASMA_ERR_NOT_INITIALIZED;
051:     }
052:     /* Check input arguments */
053:     if (M < 0) {
054:         plasma_error("PLASMA_dgeqrf", "illegal value of M");
055:         return -1;
056:     }
057:     if (N < 0) {
058:         plasma_error("PLASMA_dgeqrf", "illegal value of N");
059:         return -2;
060:     }
061:     if (LDA < max(1, M)) {
062:         plasma_error("PLASMA_dgeqrf", "illegal value of LDA");
063:         return -4;
064:     }
065:     /* Quick return */
066:     if (min(M, N) == 0)
067:         return PLASMA_SUCCESS;
068: 
069:     /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
070:     status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
071:     if (status != PLASMA_SUCCESS) {
072:         plasma_error("PLASMA_dgeqrf", "plasma_tune() failed");
073:         return status;
074:     }
075: 
076:     /* Set MT & NT */
077:     NB = PLASMA_NB;
078:     MT = (M%NB==0) ? (M/NB) : (M/NB+1);
079:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
080: 
081:     /* Allocate memory for matrices in block layout */
082:     Abdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaRealDouble);
083:     Tbdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
084:     if (Abdl == NULL || Tbdl == NULL) {
085:         plasma_error("PLASMA_dgeqrf", "plasma_shared_alloc() failed");
086:         plasma_shared_free(plasma, Abdl);
087:         plasma_shared_free(plasma, Tbdl);
088:         return PLASMA_ERR_OUT_OF_RESOURCES;
089:     }
090: 
091:     PLASMA_desc descA = plasma_desc_init(
092:         Abdl, PlasmaRealDouble,
093:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
094:         M, N, 0, 0, M, N);
095: 
096:     PLASMA_desc descT = plasma_desc_init(
097:         Tbdl, PlasmaRealDouble,
098:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
099:         M, N, 0, 0, M, N);
100: 
101:     plasma_parallel_call_3(plasma_lapack_to_tile,
102:         double*, A,
103:         int, LDA,
104:         PLASMA_desc, descA);
105: 
106:     /* Call the native interface */
107:     status = PLASMA_dgeqrf_Tile(&descA, &descT);
108: 
109:     if (status == PLASMA_SUCCESS) {
110:         /* Return T to the user */
111:         plasma_memcpy(T, Tbdl, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
112: 
113:         plasma_parallel_call_3(plasma_tile_to_lapack,
114:             PLASMA_desc, descA,
115:             double*, A,
116:             int, LDA);
117:     }
118:     plasma_shared_free(plasma, Abdl);
119:     plasma_shared_free(plasma, Tbdl);
120:     return status;
121: }
122: 
123: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
124: // PLASMA_dgeqrf_Tile - Computes the tile QR factorization of a complex M-by-N matrix A: A = Q * R.
125: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
126: 
127: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
128: // A        double* (INOUT)
129: //          On entry, the M-by-N matrix A.
130: //          On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N
131: //          upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the
132: //          diagonal represent the unitary matrix Q as a product of elementary reflectors stored
133: //          by tiles.
134: //
135: // T        double* (OUT)
136: //          On exit, auxiliary factorization data, required by PLASMA_dgeqrs to solve the system
137: //          of equations.
138: 
139: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
140: //          = 0: successful exit
141: 
142: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
143: int PLASMA_dgeqrf_Tile(PLASMA_desc *A, PLASMA_desc *T)
144: {
145:     PLASMA_desc descA = *A;
146:     PLASMA_desc descT = *T;
147:     plasma_context_t *plasma;
148: 
149:     plasma = plasma_context_self();
150:     if (plasma == NULL) {
151:         plasma_error("PLASMA_dgeqrf_Tile", "PLASMA not initialized");
152:         return PLASMA_ERR_NOT_INITIALIZED;
153:     }
154:     /* Check descriptors for correctness */
155:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
156:         plasma_error("PLASMA_dgeqrf_Tile", "invalid first descriptor");
157:         return PLASMA_ERR_ILLEGAL_VALUE;
158:     }
159:     if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
160:         plasma_error("PLASMA_dgeqrf_Tile", "invalid second descriptor");
161:         return PLASMA_ERR_ILLEGAL_VALUE;
162:     }
163:     /* Check input arguments */
164:     if (descA.nb != descA.mb) {
165:         plasma_error("PLASMA_dgeqrf_Tile", "only square tiles supported");
166:         return PLASMA_ERR_ILLEGAL_VALUE;
167:     }
168:     /* Quick return */
169: /*
170:     if (min(M, N) == 0)
171:         return PLASMA_SUCCESS;
172: */
173:     plasma_parallel_call_2(plasma_pdgeqrf,
174:         PLASMA_desc, descA,
175:         PLASMA_desc, descT);
176: 
177:     return PLASMA_SUCCESS;
178: }
179: