001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_dgetrf - Computes an LU factorization of a general M-by-N matrix A
012: // using the tile LU algorithm with partial tile pivoting with row interchanges.
013: 
014: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
015: // M        int (IN)
016: //          The number of rows of the matrix A. M >= 0.
017: //
018: // N        int (IN)
019: //          The number of columns of the matrix A. N >= 0.
020: //
021: // A        double* (INOUT)
022: //          On entry, the M-by-N matrix to be factored.
023: //          On exit, the tile factors L and U from the factorization.
024: //
025: // LDA      int (IN)
026: //          The leading dimension of the array A. LDA >= max(1,M).
027: //
028: // L        double* (OUT)
029: //          On exit, auxiliary factorization data, related to the tile L factor,
030: //          required by PLASMA_dgetrs to solve the system of equations.
031: //
032: // IPIV     int* (OUT)
033: //          The pivot indices that define the permutations (not equivalent to LAPACK).
034: 
035: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
036: //          = 0: successful exit
037: //          < 0: if -i, the i-th argument had an illegal value
038: //          > 0: if i, U(i,i) is exactly zero. The factorization has been completed,
039: //               but the factor U is exactly singular, and division by zero will occur
040: //               if it is used to solve a system of equations.
041: 
042: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
043: int PLASMA_dgetrf(int M, int N, double *A, int LDA, double *L, int *IPIV)
044: {
045:     int NB, MT, NT;
046:     int status;
047:     double *Abdl;
048:     double *Lbdl;
049:     plasma_context_t *plasma;
050: 
051:     plasma = plasma_context_self();
052:     if (plasma == NULL) {
053:         plasma_fatal_error("PLASMA_dgetrf", "PLASMA not initialized");
054:         return PLASMA_ERR_NOT_INITIALIZED;
055:     }
056:     /* Check input arguments */
057:     if (M < 0) {
058:         plasma_error("PLASMA_dgetrf", "illegal value of M");
059:         return -1;
060:     }
061:     if (N < 0) {
062:         plasma_error("PLASMA_dgetrf", "illegal value of N");
063:         return -2;
064:     }
065:     if (LDA < max(1, M)) {
066:         plasma_error("PLASMA_dgetrf", "illegal value of LDA");
067:         return -4;
068:     }
069:     /* Quick return */
070:     if (min(M, N) == 0)
071:         return PLASMA_SUCCESS;
072: 
073:     /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
074:     status = plasma_tune(PLASMA_FUNC_DGESV, M, N, 0);
075:     if (status != PLASMA_SUCCESS) {
076:         plasma_error("PLASMA_dgetrf", "plasma_tune() failed");
077:         return status;
078:     }
079: 
080:     /* Set NT & NTRHS */
081:     NB = PLASMA_NB;
082:     MT = (M%NB==0) ? (M/NB) : (M/NB+1);
083:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
084: 
085:     /* Allocate memory for matrices in block layout */
086:     Abdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaRealDouble);
087:     Lbdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
088:     if (Abdl == NULL || Lbdl == NULL) {
089:         plasma_error("PLASMA_dgetrf", "plasma_shared_alloc() failed");
090:         plasma_shared_free(plasma, Abdl);
091:         plasma_shared_free(plasma, Lbdl);
092:         return PLASMA_ERR_OUT_OF_RESOURCES;
093:     }
094: 
095:     PLASMA_desc descA = plasma_desc_init(
096:         Abdl, PlasmaRealDouble,
097:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
098:         M, N, 0, 0, M, N);
099: 
100:     PLASMA_desc descL = plasma_desc_init(
101:         Lbdl, PlasmaRealDouble,
102:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
103:         M, N, 0, 0, M, N);
104: 
105:     plasma_parallel_call_3(plasma_lapack_to_tile,
106:         double*, A,
107:         int, LDA,
108:         PLASMA_desc, descA);
109: 
110:     /* Call the native interface */
111:     status = PLASMA_dgetrf_Tile(&descA, &descL, IPIV);
112: 
113:     if (status == PLASMA_SUCCESS) {
114:         /* Return L to the user */
115:         plasma_memcpy(L, Lbdl, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
116: 
117:         plasma_parallel_call_3(plasma_tile_to_lapack,
118:             PLASMA_desc, descA,
119:             double*, A,
120:             int, LDA);
121:     }
122:     plasma_shared_free(plasma, Abdl);
123:     plasma_shared_free(plasma, Lbdl);
124:     return status;
125: }
126: 
127: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
128: // PLASMA_dgetrf_Tile - Computes an LU factorization of a general M-by-N matrix A
129: // using the tile LU algorithm with partial tile pivoting with row interchanges.
130: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
131: 
132: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
133: // A        double* (INOUT)
134: //          On entry, the M-by-N matrix to be factored.
135: //          On exit, the tile factors L and U from the factorization.
136: //
137: // L        double* (OUT)
138: //          On exit, auxiliary factorization data, related to the tile L factor,
139: //          required by PLASMA_dgetrs to solve the system of equations.
140: //
141: // IPIV     int* (OUT)
142: //          The pivot indices that define the permutations (not equivalent to LAPACK).
143: 
144: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
145: //          = 0: successful exit
146: //          > 0: if i, U(i,i) is exactly zero. The factorization has been completed,
147: //               but the factor U is exactly singular, and division by zero will occur
148: //               if it is used to solve a system of equations.
149: 
150: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
151: int PLASMA_dgetrf_Tile(PLASMA_desc *A, PLASMA_desc *L, int *IPIV)
152: {
153:     PLASMA_desc descA = *A;
154:     PLASMA_desc descL = *L;
155:     plasma_context_t *plasma;
156: 
157:     plasma = plasma_context_self();
158:     if (plasma == NULL) {
159:         plasma_fatal_error("PLASMA_dgetrf_Tile", "PLASMA not initialized");
160:         return PLASMA_ERR_NOT_INITIALIZED;
161:     }
162:     /* Check descriptors for correctness */
163:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
164:         plasma_error("PLASMA_dgetrf_Tile", "invalid first descriptor");
165:         return PLASMA_ERR_ILLEGAL_VALUE;
166:     }
167:     if (plasma_desc_check(&descL) != PLASMA_SUCCESS) {
168:         plasma_error("PLASMA_dgetrf_Tile", "invalid second descriptor");
169:         return PLASMA_ERR_ILLEGAL_VALUE;
170:     }
171:     /* Check input arguments */
172:     if (descA.nb != descA.mb) {
173:         plasma_error("PLASMA_dgetrf_Tile", "only square tiles supported");
174:         return PLASMA_ERR_ILLEGAL_VALUE;
175:     }
176:     /* Quick return */
177: /*
178:     if (min(M, N) == 0)
179:         return PLASMA_SUCCESS;
180: */
181:     /* Clear IPIV and Lbdl */
182:     plasma_memzero(IPIV, descA.mt*descA.nt*PLASMA_NB, PlasmaInteger);
183:     plasma_memzero(descL.mat, descL.mt*descL.nt*PLASMA_IBNBSIZE, PlasmaRealDouble);
184: 
185:     /* Set INFO to SUCCESS */
186:     PLASMA_INFO = PLASMA_SUCCESS;
187: 
188:     plasma_parallel_call_3(plasma_pdgetrf,
189:         PLASMA_desc, descA,
190:         PLASMA_desc, descL,
191:         int*, IPIV);
192: 
193:     return PLASMA_INFO;
194: }
195: