001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: #include "auxiliary.h"
010: #include "tile.h"
011: 
012: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
013: //  Conversion from LAPACK F77 matrix layout to tile layout
014: void plasma_lapack_to_tile(plasma_context_t *plasma)
015: {
016:     PLASMA_desc A;
017:     void *Af77;
018:     int lda;
019: 
020:     int x, y;
021:     int X1, Y1;
022:     int X2, Y2;
023:     int n, m;
024:     int next_m;
025:     int next_n;
026: 
027:     plasma_unpack_args_3(Af77, lda, A);
028: 
029:     n = 0;
030:     m = PLASMA_RANK;
031:     while (m >= A.mt && n < A.nt) {
032:         n++;
033:         m = m-A.mt;
034:     }
035: 
036:     while (n < A.nt) {
037:         next_m = m;
038:         next_n = n;
039: 
040:         next_m += PLASMA_SIZE;
041:         while (next_m >= A.mt && next_n < A.nt) {
042:             next_n++;
043:             next_m = next_m-A.mt;
044:         }
045: 
046:         X1 = n == 0 ? A.j%A.nb : 0;
047:         Y1 = m == 0 ? A.i%A.nb : 0;
048:         X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
049:         Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.nb+1 : A.nb;
050:         switch (A.dtyp) {
051:             case PlasmaComplexDouble: {
052:                 PLASMA_Complex64_t *f77 = &((PLASMA_Complex64_t*)Af77)[A.nb*(lda*n+m)];
053:                 PLASMA_Complex64_t *bdl = &((PLASMA_Complex64_t*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
054:                 for (x = X1; x < X2; x++)
055:                     for (y = Y1; y < Y2; y++)
056:                         bdl[A.nb*x+y] = f77[lda*x+y];
057:             }
058:             break;
059:             case PlasmaComplexFloat: {
060:                 PLASMA_Complex32_t *f77 = &((PLASMA_Complex32_t*)Af77)[A.nb*(lda*n+m)];
061:                 PLASMA_Complex32_t *bdl = &((PLASMA_Complex32_t*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
062:                 for (x = X1; x < X2; x++)
063:                     for (y = Y1; y < Y2; y++)
064:                         bdl[A.nb*x+y] = f77[lda*x+y];
065:             }
066:             break;
067:             case PlasmaRealDouble: {
068:                 double *f77 = &((double*)Af77)[A.nb*(lda*n+m)];
069:                 double *bdl = &((double*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
070:                 for (x = X1; x < X2; x++)
071:                     for (y = Y1; y < Y2; y++)
072:                         bdl[A.nb*x+y] = f77[lda*x+y];
073:             }
074:             break;
075:             case PlasmaRealFloat: {
076:                 float *f77 = &((float*)Af77)[A.nb*(lda*n+m)];
077:                 float *bdl = &((float*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
078:                 for (x = X1; x < X2; x++)
079:                     for (y = Y1; y < Y2; y++)
080:                         bdl[A.nb*x+y] = f77[lda*x+y];
081:             }
082:             break;
083:         }
084:         m = next_m;
085:         n = next_n;
086:     }
087: }
088: 
089: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
090: //  Conversion from tile layout to LAPACK F77 matrix layout
091: void plasma_tile_to_lapack(plasma_context_t *plasma)
092: {
093:     PLASMA_desc A;
094:     void *Af77;
095:     int lda;
096: 
097:     int x, y;
098:     int X1, Y1;
099:     int X2, Y2;
100:     int n, m;
101:     int next_m;
102:     int next_n;
103: 
104:     plasma_unpack_args_3(A, Af77, lda);
105: 
106:     n = 0;
107:     m = PLASMA_RANK;
108:     while (m >= A.mt && n < A.nt) {
109:         n++;
110:         m = m-A.mt;
111:     }
112: 
113:     while (n < A.nt) {
114:         next_m = m;
115:         next_n = n;
116: 
117:         next_m += PLASMA_SIZE;
118:         while (next_m >= A.mt && next_n < A.nt) {
119:             next_n++;
120:             next_m = next_m-A.mt;
121:         }
122: 
123:         X1 = n == 0 ? A.j%A.nb : 0;
124:         Y1 = m == 0 ? A.i%A.nb : 0;
125:         X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
126:         Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.nb+1 : A.nb;
127:         switch (A.dtyp) {
128:             case PlasmaComplexDouble: {
129:                 PLASMA_Complex64_t *f77 = &((PLASMA_Complex64_t*)Af77)[A.nb*(lda*n+m)];
130:                 PLASMA_Complex64_t *bdl = &((PLASMA_Complex64_t*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
131:                 for (x = X1; x < X2; x++)
132:                     for (y = Y1; y < Y2; y++)
133:                         f77[lda*x+y] = bdl[A.nb*x+y];
134:             }
135:             break;
136:             case PlasmaComplexFloat: {
137:                 PLASMA_Complex32_t *f77 = &((PLASMA_Complex32_t*)Af77)[A.nb*(lda*n+m)];
138:                 PLASMA_Complex32_t *bdl = &((PLASMA_Complex32_t*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
139:                 for (x = X1; x < X2; x++)
140:                     for (y = Y1; y < Y2; y++)
141:                         f77[lda*x+y] = bdl[A.nb*x+y];
142:             }
143:             break;
144:             case PlasmaRealDouble: {
145:                 double *f77 = &((double*)Af77)[A.nb*(lda*n+m)];
146:                 double *bdl = &((double*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
147:                 for (x = X1; x < X2; x++)
148:                     for (y = Y1; y < Y2; y++)
149:                         f77[lda*x+y] = bdl[A.nb*x+y];
150:             }
151:             break;
152:             case PlasmaRealFloat: {
153:                 float *f77 = &((float*)Af77)[A.nb*(lda*n+m)];
154:                 float *bdl = &((float*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
155:                 for (x = X1; x < X2; x++)
156:                     for (y = Y1; y < Y2; y++)
157:                         f77[lda*x+y] = bdl[A.nb*x+y];
158:             }
159:             break;
160:         }
161:         m = next_m;
162:         n = next_n;
163:     }
164: }
165: 
166: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
167: //  Zeroes a submatrix in tile layout
168: void plasma_tile_zero(plasma_context_t *plasma)
169: {
170:     PLASMA_desc A;
171: 
172:     int x, y;
173:     int X1, Y1;
174:     int X2, Y2;
175:     int n, m;
176:     int next_m;
177:     int next_n;
178: 
179:     plasma_unpack_args_1(A);
180: 
181:     n = 0;
182:     m = PLASMA_RANK;
183:     while (m >= A.mt && n < A.nt) {
184:         n++;
185:         m = m-A.mt;
186:     }
187: 
188:     while (n < A.nt) {
189:         next_m = m;
190:         next_n = n;
191: 
192:         next_m += PLASMA_SIZE;
193:         while (next_m >= A.mt && next_n < A.nt) {
194:             next_n++;
195:             next_m = next_m-A.mt;
196:         }
197: 
198:         X1 = n == 0 ? A.j%A.nb : 0;
199:         Y1 = m == 0 ? A.i%A.nb : 0;
200:         X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
201:         Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.nb+1 : A.nb;
202:         switch (A.dtyp) {
203:             case PlasmaComplexDouble: {
204:                 PLASMA_Complex64_t *bdl = &((PLASMA_Complex64_t*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
205:                 for (x = X1; x < X2; x++)
206:                     for (y = Y1; y < Y2; y++)
207:                         bdl[A.nb*x+y] = 0.0;
208:             }
209:             break;
210:             case PlasmaComplexFloat: {
211:                 PLASMA_Complex32_t *bdl = &((PLASMA_Complex32_t*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
212:                 for (x = X1; x < X2; x++)
213:                     for (y = Y1; y < Y2; y++)
214:                         bdl[A.nb*x+y] = 0.0;
215:             }
216:             break;
217:             case PlasmaRealDouble: {
218:                 double *bdl = &((double*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
219:                 for (x = X1; x < X2; x++)
220:                     for (y = Y1; y < Y2; y++)
221:                         bdl[A.nb*x+y] = 0.0;
222:             }
223:             break;
224:             case PlasmaRealFloat: {
225:                 float *bdl = &((float*)A.mat)[A.bsiz*(A.lmt*(n+A.j/A.nb)+(m+A.i/A.nb))];
226:                 for (x = X1; x < X2; x++)
227:                     for (y = Y1; y < Y2; y++)
228:                         bdl[A.nb*x+y] = 0.0;
229:             }
230:             break;
231:         }
232:         m = next_m;
233:         n = next_n;
234:     }
235: }
236: 
237: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
238: // PLASMA_Lapack_to_Tile - Conversion from LAPACK layout to tile layout.
239: 
240: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
241: // Af77     void* (IN)
242: //          LAPACK matrix.
243: //
244: // LDA      int (IN)
245: //          The leading dimension of the matrix Af77.
246: //
247: // A        PLASMA_desc* (OUT)
248: //          Descriptor of the PLASMA matrix in tile layout.
249: 
250: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
251: //          = PLASMA_SUCCESS: successful exit
252: 
253: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
254: int PLASMA_Lapack_to_Tile(void *Af77, int LDA, PLASMA_desc *A)
255: {
256:     PLASMA_desc descA = *A;
257:     plasma_context_t *plasma;
258: 
259:     plasma = plasma_context_self();
260:     if (plasma == NULL) {
261:         plasma_fatal_error("PLASMA_Lapack_to_Tile", "PLASMA not initialized");
262:         return PLASMA_ERR_NOT_INITIALIZED;
263:     }
264:     /* Check descriptor for correctness */
265:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
266:         plasma_error("PLASMA_Lapack_to_Tile", "invalid descriptor");
267:         return PLASMA_ERR_ILLEGAL_VALUE;
268:     }
269:     plasma_parallel_call_3(plasma_lapack_to_tile,
270:         void*, Af77,
271:         int, LDA,
272:         PLASMA_desc, descA);
273: 
274:     return PLASMA_SUCCESS;
275: }
276: 
277: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
278: // PLASMA_Tile_to_Lapack - Conversion from tile layout to LAPACK layout
279: 
280: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
281: // A        PLASMA_desc* (OUT)
282: //          Descriptor of the PLASMA matrix in tile layout.
283: //
284: // Af77     void* (IN)
285: //          LAPACK matrix.
286: //
287: // LDA      int (IN)
288: //          The leading dimension of the matrix Af77.
289: 
290: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
291: //          = PLASMA_SUCCESS: successful exit
292: 
293: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
294: int PLASMA_Tile_to_Lapack(PLASMA_desc *A, void *Af77, int LDA)
295: {
296:     PLASMA_desc descA = *A;
297:     plasma_context_t *plasma;
298: 
299:     plasma = plasma_context_self();
300:     if (plasma == NULL) {
301:         plasma_fatal_error("PLASMA_Tile_to_Lapack", "PLASMA not initialized");
302:         return PLASMA_ERR_NOT_INITIALIZED;
303:     }
304:     /* Check descriptor for correctness */
305:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
306:         plasma_error("PLASMA_Tile_to_Lapack", "invalid descriptor");
307:         return PLASMA_ERR_ILLEGAL_VALUE;
308:     }
309:     plasma_parallel_call_3(plasma_tile_to_lapack,
310:         PLASMA_desc, descA,
311:         PLASMA_Complex64_t*, Af77,
312:         int, LDA);
313: 
314:     return PLASMA_SUCCESS;
315: }
316: