001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
003:  * ///                    Author: Jakub Kurzak, Hatem Ltaief                                 ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
011: //  Parallel tile LU factorization
012: #define A(m,n) &((float*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)]
013: #define L(m,n) &((float*)L.mat)[L.bsiz*(m)+L.bsiz*L.lmt*(n)]
014: #define IPIV(m,n) &IPIV[A.nb*(m)+A.nb*A.lmt*(n)]
015: void plasma_psgetrf(plasma_context_t *plasma)
016: {
017:     PLASMA_desc A;
018:     PLASMA_desc L;
019:     int*        IPIV;
020: 
021:     int k, m, n;
022:     int next_k;
023:     int next_m;
024:     int next_n;
025:     int iinfo;
026:     float *work;
027: 
028:     plasma_unpack_args_3(A, L, IPIV);
029:     work = (float *)plasma_private_alloc(plasma, L.mb*L.nb, L.dtyp);
030:     ss_init(A.mt, A.nt, -1);
031: 
032:     k = 0;
033:     n = PLASMA_RANK;
034:     while (n >= A.nt) {
035:         k++;
036:         n = n-A.nt+k;
037:     }
038:     m = k;
039: 
040:     while (k < min(A.mt, A.nt) && n < A.nt) {
041:         next_n = n;
042:         next_m = m;
043:         next_k = k;
044: 
045:         next_m++;
046:         if (next_m == A.mt) {
047:             next_n += PLASMA_SIZE;
048:             while (next_n >= A.nt && next_k < min(A.mt, A.nt)) {
049:                 next_k++;
050:                 next_n = next_n-A.nt+next_k;
051:             }
052:             next_m = next_k;
053:         }
054: 
055:         if (n == k) {
056:             if (m == k) {
057:                 ss_cond_wait(k, k, k-1);
058:                 CORE_sgetrf(
059:                     k == A.mt-1 ? A.m-k*A.nb : A.nb,
060:                     k == A.nt-1 ? A.n-k*A.nb : A.nb,
061:                     L.mb,
062:                     A(k, k), A.nb,
063:                     IPIV(k, k), &iinfo);
064:                 if (PLASMA_INFO == 0 && iinfo > 0 && m == A.mt-1)
065:                     PLASMA_INFO = iinfo + A.nb*k;
066:                 ss_cond_set(k, k, k);
067:             }
068:             else {
069:                 ss_cond_wait(m, k, k-1);
070:                 CORE_ststrf(
071:                     m == A.mt-1 ? A.m-m*A.nb : A.nb,
072:                     k == A.nt-1 ? A.n-k*A.nb : A.nb,
073:                     L.mb,
074:                     A.nb,
075:                     A(k, k), A.nb,
076:                     A(m, k), A.nb,
077:                     L(m, k), L.mb,
078:                     IPIV(m, k), 
079:                     work, L.nb, &iinfo);
080:                 if (PLASMA_INFO == 0 && iinfo > 0 && m == A.mt-1)
081:                     PLASMA_INFO = iinfo + A.nb*k;
082:                 ss_cond_set(m, k, k);
083:             }
084:         }
085:         else {
086:             if (m == k) {
087:                 ss_cond_wait(k, k, k);
088:                 ss_cond_wait(k, n, k-1);
089:                 CORE_sgessm(
090:                     k == A.mt-1 ? A.m-k*A.nb : A.nb,
091:                     n == A.nt-1 ? A.n-n*A.nb : A.nb,
092:                     A.nb,
093:                     L.mb,
094:                     IPIV(k, k),
095:                     A(k, k), A.nb,
096:                     A(k, n), A.nb);
097:             }
098:             else {
099:                 ss_cond_wait(m, k, k);
100:                 ss_cond_wait(m, n, k-1);
101:                 CORE_sssssm(
102:                     A.nb,
103:                     m == A.mt-1 ? A.m-m*A.nb : A.nb,
104:                     n == A.nt-1 ? A.n-n*A.nb : A.nb,
105:                     L.mb,
106:                     A.nb,
107:                     A(k, n), A.nb,
108:                     A(m, n), A.nb,
109:                     L(m, k), L.mb,
110:                     A(m, k), A.nb,
111:                     IPIV(m, k));
112:                 ss_cond_set(m, n, k);
113:             }
114:         }
115:         n = next_n;
116:         m = next_m;
117:         k = next_k;
118:     }
119:     plasma_private_free(plasma, work);
120:     ss_finalize();
121: }
122: