01: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
02: /* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
03:  * ///                    Author: Jakub Kurzak, Hatem Ltaief                                 ///
04:  * ///                    Release Date: November, 15th 2009                                  ///
05:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
06:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
07: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
08: #include "common.h"
09: 
10: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel application of the L factor and pivots of a tile LU factorization to B (static scheduling)
12: #define A(m,n) &((float*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)]
13: #define B(m,n) &((float*)B.mat)[B.bsiz*(m)+B.bsiz*B.lmt*(n)]
14: #define L(m,n) &((float*)L.mat)[L.bsiz*(m)+L.bsiz*L.lmt*(n)]
15: #define IPIV(m,n) &IPIV[A.nb*(m)+A.nb*A.lmt*(n)]
16: void plasma_pstrsmpl(plasma_context_t *plasma)
17: {
18:     PLASMA_desc A;
19:     PLASMA_desc B;
20:     PLASMA_desc L;
21:     int*        IPIV;
22: 
23:     int k, m, n;
24:     int next_k;
25:     int next_m;
26:     int next_n;
27: 
28:     plasma_unpack_args_4(A, B, L, IPIV);
29:     ss_init(B.mt, B.nt, -1);
30: 
31:     k = 0;
32:     n = PLASMA_RANK;
33:     while (n >= B.nt) {
34:         k++;
35:         n = n-B.nt;
36:     }
37:     m = k;
38: 
39:     while (k < min(A.mt, A.nt) && n < B.nt) {
40:         next_n = n;
41:         next_m = m;
42:         next_k = k;
43: 
44:         next_m++;
45:         if (next_m == A.mt) {
46:             next_n += PLASMA_SIZE;
47:             while (next_n >= B.nt && next_k < min(A.mt, A.nt)) {
48:                 next_k++;
49:                 next_n = next_n-B.nt;
50:             }
51:             next_m = next_k;
52:         }
53: 
54:         if (m == k) {
55:             ss_cond_wait(k, n, k-1);
56:             CORE_sgessm(
57:                 k == A.mt-1 ? A.m-k*A.nb : A.nb,
58:                 n == B.nt-1 ? B.n-n*B.nb : B.nb,
59:                 k == A.nt-1 ? min(A.m-k*A.nb, A.n-k*A.nb) : A.nb,
60:                 L.mb,
61:                 IPIV(k, k),
62:                 A(k, k), A.nb,
63:                 B(k, n), B.nb);
64:             ss_cond_set(k, n, k);
65:         }
66:         else {
67:             ss_cond_wait(m, n, k-1);
68:             CORE_sssssm(
69:                 A.nb,
70:                 m == A.mt-1 ? A.m-m*A.nb : A.nb,
71:                 n == B.nt-1 ? B.n-n*A.nb : B.nb,
72:                 L.mb,
73:                 k == A.nt-1 ? A.n-k*A.nb : A.nb,
74:                 B(k, n), B.nb,
75:                 B(m, n), B.nb,
76:                 L(m, k), L.mb,
77:                 A(m, k), A.nb,
78:                 IPIV(m, k));
79:             ss_cond_set(m, n, k);
80:         }
81:         n = next_n;
82:         m = next_m;
83:         k = next_k;
84:     }
85:     ss_finalize();
86: }
87: