01: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
02: /* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
03:  * ///                    Author: Hatem Ltaief, Jakub Kurzak                                 ///
04:  * ///                    Release Date: November, 15th 2009                                  ///
05:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
06:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
07: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
08: #include "common.h"
09: 
10: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
11: //  Parallel application of Q using tile V - LQ factorization
// Tile addressing helpers. X(m,n) yields a PLASMA_Complex32_t pointer to the
// first element of tile (m,n) of descriptor X: the element offset into X.mat
// is X.bsiz*(m) + X.bsiz*X.lmt*(n). Each macro expects a descriptor variable
// with the same name (A, B or T) to be in scope at the point of use.
// The whole expansion is parenthesized so the result behaves as a single
// pointer expression when combined with other operators (e.g. A(m,n)[i]).
#define A(m,n) (&((PLASMA_Complex32_t*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)])
#define B(m,n) (&((PLASMA_Complex32_t*)B.mat)[B.bsiz*(m)+B.bsiz*B.lmt*(n)])
#define T(m,n) (&((PLASMA_Complex32_t*)T.mat)[T.bsiz*(m)+T.bsiz*T.lmt*(n)])
15: void plasma_pcunmlq(plasma_context_t *plasma)
16: {
17:     PLASMA_desc A;
18:     PLASMA_desc B;
19:     PLASMA_desc T;
20: 
21:     int k, m, n;
22:     int next_k;
23:     int next_m;
24:     int next_n;
25:     PLASMA_Complex32_t *work;
26: 
27:     plasma_unpack_args_3(A, B, T);
28:     work = (PLASMA_Complex32_t *)plasma_private_alloc(plasma, T.mb*T.nb, T.dtyp);
29:     ss_init(B.mt, B.nt, min(A.mt, A.nt));
30: 
31:     k = min(A.mt, A.nt)-1;
32:     n = PLASMA_RANK;
33:     while (n >= B.nt) {
34:         k--;
35:         n = n-B.nt;
36:     }
37:     m = B.mt-1;
38: 
39:     while (k >= 0 && n < B.nt) {
40:         next_n = n;
41:         next_m = m;
42:         next_k = k;
43: 
44:         next_m--;
45:         if (next_m == k-1) {
46:             next_n += PLASMA_SIZE;
47:             while (next_n >= B.nt && next_k >= 0) {
48:                 next_k--;
49:                 next_n = next_n-B.nt;
50:             }
51:             next_m = B.mt-1;
52:         }
53: 
54:         if (m == k) {
55:             CORE_cunmlq(
56:                 PlasmaLeft, PlasmaConjTrans,
57:                 k == A.nt-1 ? A.n-k*A.nb : A.nb,
58:                 n == B.nt-1 ? B.n-n*B.nb : B.nb,
59:                 T.mb,
60:                 k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.nb : A.nb,
61:                 A(k, k), A.nb,
62:                 T(k, k), T.mb,
63:                 B(k, n), B.nb,
64:                 work, T.nb);
65:             ss_cond_set(k, n, k);
66:         }
67:         else {
68:             ss_cond_wait(m, n, k+1);
69:             CORE_cssmlq(
70:                 PlasmaLeft, PlasmaConjTrans,
71:                 A.nb,
72:                 m == B.mt-1 ? B.m-m*B.nb : B.nb,
73:                 n == B.nt-1 ? B.n-n*B.nb : B.nb,
74:                 T.mb,
75:                 k == A.mt-1 ? A.m-k*A.nb : A.nb,
76:                 B(k, n), B.nb,
77:                 B(m, n), B.nb,
78:                 A(k, m), A.nb,
79:                 T(k, m), T.mb,
80:                 work, T.mb);
81:             ss_cond_set(m, n, k);
82:         }
83:         m = next_m;
84:         n = next_n;
85:         k = next_k;
86:     }
87:     plasma_private_free(plasma, work);
88:     ss_finalize();
89: }
90: