PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pcungqrrh.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/*
 * Tile-address shorthands used by the routine below.
 *
 * Each macro forwards a tile-row index m and a tile-column index n to
 * BLKADDR together with the element type PLASMA_Complex32_t.  The first
 * BLKADDR argument is the bare identifier A, Q or T, so a variable with
 * that name must be in scope wherever the macro is expanded.
 *
 * T2 addresses the same object as T but shifts the column index by A.nt,
 * i.e. it reaches the columns of T that lie past the first A.nt ones
 * (so it additionally needs A in scope).
 *
 * NOTE(review): BLKADDR itself is defined elsewhere (presumably via
 * common.h); its exact return type is not visible here -- confirm there.
 */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, (m), (n))
20 #define Q(m,n) BLKADDR(Q, PLASMA_Complex32_t, (m), (n))
21 #define T(m,n) BLKADDR(T, PLASMA_Complex32_t, (m), (n))
22 #define T2(m,n) BLKADDR(T, PLASMA_Complex32_t, (m), (n)+(A.nt))
23 /***************************************************************************/
/*
 * NOTE(review): this listing is an extraction of a rendered source page and
 * is missing several original lines -- the gutter numbers jump 23->28,
 * 30->33, 60->62, 62->64, 84->86, 86->88, 97->99 and 99->101.  The dropped
 * lines include the start of this function's signature, the declarations of
 * `plasma` and `task_flags`, and the name (and presumably the leading
 * arguments) of each of the three task-submission calls below.  Restore them
 * from the original pcungqrrh.c before compiling.
 *
 * Visible behaviour: walks the tile columns k of A from min(A.mt, A.nt)-1
 * down to 0 and, for every tile column n of Q, submits tasks that update
 * tiles of Q using tiles of A and T.  For each k the tasks are issued in two
 * phases: first across pairs of tile rows (M, M+RD) with RD halving from the
 * largest BS*2^j below A.mt-k down to BS, then inside each group of up to BS
 * consecutive tile rows starting at M.
 *
 * Visible parameters:
 *   T         descriptor whose tiles are read through T() and T2(); T.nb and
 *             T.mb are forwarded to every call.
 *   BS        group size in tile rows.  NOTE(review): must be >= 1 -- with
 *             BS == 0 the `RD *= 2` search loop never advances.
 *   sequence  if its status is not PLASMA_SUCCESS the function returns
 *             without submitting anything; its quark_sequence is attached to
 *             the task flags.
 *   request   not referenced in the visible lines.
 * A and Q are used as in-scope descriptors (fields m, n, mb, nb, mt, nt);
 * presumably they are the leading parameters on the dropped signature line.
 */
28  PLASMA_desc T, int BS,
29  PLASMA_sequence *sequence, PLASMA_request *request)
30 {
33 
34  int k, m, n;
35  int K, M, RD, lastRD;
36  int ldaM, ldam, ldaMRD;
37  int ldbM, ldbm, ldbMRD;
38  int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin;
39  int ib;
40 
41  plasma = plasma_context_self();
/* Nothing is submitted once the sequence has already recorded a failure. */
42  if (sequence->status != PLASMA_SUCCESS)
43  return;
/* Tag every task submitted through task_flags with this sequence. */
44  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
45 
46  ib = PLASMA_IB;
47  K = min(A.mt, A.nt);
/* Tile columns of A are visited last-to-first. */
48  for (k = K-1; k >= 0; k--) {
/* Width of tile column k: the remainder for the last tile column, else A.nb. */
49  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Find the largest RD = BS*2^j that is still < A.mt-k; lastRD stays 0 when
 * even BS is not below A.mt-k, which makes the next loop a no-op. */
50  lastRD = 0;
51  for (RD = BS; RD < A.mt-k; RD *= 2)
52  lastRD = RD;
/* Phase 1: pair tile row M with tile row M+RD, widest distance first. */
53  for (RD = lastRD; RD >= BS; RD /= 2) {
54  for (M = k; M+RD < A.mt; M += 2*RD) {
/* Row count of tile row M+RD (remainder if it is the last tile row). */
55  tempMRDm = M+RD == A.mt-1 ? A.m-(M+RD)*A.mb : A.mb;
56  ldbM = BLKLDD(Q, M );
57  ldbMRD = BLKLDD(Q, M+RD);
58  ldaMRD = BLKLDD(A, M+RD);
59  for (n = 0; n < Q.nt; n++) {
/* Width of tile column n of Q (remainder for the last one). */
60  tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb;
/* Call name dropped by the extraction (original lines 61 and 63);
 * presumably QUARK_CORE_cttmqr -- TODO confirm.  Operands: Q(M,n) and
 * Q(M+RD,n) with A(M+RD,k) and the T2 tile of (M+RD,k). */
62  plasma->quark, &task_flags,
64  A.nb, tempnn, tempMRDm, tempnn,
65  tempkn, ib, T.nb,
66  Q (M, n), ldbM,
67  Q (M+RD, n), ldbMRD,
68  A (M+RD, k), ldaMRD,
69  T2(M+RD, k), T.mb);
70  }
71  }
72  }
/* Phase 2: handle each group of up to BS tile rows starting at M. */
73  for (M = k; M < A.mt; M += BS) {
74  tempMm = M == A.mt-1 ? A.m-M*A.mb : A.mb;
75  tempkmin = min(tempMm, tempkn);
76  ldaM = BLKLDD(A, M);
77  ldbM = BLKLDD(Q, M);
/* Rows of the group other than M itself, from the bottom one up to M+1. */
78  for (m = min(M+BS, A.mt)-1; m > M; m--) {
79  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
80  ldbm = BLKLDD(Q, m);
81  ldam = BLKLDD(A, m);
82 
83  for (n = 0; n < Q.nt; n++) {
84  tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb;
/* Call name dropped by the extraction (original lines 85 and 87);
 * presumably QUARK_CORE_ctsmqr -- TODO confirm.  Operands: Q(M,n) and
 * Q(m,n) with A(m,k) and T(m,k). */
86  plasma->quark, &task_flags,
88  A.nb, tempnn, tempmm, tempnn,
89  tempkn, ib, T.nb,
90  Q(M, n), ldbM,
91  Q(m, n), ldbm,
92  A(m, k), ldam,
93  T(m, k), T.mb);
94  }
95  }
/* Finally the group's first tile row M itself, for every tile column of Q. */
96  for (n = 0; n < Q.nt; n++) {
97  tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb;
/* Call name dropped by the extraction (original lines 98 and 100);
 * presumably QUARK_CORE_cunmqr -- TODO confirm.  Operands: A(M,k) and
 * T(M,k) applied to Q(M,n). */
99  plasma->quark, &task_flags,
101  tempMm, tempnn,
102  tempkmin, ib, T.nb,
103  A(M, k), ldaM,
104  T(M, k), T.mb,
105  Q(M, n), ldbM);
106  }
107  }
108  }
109 }