PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzunmqrrh.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/*
 * Tile accessor shorthands used throughout this file.
 *
 * A(m,n), B(m,n) and T(m,n) each forward the descriptor of the same name,
 * the element type PLASMA_Complex64_t and the index pair (m, n) to BLKADDR.
 * T2(m,n) forwards to BLKADDR on descriptor T as well, but with the second
 * index shifted by A.nt, i.e. it is equivalent to T(m, (n)+A.nt).
 *
 * In this file T(...) is passed by the loops that step M by BS and walk the
 * tiles inside one group, while T2(...) is passed only by the loops that
 * step by the distance RD (RD = BS, 2*BS, ...).
 *
 * NOTE(review): BLKADDR is not defined in this file (presumably it comes from
 * common.h and yields the address of tile (m, n) of the descriptor) - confirm.
 * NOTE(review): T2 presumably selects a second set of A.nt tile columns
 * stored after the first A.nt columns of T, so T must be wide enough to hold
 * both - confirm against the code that allocates T.
 */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, (m), (n))
20 #define B(m,n) BLKADDR(B, PLASMA_Complex64_t, (m), (n))
21 #define T(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n))
22 #define T2(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n)+A.nt)
23 /***************************************************************************/
29  PLASMA_sequence *sequence, PLASMA_request *request)
30 {
33 
34  int k, m, n;
35  int K, M, RD, lastRD;
36  int ldaM, ldam, ldan, ldaMRD;
37  int ldbM, ldbm, ldbMRD;
38  int tempMm, tempkn, tempnn, tempmm, tempMRDm, tempkmin;
39  int ib;
40 
41  plasma = plasma_context_self();
42  if (sequence->status != PLASMA_SUCCESS)
43  return;
44  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
45 
46  ib = PLASMA_IB;
47  K = min(A.mt, A.nt);
48 
49  if (side == PlasmaLeft ) {
50  if (trans == PlasmaConjTrans) {
51  /*
52  * PlasmaLeft / PlasmaConjTrans
53  */
54  for (k = 0; k < K; k++) {
55  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
56  for (M = k; M < A.mt; M += BS) {
57  tempMm = M == A.mt-1 ? A.m-M*A.mb : A.mb;
58  tempkmin = min(tempMm, tempkn);
59  ldaM = BLKLDD(A, M);
60  ldbM = BLKLDD(B, M);
61  for (n = 0; n < B.nt; n++) {
62  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
64  plasma->quark, &task_flags,
65  side, trans,
66  tempMm, tempnn,
67  tempkmin, ib, T.nb,
68  A(M, k), ldaM,
69  T(M, k), T.mb,
70  B(M, n), ldbM);
71  }
72  for (m = M+1; m < min(M+BS, A.mt); m++) {
73  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
74  ldbm = BLKLDD(B, m);
75  ldam = BLKLDD(A, m);
76  for (n = 0; n < B.nt; n++) {
77  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
79  plasma->quark, &task_flags,
80  side, trans,
81  A.nb, tempnn, tempmm, tempnn,
82  tempkn, ib, T.nb,
83  B(M, n), ldbM,
84  B(m, n), ldbm,
85  A(m, k), ldam,
86  T(m, k), T.mb);
87  }
88  }
89  }
90  for (RD = BS; RD < A.mt-k; RD *= 2) {
91  for (M = k; M+RD < A.mt; M += 2*RD) {
92  tempMRDm = M+RD == A.mt-1 ? A.m-(M+RD)*A.mb : A.mb;
93  ldbM = BLKLDD(B, M );
94  ldbMRD = BLKLDD(B, M+RD);
95  ldaMRD = BLKLDD(A, M+RD);
96  for (n = 0; n < B.nt; n++) {
97  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
99  plasma->quark, &task_flags,
100  side, trans,
101  A.nb, tempnn, tempMRDm, tempnn,
102  tempkn, ib, T.nb,
103  B (M, n), ldbM,
104  B (M+RD, n), ldbMRD,
105  A (M+RD, k), ldaMRD,
106  T2(M+RD, k), T.mb);
107  }
108  }
109  }
110  }
111  } else {
112  /*
113  * PlasmaLeft / PlasmaNoTrans
114  */
115  for (k = K-1; k >= 0; k--) {
116  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
117  lastRD = 0;
118  for (RD = BS; RD < A.mt-k; RD *= 2)
119  lastRD = RD;
120  for (RD = lastRD; RD >= BS; RD /= 2) {
121  for (M = k; M+RD < A.mt; M += 2*RD) {
122  tempMRDm = M+RD == A.mt-1 ? A.m-(M+RD)*A.mb : A.mb;
123  ldbM = BLKLDD(B, M );
124  ldbMRD = BLKLDD(B, M+RD);
125  ldaMRD = BLKLDD(A, M+RD);
126  for (n = 0; n < B.nt; n++) {
127  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
129  plasma->quark, &task_flags,
130  side, trans,
131  A.nb, tempnn, tempMRDm, tempnn,
132  tempkn, ib, T.nb,
133  B (M, n), ldbM,
134  B (M+RD, n), ldbMRD,
135  A (M+RD, k), ldaMRD,
136  T2(M+RD, k), T.mb);
137  }
138  }
139  }
140  for (M = k; M < A.mt; M += BS) {
141  tempMm = M == A.mt-1 ? A.m-M*A.mb : A.mb;
142  tempkmin = min(tempMm, tempkn);
143  ldaM = BLKLDD(A, M);
144  ldbM = BLKLDD(B, M);
145  for (m = min(M+BS, A.mt)-1; m > M; m--) {
146  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
147  ldbm = BLKLDD(B, m);
148  ldam = BLKLDD(A, m);
149  for (n = 0; n < B.nt; n++) {
150  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
152  plasma->quark, &task_flags,
153  side, trans,
154  A.nb, tempnn, tempmm, tempnn,
155  tempkn, ib, T.nb,
156  B(M, n), ldbM,
157  B(m, n), ldbm,
158  A(m, k), ldam,
159  T(m, k), T.mb);
160  }
161  }
162  for (n = 0; n < B.nt; n++) {
163  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
165  plasma->quark, &task_flags,
166  side, trans,
167  tempMm, tempnn,
168  tempkmin, ib, T.nb,
169  A(M, k), ldaM,
170  T(M, k), T.mb,
171  B(M, n), ldbM);
172  }
173  }
174  }
175  }
176  } else {
177  if (trans == PlasmaConjTrans) {
178  /*
179  * PlasmaRight / PlasmaConjTrans
180  */
181  for (k = K-1; k >= 0; k--) {
182  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
183  lastRD = 0;
184  for (RD = BS; RD < A.mt-k; RD *= 2)
185  lastRD = RD;
186  for (RD = lastRD; RD >= BS; RD /= 2) {
187  for (M = k; M+RD < A.mt; M += 2*RD) {
188  tempMRDm = M+RD == A.mt-1 ? A.m-(M+RD)*A.mb : A.mb;
189  ldaMRD = BLKLDD(A, M+RD);
190  for (m = 0; m < B.mt; m++) {
191  ldbm = BLKLDD(B, m);
192  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
194  plasma->quark, &task_flags,
195  side, trans,
196  tempmm, B.nb, tempmm, tempMRDm,
197  tempkn, ib, T.nb,
198  B (m, M), ldbm,
199  B (m, M+RD), ldbm,
200  A (M+RD, k), ldaMRD,
201  T2(M+RD, k), T.mb);
202  }
203  }
204  }
205  for (M = k; M < A.mt; M += BS) {
206  tempMm = M == A.mt-1 ? A.m-M*A.mb : A.mb;
207  tempkmin = min(tempMm, tempkn);
208  ldaM = BLKLDD(A, M);
209  ldbM = BLKLDD(B, M);
210  for (n = min(M+BS, A.mt)-1; n > M; n--) {
211  ldan = BLKLDD(A, n);
212  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
213  for (m = 0; m < B.mt; m++) {
214  ldbm = BLKLDD(B, m);
215  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
217  plasma->quark, &task_flags,
218  side, trans,
219  tempmm, tempMm, tempmm, tempnn,
220  tempkn, ib, T.nb,
221  B(m, M), ldbm,
222  B(m, n), ldbm,
223  A(n, k), ldan,
224  T(n, k), T.mb);
225  }
226  }
227  for (m = 0; m < B.mt; m++) {
228  ldbm = BLKLDD(B, m);
229  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
231  plasma->quark, &task_flags,
232  side, trans,
233  tempmm, tempMm,
234  tempkmin, ib, T.nb,
235  A(M, k), ldaM,
236  T(M, k), T.mb,
237  B(m, M), ldbm);
238  }
239  }
240  }
241  } else {
242  /*
243  * PlasmaRight / PlasmaNoTrans
244  */
245  for (k = 0; k < K; k++) {
246  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
247  for (M = k; M < A.mt; M += BS) {
248  tempMm = M == A.mt-1 ? A.m-M*A.mb : A.mb;
249  tempkmin = min(tempMm, tempkn);
250  ldaM = BLKLDD(A, M);
251  for (m = 0; m < B.mt; m++) {
252  ldbm = BLKLDD(B, m);
253  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
255  plasma->quark, &task_flags,
256  side, trans,
257  tempmm, tempMm,
258  tempkmin, ib, T.nb,
259  A(M, k), ldaM,
260  T(M, k), T.mb,
261  B(m, M), ldbm);
262  }
263  for (n = M+1; n < min(M+BS, A.mt); n++) {
264  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
265  ldan = BLKLDD(A, n);
266  for (m = 0; m < B.mt; m++) {
267  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
268  ldbm = BLKLDD(B, m);
270  plasma->quark, &task_flags,
271  side, trans,
272  tempmm, tempMm, tempmm, tempnn,
273  tempkn, ib, T.nb,
274  B(m, M), ldbm,
275  B(m, n), ldbm,
276  A(n, k), ldan,
277  T(n, k), T.mb);
278  }
279  }
280  }
281  for (RD = BS; RD < A.mt-k; RD *= 2) {
282  for (M = k; M+RD < A.mt; M += 2*RD) {
283  tempMRDm = M+RD == A.mt-1 ? A.m-(M+RD)*A.mb : A.mb;
284  ldaMRD = BLKLDD(A, M+RD);
285  for (m = 0; m < B.mt; m++) {
286  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
287  ldbm = BLKLDD(B, m);
289  plasma->quark, &task_flags,
290  side, trans,
291  tempmm, B.nb, tempmm, tempMRDm,
292  tempkn, ib, T.nb,
293  B (m, M ), ldbm,
294  B (m, M+RD), ldbm,
295  A (M+RD, k), ldaMRD,
296  T2(M+RD, k), T.mb);
297  }
298  }
299  }
300  }
301  }
302  }
303 }