PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pctrsmpl.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/* Shorthands used by the routines below. Each one refers to a variable of the
 * same name (A, B, L, IPIV) that must be in scope where the macro is expanded. */
/* A/B/L forward to BLKADDR with element type PLASMA_Complex32_t and the tile
 * coordinates (m,n). BLKADDR is defined elsewhere; presumably it yields the
 * address of tile (m,n) of the descriptor -- confirm against common.h. */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n)
20 #define B(m,n) BLKADDR(B, PLASMA_Complex32_t, m, n)
21 #define L(m,n) BLKADDR(L, PLASMA_Complex32_t, m, n)
/* Address of the IPIV element at offset A.nb*(m + A.mt*n). Every factor is
 * widened to int64_t before the multiplication. */
22 #define IPIV(m,n) &(IPIV[(int64_t)A.nb*((int64_t)(m)+(int64_t)A.mt*(int64_t)(n))])
23 /***************************************************************************/
/*
 * Body of the first routine of pctrsmpl.c.
 *
 * NOTE(review): listing line 26 (the function signature) is absent from this
 * listing. Presumably it declares plasma_pctrsmpl taking the PLASMA context
 * that plasma_unpack_args_6 reads from -- confirm against the original file.
 *
 * Visible behaviour:
 *   - unpacks A, B, L, IPIV, sequence and request, and returns at once when
 *     sequence->status is not PLASMA_SUCCESS;
 *   - walks (k, m, n) tile coordinates, starting from a position derived from
 *     PLASMA_RANK and advancing the column index by PLASMA_SIZE;
 *   - for every visited coordinate waits on ss_cond_wait, issues one kernel
 *     call on the tiles, then publishes step k with ss_cond_set.
 * `request` is unpacked but not referenced again in the visible code.
 */
27 {
28  PLASMA_desc A;
29  PLASMA_desc B;
30  PLASMA_desc L;
31  int *IPIV;
32  PLASMA_sequence *sequence;
33  PLASMA_request *request;
34 
35  int k, m, n;
36  int next_k;
37  int next_m;
38  int next_n;
39  int ldak, ldbk, ldam, ldbm;
40  int tempkm, tempnn, tempkmin, tempmm, tempkn;
41  int ib;
42 
43  plasma_unpack_args_6(A, B, L, IPIV, sequence, request);
    /* Do nothing if the sequence has already been flagged as failed. */
44  if (sequence->status != PLASMA_SUCCESS)
45  return;
    /* ss_* helpers are defined elsewhere; presumably this sets up a
     * B.mt x B.nt progress table initialised to -1 -- TODO confirm. */
46  ss_init(B.mt, B.nt, -1);
47 
48  ib = PLASMA_IB;
    /* Starting coordinate: fold PLASMA_RANK into a column index n in
     * [0, B.nt) and count the wrap-arounds in k; the row starts at m = k. */
49  k = 0;
50  n = PLASMA_RANK;
51  while (n >= B.nt) {
52  k++;
53  n = n-B.nt;
54  }
55  m = k;
56 
    /* Stop once k has passed the last step or n could not be folded back
     * into range (see the inner loop below, which stops wrapping when
     * next_k reaches min(A.mt, A.nt)). */
57  while (k < min(A.mt, A.nt) && n < B.nt) {
    /* Compute the coordinate of the following iteration up front. */
58  next_n = n;
59  next_m = m;
60  next_k = k;
61 
    /* Move down one tile row; after the last row, jump PLASMA_SIZE columns
     * ahead, wrapping into later steps k, and restart the row at m = k. */
62  next_m++;
63  if (next_m == A.mt) {
64  next_n += PLASMA_SIZE;
65  while (next_n >= B.nt && next_k < min(A.mt, A.nt)) {
66  next_k++;
67  next_n = next_n-B.nt;
68  }
69  next_m = next_k;
70  }
71 
    /* Tile extents: the last tile in a dimension gets the remainder,
     * every other tile the full block size. */
72  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
73  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
74  tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb;
75  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
76  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
77 
    /* Leading dimensions of the tiles in rows k and m of A and B. */
78  ldak = BLKLDD(A, k);
79  ldbk = BLKLDD(B, k);
80  ldam = BLKLDD(A, m);
81  ldbm = BLKLDD(B, m);
82 
    /* Diagonal case: the call works on A(k,k) and B(k,n). */
83  if (m == k) {
    /* Wait until entry (k,n) has reached step k-1. */
84  ss_cond_wait(k, n, k-1);
    /* NOTE(review): listing line 85, which names the called routine and
     * opens its argument list, is absent from this listing. The nine
     * arguments below are consistent with CORE_cgessm -- confirm. */
86  tempkm, tempnn, tempkmin, ib,
87  IPIV(k, k),
88  A(k, k), ldak,
89  B(k, n), ldbk);
    /* Publish that entry (k,n) has completed step k. */
90  ss_cond_set(k, n, k);
91  }
    /* Off-diagonal case: the call combines B(k,n) and B(m,n) using
     * L(m,k), A(m,k) and the pivots at IPIV(m,k). */
92  else {
93  ss_cond_wait(m, n, k-1);
    /* NOTE(review): listing line 94, which names the called routine and
     * opens its argument list, is absent from this listing. The fifteen
     * arguments below are consistent with CORE_cssssm -- confirm. */
95  A.nb, tempnn, tempmm, tempnn, tempkn, ib,
96  B(k, n), ldbk,
97  B(m, n), ldbm,
98  L(m, k), L.mb,
99  A(m, k), ldam,
100  IPIV(m, k));
101  ss_cond_set(m, n, k);
102  }
    /* Advance to the coordinate computed at the top of the iteration. */
103  n = next_n;
104  m = next_m;
105  k = next_k;
106  }
107  ss_finalize();
108 }
109 
110 /***************************************************************************/
/*
 * Second routine of pctrsmpl.c; the line below is the tail of its signature.
 *
 * NOTE(review): listing line 113 (start of the signature) and lines 116-117
 * (local declarations) are absent from this listing. The body uses A, B, L,
 * IPIV, plasma and task_flags, so those are presumably the remaining
 * parameters (A, B, L, IPIV) and the two missing locals (plasma, task_flags).
 * The name is presumably plasma_pctrsmpl_quark -- confirm against the
 * original file.
 *
 * Visible behaviour: for every step k it issues one call per column tile n on
 * the diagonal tile A(k,k), then one call per (m, n) pair for each row m below
 * the diagonal. Every call receives plasma->quark and &task_flags.
 * `request` is not referenced in the visible code.
 */
114  PLASMA_sequence *sequence, PLASMA_request *request)
115 {
118 
119  int k, m, n;
120  int ldak, ldam, ldbk, ldbm;
121  int tempkm, tempnn, tempkmin, tempmm, tempkn;
122  int ib;
123 
124  plasma = plasma_context_self();
    /* Do nothing if the sequence has already been flagged as failed. */
125  if (sequence->status != PLASMA_SUCCESS)
126  return;
    /* Attach the sequence's quark_sequence to the flags handed to each call below. */
127  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
128 
129  ib = PLASMA_IB;
130  for (k = 0; k < min(A.mt, A.nt); k++) {
    /* Tile extents for step k: the last tile in a dimension gets the
     * remainder, every other tile the full block size. */
131  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
132  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
133  tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb;
134  ldak = BLKLDD(A, k);
135  ldbk = BLKLDD(B, k);
    /* Diagonal step: one call per column tile of B, on A(k,k) and B(k,n). */
136  for (n = 0; n < B.nt; n++) {
137  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
    /* NOTE(review): listing line 138, which names the called routine and
     * opens its argument list, is absent from this listing. The arguments
     * below are consistent with QUARK_CORE_cgessm -- confirm. */
139  plasma->quark, &task_flags,
140  tempkm, tempnn, tempkmin, ib, L.nb,
141  IPIV(k, k),
142  A(k, k), ldak,
143  B(k, n), ldbk);
144  }
    /* Rows below the diagonal: one call per (m, n), combining B(k,n) and
     * B(m,n) with L(m,k), A(m,k) and the pivots at IPIV(m,k). */
145  for (m = k+1; m < A.mt; m++) {
146  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
147  ldam = BLKLDD(A, m);
148  ldbm = BLKLDD(B, m);
149  for (n = 0; n < B.nt; n++) {
150  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
    /* NOTE(review): listing line 151, which names the called routine and
     * opens its argument list, is absent from this listing. The arguments
     * below are consistent with QUARK_CORE_cssssm -- confirm. */
152  plasma->quark, &task_flags,
153  A.nb, tempnn, tempmm, tempnn, tempkn, ib, L.nb,
154  B(k, n), ldbk,
155  B(m, n), ldbm,
156  L(m, k), L.mb,
157  A(m, k), ldam,
158  IPIV(m, k));
159  }
160  }
161  }
162 }