PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzpotrf.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/* Shorthand used throughout this file: A(m,n) expands to
 * BLKADDR(A, PLASMA_Complex64_t, m, n), so a variable named A must be in
 * scope wherever the macro is used. BLKADDR is defined elsewhere; presumably
 * it yields the address of tile (m,n) of the descriptor - TODO confirm. */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
20 /***************************************************************************/
24 {
    /*
     * Statically scheduled variant: each thread walks its own sequence of
     * (k, m, n) tasks and synchronizes with the others through the
     * ss_cond_wait / ss_cond_set progress calls.
     *
     * NOTE(review): this text comes from a source listing whose embedded line
     * numbers skip in places (for example 24 -> 26, 37 -> 40, 81 -> 84), so
     * the function signature, the declaration of uplo, the definitions of
     * zone / mzone and the first line of several calls are not visible here.
     * Judging by the file name (pzpotrf.c) this is presumably the tile
     * Cholesky factorization - confirm against the original file.
     */
26  PLASMA_desc A;
27  PLASMA_sequence *sequence;
28  PLASMA_request *request;
29 
    /* (k, m, n) is the current task; next_* hold the task that follows it. */
30  int k, m, n;
31  int next_k;
32  int next_m;
33  int next_n;
    /* Leading dimensions of tile rows k, m and n, obtained via BLKLDD. */
34  int ldak, ldam, ldan;
35  int info;
    /* Extents of tiles k and m; the last tile gets the remainder of A.n. */
36  int tempkn, tempmn;
37 
40 
41  plasma_unpack_args_4(uplo, A, sequence, request);
    /* Do nothing if the sequence has already recorded a failure. */
42  if (sequence->status != PLASMA_SUCCESS)
43  return;
44  ss_init(A.nt, A.nt, 0);
45 
    /*
     * Pick this thread's first task: start from m = PLASMA_RANK and, while m
     * runs past the last tile index, move on to the next k and fold m back
     * into range with m = m - A.nt + k.
     * (PLASMA_RANK / PLASMA_SIZE are presumably this thread's index and the
     * number of threads - TODO confirm.)
     */
46  k = 0;
47  m = PLASMA_RANK;
48  while (m >= A.nt) {
49  k++;
50  m = m-A.nt+k;
51  }
52  n = 0;
53 
    /* Main task loop; also stops as soon as ss_aborted() reports an abort. */
54  while (k < A.nt && m < A.nt && !ss_aborted()) {
    /*
     * Compute the successor task before doing any work: n advances first;
     * once n exceeds k, m jumps ahead by PLASMA_SIZE (wrapping into the
     * next k exactly as in the initial placement above) and n restarts at 0.
     */
55  next_n = n;
56  next_m = m;
57  next_k = k;
58 
59  next_n++;
60  if (next_n > next_k) {
61  next_m += PLASMA_SIZE;
62  while (next_m >= A.nt && next_k < A.nt) {
63  next_k++;
64  next_m = next_m-A.nt+next_k;
65  }
66  next_n = 0;
67  }
68 
    /* Tile extents: A.nb everywhere except the last tile (index A.nt-1). */
69  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
70  tempmn = m == A.nt-1 ? A.n-m*A.nb : A.nb;
71 
72  ldak = BLKLDD(A, k);
73  ldan = BLKLDD(A, n);
74  ldam = BLKLDD(A, m);
75 
    /*
     * Dispatch on the task coordinates:
     *   m == k, n == k : operate on diagonal tile A(k,k), then publish (k,k)
     *   m == k, n != k : CORE_zherk update of A(k,k)
     *   m != k, n == k : CORE_ztrsm on the off-diagonal tile, then publish (m,k)
     *   m != k, n != k : CORE_zgemm update of the off-diagonal tile
     */
76  if (m == k) {
77  if (n == k) {
    /* NOTE(review): the call name and first arguments (listing lines 82-83
     * and 92-93) are missing from this listing; only the trailing arguments
     * tempkn, A(k,k), ldak, &info are visible. */
78  /*
79  * PlasmaLower
80  */
81  if (uplo == PlasmaLower) {
84  tempkn,
85  A(k, k), ldak,
86  &info);
87  }
88  /*
89  * PlasmaUpper
90  */
91  else {
94  tempkn,
95  A(k, k), ldak,
96  &info);
97  }
    /* A nonzero info is reported with the tile offset A.nb*k added, and the
     * static schedule is aborted. */
98  if (info != 0) {
99  plasma_request_fail(sequence, request, info + A.nb*k);
100  ss_abort();
101  }
    /* Publish progress for (k,k); this is still done after a failure. */
102  ss_cond_set(k, k, 1);
103  }
104  else {
    /* Wait on progress entry (k,n) before using that tile in the update. */
105  ss_cond_wait(k, n, 1);
106  /*
107  * PlasmaLower
108  */
109  if (uplo == PlasmaLower) {
110  CORE_zherk(
112  tempkn, A.nb,
113  -1.0, A(k, n), ldak,
114  1.0, A(k, k), ldak);
115  }
116  /*
117  * PlasmaUpper
118  */
119  else {
120  CORE_zherk(
122  tempkn, A.nb,
123  -1.0, A(n, k), ldan,
124  1.0, A(k, k), ldak);
125  }
126  }
127  }
128  else {
129  if (n == k) {
    /* Wait on the diagonal entry (k,k) before the triangular solve. */
130  ss_cond_wait(k, k, 1);
131  /*
132  * PlasmaLower
133  */
134  if (uplo == PlasmaLower) {
135  CORE_ztrsm(
137  tempmn, A.nb,
138  zone, A(k, k), ldak,
139  A(m, k), ldam);
140  }
141  /*
142  * PlasmaUpper
143  */
144  else {
145  CORE_ztrsm(
147  A.nb, tempmn,
148  zone, A(k, k), ldak,
149  A(k, m), ldak);
150  }
    /* Publish progress for the off-diagonal tile just processed. */
151  ss_cond_set(m, k, 1);
152  }
153  else {
    /* Both tiles read by the update must have been published first. */
154  ss_cond_wait(k, n, 1);
155  ss_cond_wait(m, n, 1);
156  /*
157  * PlasmaLower
158  */
159  if (uplo == PlasmaLower) {
160  CORE_zgemm(
162  tempmn, A.nb, A.nb,
163  mzone, A(m, n), ldam,
164  A(k, n), ldak,
165  zone, A(m, k), ldam);
166  }
167  /*
168  * PlasmaUpper
169  */
170  else {
171  CORE_zgemm(
173  A.nb, tempmn, A.nb,
174  mzone, A(n, k), ldan,
175  A(n, m), ldan,
176  zone, A(k, m), ldak);
177  }
178  }
179  }
    /* Advance to the successor task computed at the top of the iteration. */
180  n = next_n;
181  m = next_m;
182  k = next_k;
183  }
184  ss_finalize();
185 }
186 
187 /***************************************************************************/
191  PLASMA_sequence *sequence, PLASMA_request *request)
192 {
    /*
     * Dynamically scheduled variant: instead of computing directly, it
     * submits one task per tile operation, passing plasma->quark and
     * &task_flags to every submission.
     *
     * NOTE(review): this text comes from a source listing whose embedded line
     * numbers skip in places (for example 192 -> 195, 199 -> 202, 213 -> 215).
     * The start of the signature, the declarations of plasma, task_flags,
     * zone and mzone, and the callee name of every task submission are not
     * visible here. Judging by the file name and the argument patterns these
     * are presumably the QUARK wrappers for zpotrf / ztrsm / zherk / zgemm -
     * confirm against the original file.
     */
195 
196  int k, m, n;
197  int ldak, ldam;
    /* Extents of tiles k and m; the last tile gets the remainder. */
198  int tempkm, tempmm;
199 
202 
203  plasma = plasma_context_self();
    /* Do nothing if the sequence has already recorded a failure. */
204  if (sequence->status != PLASMA_SUCCESS)
205  return;
    /* Attach the tasks submitted below to this sequence's QUARK sequence. */
206  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
207  /*
208  * PlasmaLower
209  */
210  if (uplo == PlasmaLower) {
    /* One outer step per diagonal tile; this branch iterates over A.mt and
     * sizes tiles from A.m / A.mb. */
211  for (k = 0; k < A.mt; k++) {
212  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
213  ldak = BLKLDD(A, k);
    /* Task on diagonal tile A(k,k); A.nb*k is passed as the last argument
     * (presumably the offset added to a reported info - TODO confirm). */
215  plasma->quark, &task_flags,
216  PlasmaLower, tempkm, A.mb,
217  A(k, k), ldak,
218  sequence, request, A.nb*k);
219 
    /* One task per tile A(m,k) below the diagonal, each also taking A(k,k). */
220  for (m = k+1; m < A.mt; m++) {
221  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
222  ldam = BLKLDD(A, m);
224  plasma->quark, &task_flags,
226  tempmm, A.mb, A.mb,
227  zone, A(k, k), ldak,
228  A(m, k), ldam);
229  }
    /* Trailing update: for each m > k, one task on diagonal tile A(m,m)
     * using A(m,k), then one task per tile A(m,n) with k < n < m. */
230  for (m = k+1; m < A.mt; m++) {
231  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
232  ldam = BLKLDD(A, m);
234  plasma->quark, &task_flags,
236  tempmm, A.mb, A.mb,
237  -1.0, A(m, k), ldam,
238  1.0, A(m, m), ldam);
239 
240  for (n = k+1; n < m; n++) {
    /* A(n,k) is passed with leading dimension A.mb rather than a BLKLDD
     * value; n < m here, so n is never the last tile index. */
242  plasma->quark, &task_flags,
244  tempmm, A.mb, A.mb, A.mb,
245  mzone, A(m, k), ldam,
246  A(n, k), A.mb,
247  zone, A(m, n), ldam);
248  }
249  }
250  }
251  }
252  /*
253  * PlasmaUpper
254  */
255  else {
    /* Mirror of the lower case with tile indices transposed; this branch
     * iterates over A.nt and sizes tiles from A.n / A.nb. */
256  for (k = 0; k < A.nt; k++) {
257  tempkm = k == A.nt-1 ? A.n-k*A.nb : A.nb;
258  ldak = BLKLDD(A, k);
    /* Task on diagonal tile A(k,k), again passing A.nb*k as the last argument. */
260  plasma->quark, &task_flags,
261  PlasmaUpper,
262  tempkm, A.mb,
263  A(k, k), ldak,
264  sequence, request, A.nb*k);
265 
    /* One task per tile A(k,m) right of the diagonal, each also taking A(k,k). */
266  for (m = k+1; m < A.nt; m++) {
267  tempmm = m == A.nt-1 ? A.n-m*A.nb : A.nb;
269  plasma->quark, &task_flags,
271  A.nb, tempmm, A.mb,
272  zone, A(k, k), ldak,
273  A(k, m), ldak);
274  }
    /* Trailing update: for each m > k, one task on diagonal tile A(m,m)
     * using A(k,m), then one task per tile A(n,m) with k < n < m. */
275  for (m = k+1; m < A.nt; m++) {
276  tempmm = m == A.nt-1 ? A.n-m*A.nb : A.nb;
277  ldam = BLKLDD(A, m);
279  plasma->quark, &task_flags,
281  tempmm, A.mb, A.mb,
282  -1.0, A(k, m), ldak,
283  1.0, A(m, m), ldam);
284 
285  for (n = k+1; n < m; n++) {
    /* A(n,m) is passed with leading dimension A.mb rather than a BLKLDD
     * value; n < m here, so n is never the last tile index. */
287  plasma->quark, &task_flags,
289  A.mb, tempmm, A.mb, A.mb,
290  mzone, A(k, n), ldak,
291  A(k, m), ldak,
292  zone, A(n, m), A.mb);
293  }
294  }
295  }
296  }
297 }