PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pcgeadd.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
18 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n) /* tile (m, n) of descriptor A via BLKADDR (defined elsewhere); presumably its address as PLASMA_Complex32_t* - confirm */
19 #define B(m,n) BLKADDR(B, PLASMA_Complex32_t, m, n) /* same accessor for descriptor B */
20 /***************************************************************************/
24 { /* Statically scheduled worker body (signature not visible in this listing): calls CORE_cgeadd on every PLASMA_SIZE-th tile of A and B, starting at tile index PLASMA_RANK. */
25  PLASMA_Complex32_t alpha;
26  PLASMA_desc A;
27  PLASMA_desc B;
28  PLASMA_sequence *sequence;
29  PLASMA_request *request;
30  /* X, Y: row and column counts of the current tile; (m, n): its tile coordinates; next_*: coordinates of the tile handled next. */
31  int X, Y;
32  int m, n;
33  int next_m;
34  int next_n;
35  int ldam, ldbm;
36 
37  plasma_unpack_args_5(alpha, A, B, sequence, request);
38  if (sequence->status != PLASMA_SUCCESS) /* nothing is computed unless the sequence status is PLASMA_SUCCESS */
39  return;
40  /* Convert the linear index PLASMA_RANK (presumably this thread's rank - confirm) into tile coordinates, counting down each tile column of A.mt tiles. */
41  n = 0;
42  m = PLASMA_RANK;
43  while (m >= A.mt && n < A.nt) {
44  n++;
45  m = m-A.mt;
46  }
47  /* Visit tiles until the column index runs past the last tile column. */
48  while (n < A.nt) {
49  next_m = m;
50  next_n = n;
51  /* Advance the linear position by PLASMA_SIZE (presumably the thread count - confirm) and wrap it back into (row, column) form. */
52  next_m += PLASMA_SIZE;
53  while (next_m >= A.mt && next_n < A.nt) {
54  next_n++;
55  next_m = next_m-A.mt;
56  }
57  /* Tile extent: the last tile row/column holds the remainder, every other tile is A.mb rows by A.nb columns. */
58  X = m == A.mt-1 ? A.m-A.mb*m : A.mb; /* rows are counted in units of A.mb, the same unit as the A.mb*m remainder term; A.nb here would be wrong whenever mb != nb */
59  Y = n == A.nt-1 ? A.n-A.nb*n : A.nb;
60  ldam = BLKLDD(A, m); /* presumably the leading dimension of tile row m of A - confirm against BLKLDD */
61  ldbm = BLKLDD(B, m);
62  CORE_cgeadd(X, Y, alpha, A(m, n), ldam, B(m, n), ldbm); /* X-by-Y tile kernel; presumably B(m,n) := alpha*A(m,n) + B(m,n) - confirm against CORE_cgeadd */
63 
64  m = next_m;
65  n = next_n;
66  }
67 }
68 
69 /***************************************************************************/
73  PLASMA_sequence *sequence, PLASMA_request *request)
74 { /* NOTE(review): the first line(s) of this signature are not visible in this listing; the body uses alpha, A and B, which presumably are parameters declared there - confirm. */
77  /* NOTE(review): listing lines 75-76 are absent; `plasma` and `task_flags` used below are presumably declared there - confirm against the original file. */
78  int X, Y;
79  int m, n;
80  int ldam, ldbm;
81 
82  plasma = plasma_context_self();
83  if (sequence->status != PLASMA_SUCCESS) /* nothing is submitted unless the sequence status is PLASMA_SUCCESS */
84  return;
85  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); /* sets the TASK_SEQUENCE flag of task_flags to this sequence's quark_sequence */
86 
87  for (m = 0; m < A.mt; m++) { /* loop over tile rows */
88  X = m == A.mt-1 ? A.m-m*A.mb : A.mb; /* rows in tile row m: remainder for the last tile row, A.mb otherwise */
89  ldam = BLKLDD(A, m); /* presumably the leading dimension of tile row m of A - confirm against BLKLDD */
90  ldbm = BLKLDD(B, m);
91 
92  for (n = 0; n < A.nt; n++) { /* loop over tile columns */
93  Y = n == A.nt-1 ? A.n-n*A.nb : A.nb; /* columns in tile column n: remainder for the last tile column, A.nb otherwise */
95  plasma->quark, &task_flags, /* NOTE(review): these are arguments of a call whose opening line (listing line 94) is not shown; presumably a QUARK task insertion for tile (m, n) - confirm */
96  X, Y, A.mb,
97  alpha, A(m, n), ldam,
98  B(m, n), ldbm);
99  }
100  }
101 }