PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzgetmi2.c
Go to the documentation of this file.
1 
20 #include "common.h"
21 
41  PLASMA_sequence *sequence;
42  PLASMA_request *request;
43  PLASMA_Complex64_t *A, *Al, *work;
44  PLASMA_enum storev, idep, odep;
45  int i, m, n, mb, nb, nprob;
46  int size, bsiz;
47 
48  plasma_unpack_args_10(idep, odep, storev, m, n, mb, nb, A, sequence, request);
49  if (sequence->status != PLASMA_SUCCESS)
50  return;
51 
52  /* quick return */
53  if( (mb < 2) || (nb < 2) ) {
54  return ;
55  }
56 
57  size = PLASMA_SIZE;
58  bsiz = mb*nb;
59  nprob = ( m / mb ) * ( n / nb );
60 
62 
63  for (i=PLASMA_RANK; i<nprob; i+=size) {
64  Al = &(A[ i * bsiz]);
65  CORE_zgetrip(mb, nb, Al, work);
66  }
67 
68  plasma_private_free(plasma, work);
69 }
70 
129  int m, int n, int mb, int nb, PLASMA_Complex64_t *A,
130  PLASMA_sequence *sequence, PLASMA_request *request)
131 {
134  PLASMA_Complex64_t *Al, *Ap;
135  int i, j, nprob, mt, nt;
136  int bsiz, psiz, size;
137 
138  plasma = plasma_context_self();
139  if (sequence->status != PLASMA_SUCCESS)
140  return;
141 
142  /* quick return */
143  if( (mb < 2) || (nb < 2) ) {
144  return ;
145  }
146 
147  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
148 
149  bsiz = mb*nb;
150  if ( storev == PlasmaColumnwise ) {
151  psiz = m*nb;
152  mt = ( m / mb );
153  nt = ( n / nb );
154  } else {
155  psiz = n*mb;
156  mt = ( n / nb );
157  nt = ( m / mb );
158  }
159 
160  size = m*n;
161 
162  switch ( idep ) {
163 /*
164  * Dependencies on each panel as input
165  */
166  case PlasmaIPT_Panel:
167  switch ( odep ) {
168  case PlasmaIPT_Panel:
169  for (j=0; j<nt; j++) {
170  Ap = A + (psiz*j);
171  for (i=0; i<mt; i++) {
172  Al = Ap + i*bsiz;
174  plasma->quark, &task_flags,
175  mb, nb, Al, bsiz,
176  Ap, psiz, INOUT|GATHERV);
177  }
178  }
179  break;
180 
181  case PlasmaIPT_All:
182  for (j=0; j<nt; j++) {
183  Ap = A + (psiz*j);
184  for (i=0; i<mt; i++) {
185  Al = Ap + i*bsiz;
186 
187  QUARK_CORE_zgetrip_f2(plasma->quark, &task_flags,
188  mb, nb, Al, bsiz,
189  Ap, size, INPUT,
190  A, size, INOUT|GATHERV);
191  }
192  }
193  break;
194 
195  case PlasmaIPT_NoDep:
196  default:
197  for (j=0; j<nt; j++) {
198  Ap = A + (psiz*j);
199  for (i=0; i<mt; i++) {
200  Al = Ap + i*bsiz;
202  plasma->quark, &task_flags,
203  mb, nb, Al, bsiz,
204  Ap, psiz, INPUT);
205  }
206  }
207  }
208  break;
209 
210 /*
211  * Dependency on the whole matrix as input
212  */
213  case PlasmaIPT_All:
214  switch ( odep ) {
215  case PlasmaIPT_Panel:
216  for (j=0; j<nt; j++) {
217  Ap = A + (psiz*j);
218  for (i=0; i<mt; i++) {
219  Al = Ap + i*bsiz;
221  plasma->quark, &task_flags,
222  mb, nb, Al, bsiz,
223  A, size, INPUT,
224  Ap, psiz, INOUT|GATHERV);
225  }
226  }
227  break;
228 
229  case PlasmaIPT_All:
230  nprob = mt*nt;
231  for (i=0; i<nprob; i++) {
232  QUARK_CORE_zgetrip_f1(plasma->quark, &task_flags,
233  mb, nb, &(A[ i*bsiz ]), bsiz,
234  A, size, INOUT|GATHERV);
235  }
236  break;
237 
238  case PlasmaIPT_NoDep:
239  default:
240  nprob = mt*nt;
241  for (i=0; i<nprob; i++) {
242  QUARK_CORE_zgetrip_f1(plasma->quark, &task_flags,
243  mb, nb, &(A[ i*bsiz ]), bsiz,
244  A, size, INPUT);
245  }
246  }
247  break;
248 
249 /*
250  * No dependencies as input
251  */
252  case PlasmaIPT_NoDep:
253  default:
254  switch ( odep ) {
255  case PlasmaIPT_Panel:
256  for (j=0; j<nt; j++) {
257  Ap = A + (psiz*j);
258  for (i=0; i<mt; i++) {
259  Al = Ap + i*bsiz;
261  plasma->quark, &task_flags,
262  mb, nb, Al, bsiz,
263  Ap, psiz, INOUT|GATHERV);
264  }
265  }
266  break;
267 
268  case PlasmaIPT_All:
269  nprob = mt*nt;
270  for (i=0; i<nprob; i++) {
271  QUARK_CORE_zgetrip_f1(plasma->quark, &task_flags,
272  mb, nb, &(A[ i*bsiz ]), bsiz,
273  A, size, INOUT|GATHERV);
274  }
275  break;
276 
277  case PlasmaIPT_NoDep:
278  default:
279  nprob = mt*nt;
280  for (i=0; i<nprob; i++) {
281  QUARK_CORE_zgetrip(plasma->quark, &task_flags,
282  mb, nb, &(A[ i*bsiz ]), bsiz);
283  }
284  }
285  }
286 }