PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_cttmqr.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
19 /***************************************************************************/
112 #if defined(PLASMA_HAVE_WEAK)
113 #pragma weak CORE_cttmqr = PCORE_cttmqr
114 #define CORE_cttmqr PCORE_cttmqr
115 #endif
116 int CORE_cttmqr(int side, int trans,
117  int M1, int N1, int M2, int N2, int K, int IB,
118  PLASMA_Complex32_t *A1, int LDA1,
119  PLASMA_Complex32_t *A2, int LDA2,
120  PLASMA_Complex32_t *V, int LDV,
121  PLASMA_Complex32_t *T, int LDT,
122  PLASMA_Complex32_t *WORK, int LDWORK)
123 {
124  int i, i1, i3;
125  int NQ, NW;
126  int kb, l;
127  int ic = 0;
128  int jc = 0;
129  int mi1 = M1;
130  int mi2 = M2;
131  int ni1 = N1;
132  int ni2 = N2;
133 
134  /* Check input arguments */
135  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
136  coreblas_error(1, "Illegal value of side");
137  return -1;
138  }
139 
140  /* NQ is the order of Q */
141  if (side == PlasmaLeft) {
142  NQ = M2;
143  NW = IB;
144  }
145  else {
146  NQ = N2;
147  NW = M1;
148  }
149 
150  if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
151  coreblas_error(2, "Illegal value of trans");
152  return -2;
153  }
154  if (M1 < 0) {
155  coreblas_error(3, "Illegal value of M1");
156  return -3;
157  }
158  if (N1 < 0) {
159  coreblas_error(4, "Illegal value of N1");
160  return -4;
161  }
162  if ( (M2 < 0) ||
163  ( (M2 != M1) && (side == PlasmaRight) ) ){
164  coreblas_error(5, "Illegal value of M2");
165  return -5;
166  }
167  if ( (N2 < 0) ||
168  ( (N2 != N1) && (side == PlasmaLeft) ) ){
169  coreblas_error(6, "Illegal value of N2");
170  return -6;
171  }
172  if ((K < 0) ||
173  ( (side == PlasmaLeft) && (K > M1) ) ||
174  ( (side == PlasmaRight) && (K > N1) ) ) {
175  coreblas_error(7, "Illegal value of K");
176  return -7;
177  }
178  if (IB < 0) {
179  coreblas_error(8, "Illegal value of IB");
180  return -8;
181  }
182  if (LDA1 < max(1,M1)){
183  coreblas_error(10, "Illegal value of LDA1");
184  return -10;
185  }
186  if (LDA2 < max(1,M2)){
187  coreblas_error(12, "Illegal value of LDA2");
188  return -12;
189  }
190  if (LDV < max(1,NQ)){
191  coreblas_error(14, "Illegal value of LDV");
192  return -14;
193  }
194  if (LDT < max(1,IB)){
195  coreblas_error(16, "Illegal value of LDT");
196  return -16;
197  }
198  if (LDWORK < max(1,NW)){
199  coreblas_error(18, "Illegal value of LDWORK");
200  return -18;
201  }
202 
203  /* Quick return */
204  if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
205  return PLASMA_SUCCESS;
206 
207  if (((side == PlasmaLeft) && (trans != PlasmaNoTrans))
208  || ((side == PlasmaRight) && (trans == PlasmaNoTrans))) {
209  i1 = 0;
210  i3 = IB;
211  }
212  else {
213  i1 = ( ( K-1 ) / IB )*IB;
214  i3 = -IB;
215  }
216 
217  for (i = i1; (i > -1) && (i < K); i+=i3) {
218  kb = min(IB, K-i);
219 
220  if (side == PlasmaLeft) {
221  mi1 = kb;
222  mi2 = min(i+kb, M2);
223  l = min(kb, max(0, M2-i));
224  ic = i;
225  }
226  else {
227  ni1 = kb;
228  ni2 = min(i+kb, N2);
229  l = min(kb, max(0, N2-i));
230  jc = i;
231  }
232 
233  /*
234  * Apply H or H' (NOTE: CORE_cparfb used to be CORE_cttrfb)
235  */
236  CORE_cparfb(
237  side, trans, PlasmaForward, PlasmaColumnwise,
238  mi1, ni1, mi2, ni2, kb, l,
239  &A1[LDA1*jc+ic], LDA1,
240  A2, LDA2,
241  &V[LDV*i], LDV,
242  &T[LDT*i], LDT,
243  WORK, LDWORK);
244  }
245  return PLASMA_SUCCESS;
246 }
247 
248 /***************************************************************************/
251 void QUARK_CORE_cttmqr(Quark *quark, Quark_Task_Flags *task_flags,
252  int side, int trans,
253  int m1, int n1, int m2, int n2, int k, int ib, int nb,
254  PLASMA_Complex32_t *A1, int lda1,
255  PLASMA_Complex32_t *A2, int lda2,
256  PLASMA_Complex32_t *V, int ldv,
257  PLASMA_Complex32_t *T, int ldt)
258 {
259  int ldwork = side == PlasmaLeft ? ib : nb;
260 
262  QUARK_Insert_Task(quark, CORE_cttmqr_quark, task_flags,
263  sizeof(PLASMA_enum), &side, VALUE,
264  sizeof(PLASMA_enum), &trans, VALUE,
265  sizeof(int), &m1, VALUE,
266  sizeof(int), &n1, VALUE,
267  sizeof(int), &m2, VALUE,
268  sizeof(int), &n2, VALUE,
269  sizeof(int), &k, VALUE,
270  sizeof(int), &ib, VALUE,
271  sizeof(PLASMA_Complex32_t)*nb*nb, A1, INOUT,
272  sizeof(int), &lda1, VALUE,
273  sizeof(PLASMA_Complex32_t)*nb*nb, A2, INOUT,
274  sizeof(int), &lda2, VALUE,
276  sizeof(int), &ldv, VALUE,
277  sizeof(PLASMA_Complex32_t)*ib*nb, T, INPUT,
278  sizeof(int), &ldt, VALUE,
279  sizeof(PLASMA_Complex32_t)*ib*nb, NULL, SCRATCH,
280  sizeof(int), &ldwork, VALUE,
281  0);
282 }
283 
284 /***************************************************************************/
287 #if defined(PLASMA_HAVE_WEAK)
288 #pragma weak CORE_cttmqr_quark = PCORE_cttmqr_quark
289 #define CORE_cttmqr_quark PCORE_cttmqr_quark
290 #endif
292 {
293  int side;
294  int trans;
295  int m1;
296  int n1;
297  int m2;
298  int n2;
299  int k;
300  int ib;
301  PLASMA_Complex32_t *A1;
302  int lda1;
303  PLASMA_Complex32_t *A2;
304  int lda2;
306  int ldv;
308  int ldt;
309  PLASMA_Complex32_t *WORK;
310  int ldwork;
311 
312  quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
313  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
314  CORE_cttmqr(side, trans, m1, n1, m2, n2, k, ib,
315  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
316 }