PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dtsmlq.c
Go to the documentation of this file.
1 
19 #include "common.h"
20 
21 /***************************************************************************/
120 #if defined(PLASMA_HAVE_WEAK)
121 #pragma weak CORE_dtsmlq = PCORE_dtsmlq
122 #define CORE_dtsmlq PCORE_dtsmlq
123 #endif
124 int CORE_dtsmlq(int side, int trans,
125  int M1, int N1, int M2, int N2, int K, int IB,
126  double *A1, int LDA1,
127  double *A2, int LDA2,
128  double *V, int LDV,
129  double *T, int LDT,
130  double *WORK, int LDWORK)
131 {
132  int i, i1, i3;
133  int NW;
134  int kb;
135  int ic = 0;
136  int jc = 0;
137  int mi = M1;
138  int ni = N1;
139 
140  /* Check input arguments */
141  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
142  coreblas_error(1, "Illegal value of side");
143  return -1;
144  }
145 
146  /* NW is the minimum dimension of WORK */
147  if (side == PlasmaLeft) {
148  NW = IB;
149  }
150  else {
151  NW = N1;
152  }
153 
154  if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
155  coreblas_error(2, "Illegal value of trans");
156  return -2;
157  }
158  if (M1 < 0) {
159  coreblas_error(3, "Illegal value of M1");
160  return -3;
161  }
162  if (N1 < 0) {
163  coreblas_error(4, "Illegal value of N1");
164  return -4;
165  }
166  if ( (M2 < 0) ||
167  ( (M2 != M1) && (side == PlasmaRight) ) ){
168  coreblas_error(5, "Illegal value of M2");
169  return -5;
170  }
171  if ( (N2 < 0) ||
172  ( (N2 != N1) && (side == PlasmaLeft) ) ){
173  coreblas_error(6, "Illegal value of N2");
174  return -6;
175  }
176  if ((K < 0) ||
177  ( (side == PlasmaLeft) && (K > M1) ) ||
178  ( (side == PlasmaRight) && (K > N1) ) ) {
179  coreblas_error(7, "Illegal value of K");
180  return -7;
181  }
182  if (IB < 0) {
183  coreblas_error(8, "Illegal value of IB");
184  return -8;
185  }
186  if (LDA1 < max(1,M1)){
187  coreblas_error(10, "Illegal value of LDA1");
188  return -10;
189  }
190  if (LDA2 < max(1,M2)){
191  coreblas_error(12, "Illegal value of LDA2");
192  return -12;
193  }
194  if (LDV < max(1,K)){
195  coreblas_error(14, "Illegal value of LDV");
196  return -14;
197  }
198  if (LDT < max(1,IB)){
199  coreblas_error(16, "Illegal value of LDT");
200  return -16;
201  }
202  if (LDWORK < max(1,NW)){
203  coreblas_error(18, "Illegal value of LDWORK");
204  return -18;
205  }
206 
207  /* Quick return */
208  if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
209  return PLASMA_SUCCESS;
210 
211  if (((side == PlasmaLeft) && (trans == PlasmaNoTrans))
212  || ((side == PlasmaRight) && (trans != PlasmaNoTrans))) {
213  i1 = 0;
214  i3 = IB;
215  }
216  else {
217  i1 = ((K-1) / IB)*IB;
218  i3 = -IB;
219  }
220 
221  if (trans == PlasmaNoTrans) {
222  trans = PlasmaTrans;
223  }
224  else {
225  trans = PlasmaNoTrans;
226  }
227 
228  for(i = i1; (i > -1) && (i < K); i += i3) {
229  kb = min(IB, K-i);
230 
231  if (side == PlasmaLeft) {
232  /*
233  * H or H' is applied to C(i:m,1:n)
234  */
235  mi = M1 - i;
236  ic = i;
237  }
238  else {
239  /*
240  * H or H' is applied to C(1:m,i:n)
241  */
242  ni = N1 - i;
243  jc = i;
244  }
245  /*
246  * Apply H or H' (NOTE: CORE_dparfb used to be CORE_dtsrfb)
247  */
248  CORE_dparfb(
249  side, trans, PlasmaForward, PlasmaRowwise,
250  mi, ni, M2, N2, kb, 0,
251  &A1[LDA1*jc+ic], LDA1,
252  A2, LDA2,
253  &V[i], LDV,
254  &T[LDT*i], LDT,
255  WORK, LDWORK);
256  }
257  return PLASMA_SUCCESS;
258 }
259 
260 /***************************************************************************/
263 void QUARK_CORE_dtsmlq(Quark *quark, Quark_Task_Flags *task_flags,
264  int side, int trans,
265  int m1, int n1, int m2, int n2, int k, int ib, int nb,
266  double *A1, int lda1,
267  double *A2, int lda2,
268  double *V, int ldv,
269  double *T, int ldt)
270 {
271  int ldwork = side == PlasmaLeft ? ib : nb;
272 
274  QUARK_Insert_Task(quark, CORE_dtsmlq_quark, task_flags,
275  sizeof(PLASMA_enum), &side, VALUE,
276  sizeof(PLASMA_enum), &trans, VALUE,
277  sizeof(int), &m1, VALUE,
278  sizeof(int), &n1, VALUE,
279  sizeof(int), &m2, VALUE,
280  sizeof(int), &n2, VALUE,
281  sizeof(int), &k, VALUE,
282  sizeof(int), &ib, VALUE,
283  sizeof(double)*nb*nb, A1, INOUT,
284  sizeof(int), &lda1, VALUE,
285  sizeof(double)*nb*nb, A2, INOUT | LOCALITY,
286  sizeof(int), &lda2, VALUE,
287  sizeof(double)*nb*nb, V, INPUT,
288  sizeof(int), &ldv, VALUE,
289  sizeof(double)*ib*nb, T, INPUT,
290  sizeof(int), &ldt, VALUE,
291  sizeof(double)*ib*nb, NULL, SCRATCH,
292  sizeof(int), &ldwork, VALUE,
293  0);
294 }
295 
296 /***************************************************************************/
299 #if defined(PLASMA_HAVE_WEAK)
300 #pragma weak CORE_dtsmlq_quark = PCORE_dtsmlq_quark
301 #define CORE_dtsmlq_quark PCORE_dtsmlq_quark
302 #endif
304 {
305  int side;
306  int trans;
307  int m1;
308  int n1;
309  int m2;
310  int n2;
311  int k;
312  int ib;
313  double *A1;
314  int lda1;
315  double *A2;
316  int lda2;
317  double *V;
318  int ldv;
319  double *T;
320  int ldt;
321  double *WORK;
322  int ldwork;
323 
324  quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
325  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
326  CORE_dtsmlq(side, trans, m1, n1, m2, n2, k, ib,
327  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
328 }