PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dormlq.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
19 /***************************************************************************/
83 int PLASMA_dormlq(PLASMA_enum side, PLASMA_enum trans, int M, int N, int K,
84  double *A, int LDA,
85  double *T,
86  double *B, int LDB)
87 {
88  int NB, IB, IBNB, KT, NT, An;
89  int status;
91  PLASMA_sequence *sequence = NULL;
93  PLASMA_desc descA, descB, descT;
94 
95  plasma = plasma_context_self();
96  if (plasma == NULL) {
97  plasma_fatal_error("PLASMA_dormlq", "PLASMA not initialized");
99  }
100 
101  if (side == PlasmaLeft)
102  An = M;
103  else
104  An = N;
105 
106  /* Check input arguments */
107  if ( (side != PlasmaLeft) && (side != PlasmaRight) ) {
108  plasma_error("PLASMA_dormlq", "illegal value of side");
109  return -1;
110  }
111  if ( (trans != PlasmaTrans) && (trans != PlasmaNoTrans) ){
112  plasma_error("PLASMA_dormlq", "illegal value of trans");
113  return -2;
114  }
115  if (M < 0) {
116  plasma_error("PLASMA_dormlq", "illegal value of M");
117  return -3;
118  }
119  if (N < 0) {
120  plasma_error("PLASMA_dormlq", "illegal value of N");
121  return -4;
122  }
123  if ((K < 0) || (K > An)) {
124  plasma_error("PLASMA_dormlq", "illegal value of K");
125  return -5;
126  }
127  if (LDA < max(1, K)) {
128  plasma_error("PLASMA_dormlq", "illegal value of LDA");
129  return -7;
130  }
131  if (LDB < max(1, M)) {
132  plasma_error("PLASMA_dormlq", "illegal value of LDB");
133  return -10;
134  }
135  /* Quick return - currently NOT equivalent to LAPACK's:
136  * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
137  if (min(M, min(N, K)) == 0)
138  return PLASMA_SUCCESS;
139 
140  /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
141  status = plasma_tune(PLASMA_FUNC_DGELS, M, K, N);
142  if (status != PLASMA_SUCCESS) {
143  plasma_error("PLASMA_dormlq", "plasma_tune() failed");
144  return status;
145  }
146 
147  /* Set MT, NT & NTRHS */
148  NB = PLASMA_NB;
149  IB = PLASMA_IB;
150  IBNB = IB*NB;
151  KT = ( K%NB==0) ? (K /NB) : (K /NB+1);
152  NT = (An%NB==0) ? (An/NB) : (An/NB+1);
153 
154  plasma_sequence_create(plasma, &sequence);
155 
156  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
157  descT = plasma_desc_init(
159  IB, NB, IBNB,
160  KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
161  }
162  else {
163  /* Double the size of T to accomodate the tree reduction phase */
164  descT = plasma_desc_init(
166  IB, NB, IBNB,
167  KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
168  }
169  descT.mat = T;
170 
172  plasma_dooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An, plasma_desc_mat_free(&(descA)) );
173  plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
174  } else {
175  plasma_diplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An);
176  plasma_diplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N);
177  }
178 
179  /* Call the tile interface */
181  side, trans, &descA, &descT, &descB, sequence, &request);
182 
184  plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
186  plasma_desc_mat_free(&descA);
187  plasma_desc_mat_free(&descB);
188  } else {
189  plasma_diptile2lap( descA, A, NB, NB, LDA, An );
190  plasma_diptile2lap( descB, B, NB, NB, LDB, N );
192  }
193 
194  status = sequence->status;
195  plasma_sequence_destroy(plasma, sequence);
196  return status;
197 }
198 
199 /***************************************************************************/
249 {
251  PLASMA_sequence *sequence = NULL;
253  int status;
254 
255  plasma = plasma_context_self();
256  if (plasma == NULL) {
257  plasma_fatal_error("PLASMA_dormlq_Tile", "PLASMA not initialized");
259  }
260  plasma_sequence_create(plasma, &sequence);
261  PLASMA_dormlq_Tile_Async(side, trans, A, T, B, sequence, &request);
263  status = sequence->status;
264  plasma_sequence_destroy(plasma, sequence);
265  return status;
266 }
267 
268 /***************************************************************************/
297  PLASMA_sequence *sequence, PLASMA_request *request)
298 {
299  PLASMA_desc descA = *A;
300  PLASMA_desc descT = *T;
301  PLASMA_desc descB = *B;
303 
304  plasma = plasma_context_self();
305  if (plasma == NULL) {
306  plasma_fatal_error("PLASMA_dormlq_Tile", "PLASMA not initialized");
308  }
309  if (sequence == NULL) {
310  plasma_fatal_error("PLASMA_dormlq_Tile", "NULL sequence");
311  return PLASMA_ERR_UNALLOCATED;
312  }
313  if (request == NULL) {
314  plasma_fatal_error("PLASMA_dormlq_Tile", "NULL request");
315  return PLASMA_ERR_UNALLOCATED;
316  }
317  /* Check sequence status */
318  if (sequence->status == PLASMA_SUCCESS)
319  request->status = PLASMA_SUCCESS;
320  else
321  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
322 
323  /* Check descriptors for correctness */
324  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
325  plasma_error("PLASMA_dormlq_Tile", "invalid first descriptor");
326  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
327  }
328  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
329  plasma_error("PLASMA_dormlq_Tile", "invalid second descriptor");
330  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
331  }
332  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
333  plasma_error("PLASMA_dormlq_Tile", "invalid third descriptor");
334  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
335  }
336  /* Check input arguments */
337  if (descA.nb != descA.mb || descB.nb != descB.mb) {
338  plasma_error("PLASMA_dormlq_Tile", "only square tiles supported");
339  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
340  }
341  if ( (side != PlasmaLeft) && (side != PlasmaRight) ) {
342  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
343  }
344  if ( (trans != PlasmaTrans) && (trans != PlasmaNoTrans) ){
345  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
346  }
347  /* Quick return - currently NOT equivalent to LAPACK's:
348  * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
349 /*
350  if (min(M, min(N, K)) == 0)
351  return PLASMA_SUCCESS;
352 */
353  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
354  if ( (trans == PlasmaTrans) &&
355  (side == PlasmaLeft) ) {
357  PLASMA_enum, side,
358  PLASMA_enum, trans,
359  PLASMA_desc, descA,
360  PLASMA_desc, descB,
361  PLASMA_desc, descT,
362  PLASMA_sequence*, sequence,
363  PLASMA_request*, request);
364  } else {
366  PLASMA_enum, side,
367  PLASMA_enum, trans,
368  PLASMA_desc, descA,
369  PLASMA_desc, descB,
370  PLASMA_desc, descT,
371  PLASMA_sequence*, sequence,
372  PLASMA_request*, request);
373  }
374  }
375  else {
376  plasma_dynamic_call_8(plasma_pdormlqrh,
377  PLASMA_enum, side,
378  PLASMA_enum, trans,
379  PLASMA_desc, descA,
380  PLASMA_desc, descB,
381  PLASMA_desc, descT,
383  PLASMA_sequence*, sequence,
384  PLASMA_request*, request);
385  }
386 
387  return PLASMA_SUCCESS;
388 }