PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ctsmlq_hetra1.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef REAL
20 #define COMPLEX
21 
22 /***************************************************************************/
113 #if defined(PLASMA_HAVE_WEAK)
114 #pragma weak CORE_ctsmlq_hetra1 = PCORE_ctsmlq_hetra1
115 #define CORE_ctsmlq_hetra1 PCORE_ctsmlq_hetra1
116 #define CORE_ctsmlq PCORE_ctsmlq
117 int CORE_ctsmlq(int side, int trans,
118  int M1, int N1, int M2, int N2, int K, int IB,
119  PLASMA_Complex32_t *A1, int LDA1,
120  PLASMA_Complex32_t *A2, int LDA2,
121  PLASMA_Complex32_t *V, int LDV,
122  PLASMA_Complex32_t *T, int LDT,
123  PLASMA_Complex32_t *WORK, int LDWORK);
124 #endif
126  int m1, int n1, int m2, int n2,
127  int k, int ib,
128  PLASMA_Complex32_t *A1, int lda1,
129  PLASMA_Complex32_t *A2, int lda2,
130  PLASMA_Complex32_t *V, int ldv,
131  PLASMA_Complex32_t *T, int ldt,
132  PLASMA_Complex32_t *WORK, int ldwork)
133 {
134  int i, j;
135 
136  if ( (m1 != n1) ) {
137  coreblas_error(3, "Illegal value of M1, N1");
138  return -3;
139  }
140 
141  /* in-place transposition of A1 */
142  for (j = 0; j < n1; j++){
143  A1[j + j*lda1] = conjf(A1[j + j*lda1]);
144 
145  for (i = j+1; i < m1; i++){
146  *WORK = *(A1 + i + j*lda1);
147  *(A1 + i + j*lda1) = conjf(*(A1 + j + i*lda1));
148  *(A1 + j + i*lda1) = conjf(*WORK);
149  }
150  }
151 
152  CORE_ctsmlq(side, trans, m1, n1, m2, n2, k, ib,
153  A1, lda1, A2, lda2,
154  V, ldv, T, ldt,
155  WORK, ldwork);
156 
157  /* in-place transposition of A1 */
158  for (j = 0; j < n1; j++){
159  A1[j + j*lda1] = conjf(A1[j + j*lda1]);
160 
161  for (i = j+1; i < m1; i++){
162  *WORK = *(A1 + i + j*lda1);
163  *(A1 + i + j*lda1) = conjf(*(A1 + j + i*lda1));
164  *(A1 + j + i*lda1) = conjf(*WORK);
165  }
166  }
167 
168  return PLASMA_SUCCESS;
169 }
170 
171 /***************************************************************************/
/*
 * Task-insertion wrapper: queues CORE_ctsmlq_hetra1_quark on the QUARK
 * scheduler via QUARK_Insert_Task, packing the scalar arguments by value and
 * registering the tile pointers as data dependencies.
 *
 * As visible below:
 *   - A2 is registered as INOUT, with a size of nb*nb elements;
 *   - V is registered as INPUT, with a size of nb*nb elements;
 *   - T is registered as INPUT, with a size of ib*nb elements;
 *   - a scratch buffer of ib*nb elements is requested (NULL, SCRATCH);
 *   - ldwork is ib when side == PlasmaLeft and nb otherwise.
 *
 * NOTE(review): this listing looks truncated.  The opening line(s) of the
 * signature -- the function name and the declarations of `quark` and
 * `task_flags` used in the call below -- are not visible here.  Only 17
 * packed arguments are visible, whereas the task body unpacks 18 with
 * quark_unpack_args_18: the entry for A1 (expected between `ib` and `lda1`,
 * where the embedded numbering jumps from 192 to 194) appears to be missing.
 * Confirm against the original file before editing.
 */
175  int side, int trans,
176  int m1, int n1, int m2, int n2, int k, int ib, int nb,
177  PLASMA_Complex32_t *A1, int lda1,
178  PLASMA_Complex32_t *A2, int lda2,
179  PLASMA_Complex32_t *V, int ldv,
180  PLASMA_Complex32_t *T, int ldt)
181 {
/* Leading dimension handed to the task for its scratch buffer. */
182  int ldwork = side == PlasmaLeft ? ib : nb;
183 
/* Argument order here must match the unpack order in CORE_ctsmlq_hetra1_quark. */
184  QUARK_Insert_Task(quark, CORE_ctsmlq_hetra1_quark, task_flags,
185  sizeof(PLASMA_enum), &side, VALUE,
186  sizeof(PLASMA_enum), &trans, VALUE,
187  sizeof(int), &m1, VALUE,
188  sizeof(int), &n1, VALUE,
189  sizeof(int), &m2, VALUE,
190  sizeof(int), &n2, VALUE,
191  sizeof(int), &k, VALUE,
192  sizeof(int), &ib, VALUE,
/* NOTE(review): the A1 dependency entry appears to be missing at this point. */
194  sizeof(int), &lda1, VALUE,
195  sizeof(PLASMA_Complex32_t)*nb*nb, A2, INOUT,
196  sizeof(int), &lda2, VALUE,
197  sizeof(PLASMA_Complex32_t)*nb*nb, V, INPUT,
198  sizeof(int), &ldv, VALUE,
199  sizeof(PLASMA_Complex32_t)*ib*nb, T, INPUT,
200  sizeof(int), &ldt, VALUE,
/* No caller buffer is supplied for the workspace: NULL is passed with the SCRATCH flag. */
201  sizeof(PLASMA_Complex32_t)*ib*nb, NULL, SCRATCH,
202  sizeof(int), &ldwork, VALUE,
203  0);
204 }
205 
206 /***************************************************************************/
/*
 * Task body run by the scheduler: unpacks the 18 arguments stored at
 * task-insertion time and forwards them, in the same order, to
 * CORE_ctsmlq_hetra1.  The value returned by CORE_ctsmlq_hetra1 is discarded.
 *
 * When PLASMA_HAVE_WEAK is defined, the function is compiled under the name
 * PCORE_ctsmlq_hetra1_quark and CORE_ctsmlq_hetra1_quark is emitted as a weak
 * alias for it.
 *
 * NOTE(review): this listing looks truncated.  The signature line (function
 * name and the `quark` parameter used below) is not visible, and the
 * declarations of `V` and `T` -- both used in the unpack and in the call --
 * are absent (the embedded numbering jumps 217->219, 231->233 and 233->235).
 * Presumably both are `PLASMA_Complex32_t *`, matching the corresponding
 * parameters of CORE_ctsmlq_hetra1; confirm against the original file.
 */
214 #if defined(PLASMA_HAVE_WEAK)
215 #pragma weak CORE_ctsmlq_hetra1_quark = PCORE_ctsmlq_hetra1_quark
216 #define CORE_ctsmlq_hetra1_quark PCORE_ctsmlq_hetra1_quark
217 #endif
219 {
220  int side;
221  int trans;
222  int m1;
223  int n1;
224  int m2;
225  int n2;
226  int k;
227  int ib;
228  PLASMA_Complex32_t *A1;
229  int lda1;
230  PLASMA_Complex32_t *A2;
231  int lda2;
233  int ldv;
235  int ldt;
236  PLASMA_Complex32_t *WORK;
237  int ldwork;
238 
/* Unpack order must mirror the order used when the task was inserted. */
239  quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
240  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
241  CORE_ctsmlq_hetra1(side, trans, m1, n1, m2, n2, k, ib,
242  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
243 }
244