PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_stslqt.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef COMPLEX
20 #define REAL
21 
22 /***************************************************************************/
95 #if defined(PLASMA_HAVE_WEAK)
96 #pragma weak CORE_stslqt = PCORE_stslqt
97 #define CORE_stslqt PCORE_stslqt
98 #define CORE_stsmlq PCORE_stsmlq
99 int CORE_stsmlq(int side, int trans,
100  int M1, int N1, int M2, int N2, int K, int IB,
101  float *A1, int LDA1,
102  float *A2, int LDA2,
103  float *V, int LDV,
104  float *T, int LDT,
105  float *WORK, int LDWORK);
106 #endif
107 int CORE_stslqt(int M, int N, int IB,
108  float *A1, int LDA1,
109  float *A2, int LDA2,
110  float *T, int LDT,
111  float *TAU, float *WORK)
112 {
113  static float zone = 1.0;
114  static float zzero = 0.0;
115 
116  float alpha;
117  int i, ii, sb;
118 
119  /* Check input arguments */
120  if (M < 0) {
121  coreblas_error(1, "Illegal value of M");
122  return -1;
123  }
124  if (N < 0) {
125  coreblas_error(2, "Illegal value of N");
126  return -2;
127  }
128  if (IB < 0) {
129  coreblas_error(3, "Illegal value of IB");
130  return -3;
131  }
132  if ((LDA2 < max(1,M)) && (M > 0)) {
133  coreblas_error(8, "Illegal value of LDA2");
134  return -8;
135  }
136 
137  /* Quick return */
138  if ((M == 0) || (N == 0) || (IB == 0))
139  return PLASMA_SUCCESS;
140 
141  for(ii = 0; ii < M; ii += IB) {
142  sb = min(M-ii, IB);
143  for(i = 0; i < sb; i++) {
144  /*
145  * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:N ).
146  */
147 #ifdef COMPLEX
148  LAPACKE_slacgv_work(N, &A2[ii+i], LDA2);
149  LAPACKE_slacgv_work(1, &A1[LDA1*(ii+i)+ii+i], LDA1);
150 #endif
151  LAPACKE_slarfg_work(N+1, &A1[LDA1*(ii+i)+ii+i], &A2[ii+i], LDA2, &TAU[ii+i]);
152 
153  alpha = -(TAU[ii+i]);
154  if (ii+i+1 < M) {
155  /*
156  * Apply H( II+I-1 ) to A( II+I:II+IB-1, II+I-1:N ) from the right.
157  */
158  cblas_scopy(
159  sb-i-1,
160  &A1[LDA1*(ii+i)+(ii+i+1)], 1,
161  WORK, 1);
162 
163  cblas_sgemv(
165  sb-i-1, N,
166  (zone), &A2[ii+i+1], LDA2,
167  &A2[ii+i], LDA2,
168  (zone), WORK, 1);
169 
170  cblas_saxpy(
171  sb-i-1, (alpha),
172  WORK, 1,
173  &A1[LDA1*(ii+i)+ii+i+1], 1);
174 
175  cblas_sger(
176  CblasColMajor, sb-i-1, N,
177  (alpha), WORK, 1,
178  &A2[ii+i], LDA2,
179  &A2[ii+i+1], LDA2);
180  }
181  /*
182  * Calculate T.
183  */
184  cblas_sgemv(
186  (alpha), &A2[ii], LDA2,
187  &A2[ii+i], LDA2,
188  (zzero), &T[LDT*(ii+i)], 1);
189 #ifdef COMPLEX
190  LAPACKE_slacgv_work(N, &A2[ii+i], LDA2 );
191  LAPACKE_slacgv_work(1, &A1[LDA1*(ii+i)+ii+i], LDA1 );
192 #endif
193  cblas_strmv(
195  (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i,
196  &T[LDT*ii], LDT,
197  &T[LDT*(ii+i)], 1);
198 
199  T[LDT*(ii+i)+i] = TAU[ii+i];
200  }
201  if (M > ii+sb) {
202  CORE_stsmlq(
204  M-(ii+sb), sb, M-(ii+sb), N, IB, IB,
205  &A1[LDA1*ii+ii+sb], LDA1,
206  &A2[ii+sb], LDA2,
207  &A2[ii], LDA2,
208  &T[LDT*ii], LDT,
209  WORK, LDA1);
210  }
211  }
212  return PLASMA_SUCCESS;
213 }
214 
215 /***************************************************************************/
218 void QUARK_CORE_stslqt(Quark *quark, Quark_Task_Flags *task_flags,
219  int m, int n, int ib, int nb,
220  float *A1, int lda1,
221  float *A2, int lda2,
222  float *T, int ldt)
223 {
225  QUARK_Insert_Task(quark, CORE_stslqt_quark, task_flags,
226  sizeof(int), &m, VALUE,
227  sizeof(int), &n, VALUE,
228  sizeof(int), &ib, VALUE,
229  sizeof(float)*nb*nb, A1, INOUT | QUARK_REGION_D | QUARK_REGION_L,
230  sizeof(int), &lda1, VALUE,
231  sizeof(float)*nb*nb, A2, INOUT | LOCALITY,
232  sizeof(int), &lda2, VALUE,
233  sizeof(float)*ib*nb, T, OUTPUT,
234  sizeof(int), &ldt, VALUE,
235  sizeof(float)*nb, NULL, SCRATCH,
236  sizeof(float)*ib*nb, NULL, SCRATCH,
237  0);
238 }
239 
240 /***************************************************************************/
243 #if defined(PLASMA_HAVE_WEAK)
244 #pragma weak CORE_stslqt_quark = PCORE_stslqt_quark
245 #define CORE_stslqt_quark PCORE_stslqt_quark
246 #endif
248 {
249  int m;
250  int n;
251  int ib;
252  float *A1;
253  int lda1;
254  float *A2;
255  int lda2;
256  float *T;
257  int ldt;
258  float *TAU;
259  float *WORK;
260 
261  quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
262  CORE_stslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
263 }