PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_sttqrt.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef COMPLEX
20 #define REAL
21 
22 /***************************************************************************/
96 #if defined(PLASMA_HAVE_WEAK)
97 #pragma weak CORE_sttqrt = PCORE_sttqrt
98 #define CORE_sttqrt PCORE_sttqrt
99 #endif
100 int CORE_sttqrt(int M, int N, int IB,
101  float *A1, int LDA1,
102  float *A2, int LDA2,
103  float *T, int LDT,
104  float *TAU, float *WORK)
105 {
106  static int ione = 1;
107  static float zone = 1.0;
108  static float zzero = 0.0;
109 
110  float alpha;
111  int i, j, l, ii, sb, mi, ni;
112 
113  /* Check input arguments */
114  if (M < 0) {
115  coreblas_error(1, "Illegal value of M");
116  return -1;
117  }
118  if (N < 0) {
119  coreblas_error(2, "Illegal value of N");
120  return -2;
121  }
122  if (IB < 0) {
123  coreblas_error(3, "Illegal value of IB");
124  return -3;
125  }
126  if ((LDA2 < max(1,M)) && (M > 0)) {
127  coreblas_error(7, "Illegal value of LDA2");
128  return -7;
129  }
130 
131  /* Quick return */
132  if ((M == 0) || (N == 0) || (IB == 0))
133  return PLASMA_SUCCESS;
134 
135  /* TODO: Need to check why some cases require
136  * this to not have uninitialized values */
138  0., 0., T, LDT);
139 
140  for (ii = 0; ii < N; ii += IB) {
141  sb = min(N-ii, IB);
142  for (i = 0; i < sb; i++) {
143  j = ii + i;
144  mi = min( j + 1, M );
145  ni = sb-i-1;
146 
147  /*
148  * Generate elementary reflector H( II*IB+I ) to annihilate
149  * A( II*IB+I:mi, II*IB+I ).
150  */
151  LAPACKE_slarfg_work(
152  mi+1, &A1[LDA1*j+j], &A2[LDA2*j], ione, &TAU[j]);
153 
154  if (ni > 0) {
155  /*
156  * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
157  */
158  cblas_scopy(
159  ni,
160  &A1[LDA1*(j+1)+j], LDA1,
161  WORK, 1);
162 
163 #ifdef COMPLEX
164  LAPACKE_slacgv_work(ni, WORK, 1);
165 #endif
166  cblas_sgemv(
168  mi, ni,
169  (zone), &A2[LDA2*(j+1)], LDA2,
170  &A2[LDA2*j], 1,
171  (zone), WORK, 1);
172 #ifdef COMPLEX
173  LAPACKE_slacgv_work(ni, WORK, 1);
174 #endif
175  alpha = -(TAU[j]);
176  cblas_saxpy(
177  ni, (alpha),
178  WORK, 1,
179  &A1[LDA1*(j+1)+j], LDA1);
180 #ifdef COMPLEX
181  LAPACKE_slacgv_work(ni, WORK, 1);
182 #endif
183  cblas_sger(
184  CblasColMajor, mi, ni,
185  (alpha), &A2[LDA2*j], 1,
186  WORK, 1,
187  &A2[LDA2*(j+1)], LDA2);
188  }
189 
190  /*
191  * Calculate T
192  *
193  * T(0:i-1, j) = alpha * A2(0:M-1, ii:j-1)' * A2(0:M-1, j)
194  */
195 
196  if ( i > 0 ) {
197 
198  l = min(i, max(0, M-ii));
199  alpha = -(TAU[j]);
200 
201  CORE_spemv(
203  min(j, M), i, l,
204  alpha, &A2[LDA2*ii], LDA2,
205  &A2[LDA2*j], 1,
206  zzero, &T[LDT*j], 1,
207  WORK);
208 
209  /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
210  cblas_strmv(
214  i, &T[LDT*ii], LDT,
215  &T[LDT*j], 1);
216 
217  }
218 
219  T[LDT*j+i] = TAU[j];
220  }
221 
222  /* Apply Q' to the rest of the matrix to the left */
223  if (N > ii+sb) {
224  mi = min(ii+sb, M);
225  ni = N-(ii+sb);
226  l = min(sb, max(0, mi-ii));
227  CORE_sparfb(
230  IB, ni, mi, ni, sb, l, //replaced sb by IB
231  &A1[LDA1*(ii+sb)+ii], LDA1,
232  &A2[LDA2*(ii+sb)], LDA2,
233  &A2[LDA2*ii], LDA2,
234  &T[LDT*ii], LDT,
235  WORK, sb);
236  }
237  }
238  return PLASMA_SUCCESS;
239 }
240 
241 /***************************************************************************/
244 void QUARK_CORE_sttqrt(Quark *quark, Quark_Task_Flags *task_flags,
245  int m, int n, int ib, int nb,
246  float *A1, int lda1,
247  float *A2, int lda2,
248  float *T, int ldt)
249 {
251  QUARK_Insert_Task(quark, CORE_sttqrt_quark, task_flags,
252  sizeof(int), &m, VALUE,
253  sizeof(int), &n, VALUE,
254  sizeof(int), &ib, VALUE,
255  sizeof(float)*nb*nb, A1, INOUT|QUARK_REGION_D|QUARK_REGION_U,
256  sizeof(int), &lda1, VALUE,
257  sizeof(float)*nb*nb, A2, INOUT|QUARK_REGION_D|QUARK_REGION_U|LOCALITY,
258  sizeof(int), &lda2, VALUE,
259  sizeof(float)*ib*nb, T, OUTPUT,
260  sizeof(int), &ldt, VALUE,
261  sizeof(float)*nb, NULL, SCRATCH,
262  sizeof(float)*ib*nb, NULL, SCRATCH,
263  0);
264 }
265 
266 /***************************************************************************/
269 #if defined(PLASMA_HAVE_WEAK)
270 #pragma weak CORE_sttqrt_quark = PCORE_sttqrt_quark
271 #define CORE_sttqrt_quark PCORE_sttqrt_quark
272 #endif
274 {
275  int m;
276  int n;
277  int ib;
278  float *A1;
279  int lda1;
280  float *A2;
281  int lda2;
282  float *T;
283  int ldt;
284  float *TAU;
285  float *WORK;
286 
287  quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
288  CORE_sttqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
289 }