11 #include "common_magma.h"
15 #if (defined(PRECISION_s) || defined(PRECISION_d))
23 double *d,
double *e,
double *tauq,
double *taup,
160 int a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__2,
167 a_offset = 1 + a_dim1;
175 x_offset = 1 + x_dim1;
180 y_offset = 1 + y_dim1;
185 if (m <= 0 || n <= 0) {
189 double *
f = (
double *)malloc(
max(n,m)*
sizeof(double ));
190 static cudaStream_t stream;
197 for (i__ = 1; i__ <= nb; ++i__) {
202 #if defined(PRECISION_z) || defined(PRECISION_c)
205 blasf77_dgemv(
"No transpose", &i__2, &i__3, &c_neg_one, &a[i__ + a_dim1], &lda,
206 &y[i__+y_dim1], &ldy, &c_one, &a[i__ + i__ * a_dim1], &c__1);
207 #if defined(PRECISION_z) || defined(PRECISION_c)
210 blasf77_dgemv(
"No transpose", &i__2, &i__3, &c_neg_one, &x[i__ + x_dim1], &ldx,
211 &a[i__*a_dim1+1], &c__1, &c_one, &a[i__+i__*a_dim1], &c__1);
215 alpha = a[i__ + i__ * a_dim1];
219 &a[
min(i__3,m) + i__ * a_dim1], &c__1, &tauq[i__]);
222 a[i__ + i__ * a_dim1] = c_one;
230 a + i__ + i__ * a_dim1, 1,
231 da+(i__-1)+(i__-1)* (ldda), 1 );
234 da + (i__-1) + ((i__-1) + 1) * (ldda), ldda,
235 da + (i__-1) + (i__-1) * (ldda), c__1, c_zero,
236 dy + i__ + 1 + i__ * y_dim1, c__1);
240 dy+i__+1+i__*y_dim1, y_dim1,
241 y+i__+1+i__*y_dim1, y_dim1, stream );
245 &lda, &a[i__ + i__ * a_dim1], &c__1, &c_zero,
246 &y[i__ * y_dim1 + 1], &c__1);
250 blasf77_dgemv(
"N", &i__2, &i__3, &c_neg_one, &y[i__ + 1 +y_dim1], &ldy,
251 &y[i__ * y_dim1 + 1], &c__1,
256 &ldx, &a[i__ + i__ * a_dim1], &c__1, &c_zero,
257 &y[i__ * y_dim1 + 1], &c__1);
264 blasf77_daxpy(&i__2, &c_one, f,&c__1, &y[i__+1+i__*y_dim1],&c__1);
270 a_dim1 + 1], &lda, &y[i__ * y_dim1 + 1], &c__1, &c_one,
271 &y[i__ + 1 + i__ * y_dim1], &c__1);
273 blasf77_dscal(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
277 #if defined(PRECISION_z) || defined(PRECISION_c)
281 blasf77_dgemv(
"No transpose", &i__2, &i__, &c_neg_one, &y[i__ + 1 +
282 y_dim1], &ldy, &a[i__ + a_dim1], &lda, &c_one, &a[i__ + (
283 i__ + 1) * a_dim1], &lda);
286 #if defined(PRECISION_z) || defined(PRECISION_c)
291 a_dim1 + 1], &lda, &x[i__ + x_dim1], &ldx, &c_one, &a[
292 i__ + (i__ + 1) * a_dim1], &lda);
293 #if defined(PRECISION_z) || defined(PRECISION_c)
301 alpha = a[i__ + (i__ + 1) * a_dim1];
303 i__3,n) * a_dim1], &lda, &taup[i__]);
305 a[i__ + (i__ + 1) * a_dim1] = c_one;
312 a + i__ + (i__ +1)* a_dim1, lda,
313 da+(i__-1)+((i__-1)+1)*(ldda), ldda );
318 da + (i__-1)+1+ ((i__-1)+1) * (ldda), ldda,
319 da + (i__-1) + ((i__-1)+1) * (ldda), ldda,
321 c_zero, dx + i__ + 1 + i__ * x_dim1, c__1);
325 dx+i__+1+i__*x_dim1, x_dim1,
326 x+i__+1+i__*x_dim1, x_dim1, stream );
330 &ldy, &a[i__ + (i__ + 1) * a_dim1], &lda, &c_zero, &x[
331 i__ * x_dim1 + 1], &c__1);
334 blasf77_dgemv(
"N", &i__2, &i__, &c_neg_one, &a[i__ + 1 + a_dim1], &lda,
335 &x[i__ * x_dim1 + 1], &c__1, &c_zero, f, &c__1);
338 blasf77_dgemv(
"N", &i__2, &i__3, &c_one, &a[(i__ + 1) * a_dim1 + 1],
339 &lda, &a[i__ + (i__ + 1) * a_dim1], &lda,
340 &c_zero, &x[i__ * x_dim1 + 1], &c__1);
346 blasf77_daxpy(&i__2, &c_one, f,&c__1, &x[i__+1+i__*x_dim1],&c__1);
352 blasf77_dgemv(
"No transpose", &i__2, &i__3, &c_neg_one, &x[i__ + 1 +
353 x_dim1], &ldx, &x[i__ * x_dim1 + 1], &c__1, &c_one, &x[
354 i__ + 1 + i__ * x_dim1], &c__1);
356 blasf77_dscal(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
358 #if defined(PRECISION_z) || defined(PRECISION_c)
363 a + i__ + (i__ +1)* a_dim1, lda,
364 da+(i__-1)+((i__-1)+1)*(ldda), ldda );
372 for (i__ = 1; i__ <= nb; ++i__) {
377 #if defined(PRECISION_z) || defined(PRECISION_c)
381 blasf77_dgemv(
"No transpose", &i__2, &i__3, &c_neg_one, &y[i__ + y_dim1], &ldy,
382 &a[i__ + a_dim1], &lda, &c_one, &a[i__ + i__ * a_dim1], &lda);
384 #if defined(PRECISION_z) || defined(PRECISION_c)
390 &lda, &x[i__ + x_dim1], &ldx, &c_one, &a[i__ + i__ * a_dim1], &lda);
391 #if defined(PRECISION_z) || defined(PRECISION_c)
399 alpha = a[i__ + i__ * a_dim1];
401 &a[i__ +
min(i__3,n) * a_dim1], &lda, &taup[i__]);
404 a[i__ + i__ * a_dim1] = c_one;
412 a + i__ + i__ * a_dim1, lda,
413 da+(i__-1)+(i__-1)* (ldda), ldda );
419 da + (i__-1)+1 + (i__-1) * ldda, ldda,
420 da + (i__-1) + (i__-1) * ldda, ldda,
423 dx + i__ + 1 + i__ * x_dim1, c__1);
427 dx+i__+1+i__*x_dim1, x_dim1,
428 x+i__+1+i__*x_dim1, x_dim1, stream );
433 &ldy, &a[i__ + i__ * a_dim1], &lda, &c_zero,
434 &x[i__ * x_dim1 + 1], &c__1);
438 &a[i__ + 1 + a_dim1], &lda, &x[i__ * x_dim1 + 1], &c__1, &c_zero,
444 &a[i__ * a_dim1 + 1], &lda, &a[i__ + i__ * a_dim1], &lda, &c_zero,
445 &x[i__ * x_dim1 + 1], &c__1);
451 blasf77_daxpy(&i__3, &c_one, f,&c__1, &x[i__+1+i__*x_dim1],&c__1);
457 &x[i__ + 1 + x_dim1], &ldx, &x[i__ * x_dim1 + 1], &c__1, &c_one,
458 &x[i__ + 1 + i__ * x_dim1], &c__1);
460 blasf77_dscal(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
462 #if defined(PRECISION_z) || defined(PRECISION_c)
465 a + i__ + (i__ )* a_dim1, lda,
466 da+(i__-1)+ (i__-1)*(ldda), ldda );
472 #if defined(PRECISION_z) || defined(PRECISION_c)
476 &a[i__ + 1 + a_dim1], &lda, &y[i__ + y_dim1], &ldy, &c_one,
477 &a[i__ + 1 + i__ * a_dim1], &c__1);
479 #if defined(PRECISION_z) || defined(PRECISION_c)
483 &x[i__ + 1 + x_dim1], &ldx, &a[i__ * a_dim1 + 1], &c__1, &c_one,
484 &a[i__ + 1 + i__ * a_dim1], &c__1);
489 alpha = a[i__ + 1 + i__ * a_dim1];
491 &a[
min(i__3,m) + i__ * a_dim1], &c__1, &tauq[i__]);
493 a[i__ + 1 + i__ * a_dim1] = c_one;
501 a + i__ +1+ i__ * a_dim1, 1,
502 da+(i__-1)+1+ (i__-1)*(ldda), 1 );
505 da + (i__-1)+1+ ((i__-1)+1) * ldda, ldda,
506 da + (i__-1)+1+ (i__-1) * ldda, c__1,
507 c_zero, dy + i__ + 1 + i__ * y_dim1, c__1);
511 dy+i__+1+i__*y_dim1, y_dim1,
512 y+i__+1+i__*y_dim1, y_dim1, stream );
517 &lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_zero,
518 &y[ i__ * y_dim1 + 1], &c__1);
522 &y[i__ + 1 + y_dim1], &ldy, &y[i__ * y_dim1 + 1], &c__1,
527 &ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_zero,
528 &y[i__ * y_dim1 + 1], &c__1);
534 blasf77_daxpy(&i__2, &c_one, f,&c__1, &y[i__+1+i__*y_dim1],&c__1);
539 &a[(i__ + 1) * a_dim1 + 1], &lda, &y[i__ * y_dim1 + 1],
540 &c__1, &c_one, &y[i__ + 1 + i__ * y_dim1], &c__1);
542 blasf77_dscal(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
543 #if defined(PRECISION_z) || defined(PRECISION_c)
548 a + i__ + (i__ )* a_dim1, lda,
549 da+(i__-1)+ (i__-1)*(ldda), ldda );