MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dlabrd_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated d Thu May 10 22:26:54 2012
9 
10 */
11 #include "common_magma.h"
12 
13 // === Define what BLAS to use ============================================
14 #define PRECISION_d
15 #if (defined(PRECISION_s) || defined(PRECISION_d))
16 // #define magma_dgemv magmablas_dgemv
17 #endif
18 // === End defining what BLAS to use ======================================
19 
20 extern "C" magma_int_t
22  double *a, magma_int_t lda, double *da, magma_int_t ldda,
23  double *d, double *e, double *tauq, double *taup,
24  double *x, magma_int_t ldx, double *dx, magma_int_t lddx,
25  double *y, magma_int_t ldy, double *dy, magma_int_t lddy)
26 {
27 /* -- MAGMA (version 1.2.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  May 2012
32 
33  Purpose
34  =======
35  DLABRD reduces the first NB rows and columns of a real general
36  m by n matrix A to upper or lower bidiagonal form by an orthogonal
37  transformation Q' * A * P, and returns the matrices X and Y which
38  are needed to apply the transformation to the unreduced part of A.
39 
40  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
41  bidiagonal form.
42 
43  This is an auxiliary routine called by SGEBRD
44 
45  Arguments
46  =========
47  M (input) INTEGER
48  The number of rows in the matrix A.
49 
50  N (input) INTEGER
51  The number of columns in the matrix A.
52 
53  NB (input) INTEGER
54  The number of leading rows and columns of A to be reduced.
55 
56  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
57  On entry, the m by n general matrix to be reduced.
58  On exit, the first NB rows and columns of the matrix are
59  overwritten; the rest of the array is unchanged.
60  If m >= n, elements on and below the diagonal in the first NB
61  columns, with the array TAUQ, represent the orthogonal
62  matrix Q as a product of elementary reflectors; and
63  elements above the diagonal in the first NB rows, with the
64  array TAUP, represent the orthogonal matrix P as a product
65  of elementary reflectors.
66  If m < n, elements below the diagonal in the first NB
67  columns, with the array TAUQ, represent the orthogonal
68  matrix Q as a product of elementary reflectors, and
69  elements on and above the diagonal in the first NB rows,
70  with the array TAUP, represent the orthogonal matrix P as
71  a product of elementary reflectors.
72  See Further Details.
73 
74  LDA (input) INTEGER
75  The leading dimension of the array A. LDA >= max(1,M).
76 
77  D (output) DOUBLE_PRECISION array, dimension (NB)
78  The diagonal elements of the first NB rows and columns of
79  the reduced matrix. D(i) = A(i,i).
80 
81  E (output) DOUBLE_PRECISION array, dimension (NB)
82  The off-diagonal elements of the first NB rows and columns of
83  the reduced matrix.
84 
85  TAUQ (output) DOUBLE_PRECISION array dimension (NB)
86  The scalar factors of the elementary reflectors which
87  represent the orthogonal matrix Q. See Further Details.
88 
89  TAUP (output) DOUBLE_PRECISION array, dimension (NB)
90  The scalar factors of the elementary reflectors which
91  represent the orthogonal matrix P. See Further Details.
92 
93  X (output) DOUBLE_PRECISION array, dimension (LDX,NB)
94  The m-by-nb matrix X required to update the unreduced part
95  of A.
96 
97  LDX (input) INTEGER
98  The leading dimension of the array X. LDX >= M.
99 
100  Y (output) DOUBLE_PRECISION array, dimension (LDY,NB)
101  The n-by-nb matrix Y required to update the unreduced part
102  of A.
103 
104  LDY (input) INTEGER
105  The leading dimension of the array Y. LDY >= N.
106 
107  Further Details
108  ===============
109 
110  The matrices Q and P are represented as products of elementary
111  reflectors:
112 
113  Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb)
114 
115  Each H(i) and G(i) has the form:
116 
117  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
118 
119  where tauq and taup are real scalars, and v and u are real vectors.
120 
121  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
122  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
123  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
124 
125  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
126  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
127  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
128 
129  The elements of the vectors v and u together form the m-by-nb matrix
130  V and the nb-by-n matrix U' which are needed, with X and Y, to apply
131  the transformation to the unreduced part of the matrix, using a block
132  update of the form: A := A - V*Y' - X*U'.
133 
134  The contents of A on exit are illustrated by the following examples
135  with nb = 2:
136 
137  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
138 
139  ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 )
140  ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 )
141  ( v1 v2 a a a ) ( v1 1 a a a a )
142  ( v1 v2 a a a ) ( v1 v2 a a a a )
143  ( v1 v2 a a a ) ( v1 v2 a a a a )
144  ( v1 v2 a a a )
145 
146  where a denotes an element of the original matrix which is unchanged,
147  vi denotes an element of the vector defining H(i), and ui an element
148  of the vector defining G(i).
149 
150  ===================================================================== */
151 
152 
153  /* Table of constant values */
154  double c_neg_one = MAGMA_D_NEG_ONE;
155  double c_one = MAGMA_D_ONE;
156  static int c__1 = 1;
157  double c_zero = MAGMA_D_ZERO;
158 
159  /* System generated locals */
160  int a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__2,
161  i__3;
162  /* Local variables */
163  static int i__;
164  double alpha;
165 
166  a_dim1 = lda;
167  a_offset = 1 + a_dim1;
168  a -= a_offset;
169  --d;
170  --e;
171  --tauq;
172  --taup;
173 
174  x_dim1 = ldx;
175  x_offset = 1 + x_dim1;
176  x -= x_offset;
177  dx-= 1 + lddx;
178 
179  y_dim1 = ldy;
180  y_offset = 1 + y_dim1;
181  y -= y_offset;
182  dy-= 1 + lddy;
183 
184  /* Function Body */
185  if (m <= 0 || n <= 0) {
186  return 0;
187  }
188 
189  double *f = (double *)malloc(max(n,m)*sizeof(double ));
190  static cudaStream_t stream;
191  magma_queue_create( &stream );
192 
193  if (m >= n) {
194 
195  /* Reduce to upper bidiagonal form */
196 
197  for (i__ = 1; i__ <= nb; ++i__) {
198 
199  /* Update A(i:m,i) */
200  i__2 = m - i__ + 1;
201  i__3 = i__ - 1;
202 #if defined(PRECISION_z) || defined(PRECISION_c)
203  lapackf77_dlacgv( &i__3, &y[i__+y_dim1], &ldy );
204 #endif
205  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &a[i__ + a_dim1], &lda,
206  &y[i__+y_dim1], &ldy, &c_one, &a[i__ + i__ * a_dim1], &c__1);
207 #if defined(PRECISION_z) || defined(PRECISION_c)
208  lapackf77_dlacgv( &i__3, &y[i__+y_dim1], &ldy );
209 #endif
210  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &x[i__ + x_dim1], &ldx,
211  &a[i__*a_dim1+1], &c__1, &c_one, &a[i__+i__*a_dim1], &c__1);
212 
213  /* Generate reflection Q(i) to annihilate A(i+1:m,i) */
214 
215  alpha = a[i__ + i__ * a_dim1];
216  i__2 = m - i__ + 1;
217  i__3 = i__ + 1;
218  lapackf77_dlarfg(&i__2, &alpha,
219  &a[min(i__3,m) + i__ * a_dim1], &c__1, &tauq[i__]);
220  d[i__] = MAGMA_D_REAL( alpha );
221  if (i__ < n) {
222  a[i__ + i__ * a_dim1] = c_one;
223 
224  /* Compute Y(i+1:n,i) */
225  i__2 = m - i__ + 1;
226  i__3 = n - i__;
227 
228  // 1. Send the block reflector A(i+1:m,i) to the GPU ------
229  magma_dsetvector( i__2,
230  a + i__ + i__ * a_dim1, 1,
231  da+(i__-1)+(i__-1)* (ldda), 1 );
232  // 2. Multiply ---------------------------------------------
233  magma_dgemv(MagmaTrans, i__2, i__3, c_one,
234  da + (i__-1) + ((i__-1) + 1) * (ldda), ldda,
235  da + (i__-1) + (i__-1) * (ldda), c__1, c_zero,
236  dy + i__ + 1 + i__ * y_dim1, c__1);
237 
238  // 3. Put the result back ----------------------------------
239  magma_dgetmatrix_async( i__3, 1,
240  dy+i__+1+i__*y_dim1, y_dim1,
241  y+i__+1+i__*y_dim1, y_dim1, stream );
242  i__2 = m - i__ + 1;
243  i__3 = i__ - 1;
244  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &a[i__ + a_dim1],
245  &lda, &a[i__ + i__ * a_dim1], &c__1, &c_zero,
246  &y[i__ * y_dim1 + 1], &c__1);
247 
248  i__2 = n - i__;
249  i__3 = i__ - 1;
250  blasf77_dgemv("N", &i__2, &i__3, &c_neg_one, &y[i__ + 1 +y_dim1], &ldy,
251  &y[i__ * y_dim1 + 1], &c__1,
252  &c_zero, f, &c__1);
253  i__2 = m - i__ + 1;
254  i__3 = i__ - 1;
255  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &x[i__ + x_dim1],
256  &ldx, &a[i__ + i__ * a_dim1], &c__1, &c_zero,
257  &y[i__ * y_dim1 + 1], &c__1);
258 
259  // 4. Synch to make sure the result is back ----------------
260  magma_queue_sync( stream );
261 
262  if (i__3!=0){
263  i__2 = n - i__;
264  blasf77_daxpy(&i__2, &c_one, f,&c__1, &y[i__+1+i__*y_dim1],&c__1);
265  }
266 
267  i__2 = i__ - 1;
268  i__3 = n - i__;
269  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_neg_one, &a[(i__ + 1) *
270  a_dim1 + 1], &lda, &y[i__ * y_dim1 + 1], &c__1, &c_one,
271  &y[i__ + 1 + i__ * y_dim1], &c__1);
272  i__2 = n - i__;
273  blasf77_dscal(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
274 
275  /* Update A(i,i+1:n) */
276  i__2 = n - i__;
277 #if defined(PRECISION_z) || defined(PRECISION_c)
278  lapackf77_dlacgv( &i__2, &a[i__+(i__+1)*a_dim1], &lda );
279  lapackf77_dlacgv( &i__, &a[i__+a_dim1], &lda );
280 #endif
281  blasf77_dgemv("No transpose", &i__2, &i__, &c_neg_one, &y[i__ + 1 +
282  y_dim1], &ldy, &a[i__ + a_dim1], &lda, &c_one, &a[i__ + (
283  i__ + 1) * a_dim1], &lda);
284  i__2 = i__ - 1;
285  i__3 = n - i__;
286 #if defined(PRECISION_z) || defined(PRECISION_c)
287  lapackf77_dlacgv( &i__, &a[i__+a_dim1], &lda );
288  lapackf77_dlacgv( &i__2, &x[i__+x_dim1], &ldx );
289 #endif
290  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_neg_one, &a[(i__ + 1) *
291  a_dim1 + 1], &lda, &x[i__ + x_dim1], &ldx, &c_one, &a[
292  i__ + (i__ + 1) * a_dim1], &lda);
293 #if defined(PRECISION_z) || defined(PRECISION_c)
294  lapackf77_dlacgv( &i__2, &x[i__+x_dim1], &ldx );
295 #endif
296 
297  /* Generate reflection P(i) to annihilate A(i,i+2:n) */
298  i__2 = n - i__;
299  /* Computing MIN */
300  i__3 = i__ + 2;
301  alpha = a[i__ + (i__ + 1) * a_dim1];
302  lapackf77_dlarfg(&i__2, &alpha, &a[i__ + min(
303  i__3,n) * a_dim1], &lda, &taup[i__]);
304  e[i__] = MAGMA_D_REAL( alpha );
305  a[i__ + (i__ + 1) * a_dim1] = c_one;
306 
307  /* Compute X(i+1:m,i) */
308  i__2 = m - i__;
309  i__3 = n - i__;
310  // 1. Send the block reflector A(i+1:m,i) to the GPU ------
311  magma_dsetvector( i__3,
312  a + i__ + (i__ +1)* a_dim1, lda,
313  da+(i__-1)+((i__-1)+1)*(ldda), ldda );
314  // 2. Multiply ---------------------------------------------
315  //magma_dcopy(i__3, da+(i__-1)+((i__-1)+1)*(ldda), ldda,
316  // dy + 1 + lddy, 1);
317  magma_dgemv(MagmaNoTrans, i__2, i__3, c_one,
318  da + (i__-1)+1+ ((i__-1)+1) * (ldda), ldda,
319  da + (i__-1) + ((i__-1)+1) * (ldda), ldda,
320  //dy + 1 + lddy, 1,
321  c_zero, dx + i__ + 1 + i__ * x_dim1, c__1);
322 
323  // 3. Put the result back ----------------------------------
324  magma_dgetmatrix_async( i__2, 1,
325  dx+i__+1+i__*x_dim1, x_dim1,
326  x+i__+1+i__*x_dim1, x_dim1, stream );
327 
328  i__2 = n - i__;
329  blasf77_dgemv(MagmaTransStr, &i__2, &i__, &c_one, &y[i__ + 1 + y_dim1],
330  &ldy, &a[i__ + (i__ + 1) * a_dim1], &lda, &c_zero, &x[
331  i__ * x_dim1 + 1], &c__1);
332 
333  i__2 = m - i__;
334  blasf77_dgemv("N", &i__2, &i__, &c_neg_one, &a[i__ + 1 + a_dim1], &lda,
335  &x[i__ * x_dim1 + 1], &c__1, &c_zero, f, &c__1);
336  i__2 = i__ - 1;
337  i__3 = n - i__;
338  blasf77_dgemv("N", &i__2, &i__3, &c_one, &a[(i__ + 1) * a_dim1 + 1],
339  &lda, &a[i__ + (i__ + 1) * a_dim1], &lda,
340  &c_zero, &x[i__ * x_dim1 + 1], &c__1);
341 
342  // 4. Synch to make sure the result is back ----------------
343  magma_queue_sync( stream );
344  if (i__!=0){
345  i__2 = m - i__;
346  blasf77_daxpy(&i__2, &c_one, f,&c__1, &x[i__+1+i__*x_dim1],&c__1);
347  }
348 
349 
350  i__2 = m - i__;
351  i__3 = i__ - 1;
352  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &x[i__ + 1 +
353  x_dim1], &ldx, &x[i__ * x_dim1 + 1], &c__1, &c_one, &x[
354  i__ + 1 + i__ * x_dim1], &c__1);
355  i__2 = m - i__;
356  blasf77_dscal(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
357 
358 #if defined(PRECISION_z) || defined(PRECISION_c)
359  i__2 = n - i__;
360  lapackf77_dlacgv( &i__2, &a[i__+(i__+1)*a_dim1], &lda );
361  // 4. Send the block reflector A(i+1:m,i) to the GPU after ZLACGV()
362  magma_dsetvector( i__2,
363  a + i__ + (i__ +1)* a_dim1, lda,
364  da+(i__-1)+((i__-1)+1)*(ldda), ldda );
365 #endif
366  }
367  }
368  } else {
369 
370  /* Reduce to lower bidiagonal form */
371 
372  for (i__ = 1; i__ <= nb; ++i__) {
373 
374  /* Update A(i,i:n) */
375  i__2 = n - i__ + 1;
376  i__3 = i__ - 1;
377 #if defined(PRECISION_z) || defined(PRECISION_c)
378  lapackf77_dlacgv(&i__2, &a[i__ + i__ * a_dim1], &lda);
379  lapackf77_dlacgv(&i__3, &a[i__ + a_dim1], &lda);
380 #endif
381  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &y[i__ + y_dim1], &ldy,
382  &a[i__ + a_dim1], &lda, &c_one, &a[i__ + i__ * a_dim1], &lda);
383  i__2 = i__ - 1;
384 #if defined(PRECISION_z) || defined(PRECISION_c)
385  lapackf77_dlacgv(&i__3, &a[i__ + a_dim1], &lda);
386  lapackf77_dlacgv(&i__3, &x[i__ + x_dim1], &ldx);
387 #endif
388  i__3 = n - i__ + 1;
389  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_neg_one, &a[i__ * a_dim1 + 1],
390  &lda, &x[i__ + x_dim1], &ldx, &c_one, &a[i__ + i__ * a_dim1], &lda);
391 #if defined(PRECISION_z) || defined(PRECISION_c)
392  lapackf77_dlacgv(&i__2, &x[i__ + x_dim1], &ldx);
393 #endif
394 
395  /* Generate reflection P(i) to annihilate A(i,i+1:n) */
396  i__2 = n - i__ + 1;
397  /* Computing MIN */
398  i__3 = i__ + 1;
399  alpha = a[i__ + i__ * a_dim1];
400  lapackf77_dlarfg(&i__2, &alpha,
401  &a[i__ + min(i__3,n) * a_dim1], &lda, &taup[i__]);
402  d[i__] = MAGMA_D_REAL( alpha );
403  if (i__ < m) {
404  a[i__ + i__ * a_dim1] = c_one;
405 
406  /* Compute X(i+1:m,i) */
407  i__2 = m - i__;
408  i__3 = n - i__ + 1;
409 
410  // 1. Send the block reflector A(i,i+1:n) to the GPU ------
411  magma_dsetvector( i__3,
412  a + i__ + i__ * a_dim1, lda,
413  da+(i__-1)+(i__-1)* (ldda), ldda );
414 
415  // 2. Multiply ---------------------------------------------
416  //magma_dcopy(i__3, da+(i__-1)+(i__-1)*(ldda), ldda,
417  // dy + 1 + lddy, 1);
418  magma_dgemv(MagmaNoTrans, i__2, i__3, c_one,
419  da + (i__-1)+1 + (i__-1) * ldda, ldda,
420  da + (i__-1) + (i__-1) * ldda, ldda,
421  // dy + 1 + lddy, 1,
422  c_zero,
423  dx + i__ + 1 + i__ * x_dim1, c__1);
424 
425  // 3. Put the result back ----------------------------------
426  magma_dgetmatrix_async( i__2, 1,
427  dx+i__+1+i__*x_dim1, x_dim1,
428  x+i__+1+i__*x_dim1, x_dim1, stream );
429 
430  i__2 = n - i__ + 1;
431  i__3 = i__ - 1;
432  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &y[i__ + y_dim1],
433  &ldy, &a[i__ + i__ * a_dim1], &lda, &c_zero,
434  &x[i__ * x_dim1 + 1], &c__1);
435  i__2 = m - i__;
436  i__3 = i__ - 1;
437  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
438  &a[i__ + 1 + a_dim1], &lda, &x[i__ * x_dim1 + 1], &c__1, &c_zero,
439  f, &c__1);
440 
441  i__2 = i__ - 1;
442  i__3 = n - i__ + 1;
443  blasf77_dgemv("No transpose", &i__2, &i__3, &c_one,
444  &a[i__ * a_dim1 + 1], &lda, &a[i__ + i__ * a_dim1], &lda, &c_zero,
445  &x[i__ * x_dim1 + 1], &c__1);
446 
447  // 4. Synch to make sure the result is back ----------------
448  magma_queue_sync( stream );
449  if (i__2!=0){
450  i__3 = m - i__;
451  blasf77_daxpy(&i__3, &c_one, f,&c__1, &x[i__+1+i__*x_dim1],&c__1);
452  }
453 
454  i__2 = m - i__;
455  i__3 = i__ - 1;
456  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
457  &x[i__ + 1 + x_dim1], &ldx, &x[i__ * x_dim1 + 1], &c__1, &c_one,
458  &x[i__ + 1 + i__ * x_dim1], &c__1);
459  i__2 = m - i__;
460  blasf77_dscal(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
461  i__2 = n - i__ + 1;
462 #if defined(PRECISION_z) || defined(PRECISION_c)
463  lapackf77_dlacgv(&i__2, &a[i__ + i__ * a_dim1], &lda);
464  magma_dsetvector( i__2,
465  a + i__ + (i__ )* a_dim1, lda,
466  da+(i__-1)+ (i__-1)*(ldda), ldda );
467 #endif
468 
469  /* Update A(i+1:m,i) */
470  i__2 = m - i__;
471  i__3 = i__ - 1;
472 #if defined(PRECISION_z) || defined(PRECISION_c)
473  lapackf77_dlacgv(&i__3, &y[i__ + y_dim1], &ldy);
474 #endif
475  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
476  &a[i__ + 1 + a_dim1], &lda, &y[i__ + y_dim1], &ldy, &c_one,
477  &a[i__ + 1 + i__ * a_dim1], &c__1);
478  i__2 = m - i__;
479 #if defined(PRECISION_z) || defined(PRECISION_c)
480  lapackf77_dlacgv(&i__3, &y[i__ + y_dim1], &ldy);
481 #endif
482  blasf77_dgemv("No transpose", &i__2, &i__, &c_neg_one,
483  &x[i__ + 1 + x_dim1], &ldx, &a[i__ * a_dim1 + 1], &c__1, &c_one,
484  &a[i__ + 1 + i__ * a_dim1], &c__1);
485 
486  /* Generate reflection Q(i) to annihilate A(i+2:m,i) */
487  i__2 = m - i__;
488  i__3 = i__ + 2;
489  alpha = a[i__ + 1 + i__ * a_dim1];
490  lapackf77_dlarfg(&i__2, &alpha,
491  &a[min(i__3,m) + i__ * a_dim1], &c__1, &tauq[i__]);
492  e[i__] = MAGMA_D_REAL( alpha );
493  a[i__ + 1 + i__ * a_dim1] = c_one;
494 
495  /* Compute Y(i+1:n,i) */
496  i__2 = m - i__;
497  i__3 = n - i__;
498 
499  // 1. Send the block reflector A(i+1:m,i) to the GPU ------
500  magma_dsetvector( i__2,
501  a + i__ +1+ i__ * a_dim1, 1,
502  da+(i__-1)+1+ (i__-1)*(ldda), 1 );
503  // 2. Multiply ---------------------------------------------
504  magma_dgemv(MagmaTrans, i__2, i__3, c_one,
505  da + (i__-1)+1+ ((i__-1)+1) * ldda, ldda,
506  da + (i__-1)+1+ (i__-1) * ldda, c__1,
507  c_zero, dy + i__ + 1 + i__ * y_dim1, c__1);
508 
509  // 3. Put the result back ----------------------------------
510  magma_dgetmatrix_async( i__3, 1,
511  dy+i__+1+i__*y_dim1, y_dim1,
512  y+i__+1+i__*y_dim1, y_dim1, stream );
513 
514  i__2 = m - i__;
515  i__3 = i__ - 1;
516  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &a[i__ + 1 + a_dim1],
517  &lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_zero,
518  &y[ i__ * y_dim1 + 1], &c__1);
519  i__2 = n - i__;
520  i__3 = i__ - 1;
521  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
522  &y[i__ + 1 + y_dim1], &ldy, &y[i__ * y_dim1 + 1], &c__1,
523  &c_zero, f, &c__1);
524 
525  i__2 = m - i__;
526  blasf77_dgemv(MagmaTransStr, &i__2, &i__, &c_one, &x[i__ + 1 + x_dim1],
527  &ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_zero,
528  &y[i__ * y_dim1 + 1], &c__1);
529 
530  // 4. Synch to make sure the result is back ----------------
531  magma_queue_sync( stream );
532  if (i__3!=0){
533  i__2 = n - i__;
534  blasf77_daxpy(&i__2, &c_one, f,&c__1, &y[i__+1+i__*y_dim1],&c__1);
535  }
536 
537  i__2 = n - i__;
538  blasf77_dgemv(MagmaTransStr, &i__, &i__2, &c_neg_one,
539  &a[(i__ + 1) * a_dim1 + 1], &lda, &y[i__ * y_dim1 + 1],
540  &c__1, &c_one, &y[i__ + 1 + i__ * y_dim1], &c__1);
541  i__2 = n - i__;
542  blasf77_dscal(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
543 #if defined(PRECISION_z) || defined(PRECISION_c)
544  } else {
545  i__2 = n - i__ + 1;
546  lapackf77_dlacgv(&i__2, &a[i__ + i__ * a_dim1], &lda);
547  magma_dsetvector( i__2,
548  a + i__ + (i__ )* a_dim1, lda,
549  da+(i__-1)+ (i__-1)*(ldda), ldda );
550 #endif
551  }
552  }
553  }
554 
555  magma_queue_destroy( stream );
556  free(f);
557 
558  return MAGMA_SUCCESS;
559 } /* dlabrd_ */
560