MAGMA 1.4.0 — Matrix Algebra on GPU and Multicore Architectures
dlabrd_gpu.cpp — GPU-assisted reduction of the leading panel of a matrix to bidiagonal form.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @generated d Tue Aug 13 16:44:55 2013
9
10 */
11 #include "common_magma.h"
12
13 #include <assert.h>
14
15 #define PRECISION_d
16
17 extern "C" magma_int_t
19  double *a, magma_int_t lda, double *da, magma_int_t ldda,
20  double *d, double *e, double *tauq, double *taup,
21  double *x, magma_int_t ldx, double *dx, magma_int_t lddx,
22  double *y, magma_int_t ldy, double *dy, magma_int_t lddy)
23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
28  August 2013
29
30  Purpose
31  =======
32  DLABRD reduces the first NB rows and columns of a real general
33  m by n matrix A to upper or lower bidiagonal form by an orthogonal
34  transformation Q' * A * P, and returns the matrices X and Y which
35  are needed to apply the transformation to the unreduced part of A.
36
37  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
38  bidiagonal form.
39
40  This is an auxiliary routine called by SGEBRD
41
42  Arguments
43  =========
44  M (input) INTEGER
45  The number of rows in the matrix A.
46
47  N (input) INTEGER
48  The number of columns in the matrix A.
49
50  NB (input) INTEGER
51  The number of leading rows and columns of A to be reduced.
52
53  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
54  On entry, the m by n general matrix to be reduced.
55  On exit, the first NB rows and columns of the matrix are
56  overwritten; the rest of the array is unchanged.
57  If m >= n, elements on and below the diagonal in the first NB
58  columns, with the array TAUQ, represent the orthogonal
59  matrix Q as a product of elementary reflectors; and
60  elements above the diagonal in the first NB rows, with the
61  array TAUP, represent the orthogonal matrix P as a product
62  of elementary reflectors.
63  If m < n, elements below the diagonal in the first NB
64  columns, with the array TAUQ, represent the orthogonal
65  matrix Q as a product of elementary reflectors, and
66  elements on and above the diagonal in the first NB rows,
67  with the array TAUP, represent the orthogonal matrix P as
68  a product of elementary reflectors.
69  See Further Details.
70
71  LDA (input) INTEGER
72  The leading dimension of the array A. LDA >= max(1,M).
73
74  D (output) DOUBLE_PRECISION array, dimension (NB)
75  The diagonal elements of the first NB rows and columns of
76  the reduced matrix. D(i) = A(i,i).
77
78  E (output) DOUBLE_PRECISION array, dimension (NB)
79  The off-diagonal elements of the first NB rows and columns of
80  the reduced matrix.
81
82  TAUQ (output) DOUBLE_PRECISION array dimension (NB)
83  The scalar factors of the elementary reflectors which
84  represent the orthogonal matrix Q. See Further Details.
85
86  TAUP (output) DOUBLE_PRECISION array, dimension (NB)
87  The scalar factors of the elementary reflectors which
88  represent the orthogonal matrix P. See Further Details.
89
90  X (output) DOUBLE_PRECISION array, dimension (LDX,NB)
91  The m-by-nb matrix X required to update the unreduced part
92  of A.
93
94  LDX (input) INTEGER
95  The leading dimension of the array X. LDX >= M.
96
97  Y (output) DOUBLE_PRECISION array, dimension (LDY,NB)
98  The n-by-nb matrix Y required to update the unreduced part
99  of A.
100
101  LDY (input) INTEGER
102  The leading dimension of the array Y. LDY >= N.
103
104  Further Details
105  ===============
106  The matrices Q and P are represented as products of elementary
107  reflectors:
108
109  Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb)
110
111  Each H(i) and G(i) has the form:
112
113  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
114
115  where tauq and taup are real scalars, and v and u are real vectors.
116
117  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
118  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
119  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
120
121  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
122  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
123  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
124
125  The elements of the vectors v and u together form the m-by-nb matrix
126  V and the nb-by-n matrix U' which are needed, with X and Y, to apply
127  the transformation to the unreduced part of the matrix, using a block
128  update of the form: A := A - V*Y' - X*U'.
129
130  The contents of A on exit are illustrated by the following examples
131  with nb = 2:
132
133  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
134
135  ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 )
136  ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 )
137  ( v1 v2 a a a ) ( v1 1 a a a a )
138  ( v1 v2 a a a ) ( v1 v2 a a a a )
139  ( v1 v2 a a a ) ( v1 v2 a a a a )
140  ( v1 v2 a a a )
141
142  where a denotes an element of the original matrix which is unchanged,
143  vi denotes an element of the vector defining H(i), and ui an element
144  of the vector defining G(i).
145
146  ===================================================================== */
147
148
149  /* Table of constant values */
150  double c_neg_one = MAGMA_D_NEG_ONE;
151  double c_one = MAGMA_D_ONE;
152  double c_zero = MAGMA_D_ZERO;
153  magma_int_t c__1 = 1;
154
155  /* System generated locals */
156  magma_int_t a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__2, i__3;
157  /* Local variables */
158  magma_int_t i__;
159  double alpha;
160
161  a_dim1 = lda;
162  a_offset = 1 + a_dim1;
163  a -= a_offset;
164  --d;
165  --e;
166  --tauq;
167  --taup;
168
169  x_dim1 = ldx;
170  x_offset = 1 + x_dim1;
171  x -= x_offset;
172  dx-= 1 + lddx;
173
174  y_dim1 = ldy;
175  y_offset = 1 + y_dim1;
176  y -= y_offset;
177  dy-= 1 + lddy;
178
179  /* Function Body */
180  if (m <= 0 || n <= 0) {
181  return 0;
182  }
183
184  double *f;
185  magma_queue_t stream;
186  magma_queue_create( &stream );
187  magma_dmalloc_cpu( &f, max(n,m) );
188  assert( f != NULL ); // TODO return error, or allocate outside dlatrd
189
190  if (m >= n) {
191
192  /* Reduce to upper bidiagonal form */
193
194  for (i__ = 1; i__ <= nb; ++i__) {
195
196  /* Update A(i:m,i) */
197  i__2 = m - i__ + 1;
198  i__3 = i__ - 1;
199  #if defined(PRECISION_z) || defined(PRECISION_c)
200  lapackf77_dlacgv( &i__3, &y[i__+y_dim1], &ldy );
201  #endif
202  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &a[i__ + a_dim1], &lda,
203  &y[i__+y_dim1], &ldy, &c_one, &a[i__ + i__ * a_dim1], &c__1);
204  #if defined(PRECISION_z) || defined(PRECISION_c)
205  lapackf77_dlacgv( &i__3, &y[i__+y_dim1], &ldy );
206  #endif
207  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &x[i__ + x_dim1], &ldx,
208  &a[i__*a_dim1+1], &c__1, &c_one, &a[i__+i__*a_dim1], &c__1);
209
210  /* Generate reflection Q(i) to annihilate A(i+1:m,i) */
211
212  alpha = a[i__ + i__ * a_dim1];
213  i__2 = m - i__ + 1;
214  i__3 = i__ + 1;
215  lapackf77_dlarfg(&i__2, &alpha,
216  &a[min(i__3,m) + i__ * a_dim1], &c__1, &tauq[i__]);
217  d[i__] = MAGMA_D_REAL( alpha );
218  if (i__ < n) {
219  a[i__ + i__ * a_dim1] = c_one;
220
221  /* Compute Y(i+1:n,i) */
222  i__2 = m - i__ + 1;
223  i__3 = n - i__;
224
225  // 1. Send the block reflector A(i+1:m,i) to the GPU ------
226  magma_dsetvector( i__2,
227  a + i__ + i__ * a_dim1, 1,
228  da+(i__-1)+(i__-1)* (ldda), 1 );
229  // 2. Multiply ---------------------------------------------
230  magma_dgemv(MagmaTrans, i__2, i__3, c_one,
231  da + (i__-1) + ((i__-1) + 1) * (ldda), ldda,
232  da + (i__-1) + (i__-1) * (ldda), c__1, c_zero,
233  dy + i__ + 1 + i__ * y_dim1, c__1);
234
235  // 3. Put the result back ----------------------------------
236  magma_dgetmatrix_async( i__3, 1,
237  dy+i__+1+i__*y_dim1, y_dim1,
238  y+i__+1+i__*y_dim1, y_dim1, stream );
239  i__2 = m - i__ + 1;
240  i__3 = i__ - 1;
241  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &a[i__ + a_dim1],
242  &lda, &a[i__ + i__ * a_dim1], &c__1, &c_zero,
243  &y[i__ * y_dim1 + 1], &c__1);
244
245  i__2 = n - i__;
246  i__3 = i__ - 1;
247  blasf77_dgemv("N", &i__2, &i__3, &c_neg_one, &y[i__ + 1 +y_dim1], &ldy,
248  &y[i__ * y_dim1 + 1], &c__1,
249  &c_zero, f, &c__1);
250  i__2 = m - i__ + 1;
251  i__3 = i__ - 1;
252  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &x[i__ + x_dim1],
253  &ldx, &a[i__ + i__ * a_dim1], &c__1, &c_zero,
254  &y[i__ * y_dim1 + 1], &c__1);
255
256  // 4. Synch to make sure the result is back ----------------
257  magma_queue_sync( stream );
258
259  if (i__3!=0){
260  i__2 = n - i__;
261  blasf77_daxpy(&i__2, &c_one, f,&c__1, &y[i__+1+i__*y_dim1],&c__1);
262  }
263
264  i__2 = i__ - 1;
265  i__3 = n - i__;
266  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_neg_one, &a[(i__ + 1) *
267  a_dim1 + 1], &lda, &y[i__ * y_dim1 + 1], &c__1, &c_one,
268  &y[i__ + 1 + i__ * y_dim1], &c__1);
269  i__2 = n - i__;
270  blasf77_dscal(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
271
272  /* Update A(i,i+1:n) */
273  i__2 = n - i__;
274  #if defined(PRECISION_z) || defined(PRECISION_c)
275  lapackf77_dlacgv( &i__2, &a[i__+(i__+1)*a_dim1], &lda );
276  lapackf77_dlacgv( &i__, &a[i__+a_dim1], &lda );
277  #endif
278  blasf77_dgemv("No transpose", &i__2, &i__, &c_neg_one, &y[i__ + 1 +
279  y_dim1], &ldy, &a[i__ + a_dim1], &lda, &c_one, &a[i__ + (
280  i__ + 1) * a_dim1], &lda);
281  i__2 = i__ - 1;
282  i__3 = n - i__;
283  #if defined(PRECISION_z) || defined(PRECISION_c)
284  lapackf77_dlacgv( &i__, &a[i__+a_dim1], &lda );
285  lapackf77_dlacgv( &i__2, &x[i__+x_dim1], &ldx );
286  #endif
287  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_neg_one, &a[(i__ + 1) *
288  a_dim1 + 1], &lda, &x[i__ + x_dim1], &ldx, &c_one, &a[
289  i__ + (i__ + 1) * a_dim1], &lda);
290  #if defined(PRECISION_z) || defined(PRECISION_c)
291  lapackf77_dlacgv( &i__2, &x[i__+x_dim1], &ldx );
292  #endif
293
294  /* Generate reflection P(i) to annihilate A(i,i+2:n) */
295  i__2 = n - i__;
296  /* Computing MIN */
297  i__3 = i__ + 2;
298  alpha = a[i__ + (i__ + 1) * a_dim1];
299  lapackf77_dlarfg(&i__2, &alpha, &a[i__ + min(
300  i__3,n) * a_dim1], &lda, &taup[i__]);
301  e[i__] = MAGMA_D_REAL( alpha );
302  a[i__ + (i__ + 1) * a_dim1] = c_one;
303
304  /* Compute X(i+1:m,i) */
305  i__2 = m - i__;
306  i__3 = n - i__;
307  // 1. Send the block reflector A(i+1:m,i) to the GPU ------
308  magma_dsetvector( i__3,
309  a + i__ + (i__ +1)* a_dim1, lda,
310  da+(i__-1)+((i__-1)+1)*(ldda), ldda );
311  // 2. Multiply ---------------------------------------------
312  //magma_dcopy(i__3, da+(i__-1)+((i__-1)+1)*(ldda), ldda,
313  // dy + 1 + lddy, 1);
314  magma_dgemv(MagmaNoTrans, i__2, i__3, c_one,
315  da + (i__-1)+1+ ((i__-1)+1) * (ldda), ldda,
316  da + (i__-1) + ((i__-1)+1) * (ldda), ldda,
317  //dy + 1 + lddy, 1,
318  c_zero, dx + i__ + 1 + i__ * x_dim1, c__1);
319
320  // 3. Put the result back ----------------------------------
321  magma_dgetmatrix_async( i__2, 1,
322  dx+i__+1+i__*x_dim1, x_dim1,
323  x+i__+1+i__*x_dim1, x_dim1, stream );
324
325  i__2 = n - i__;
326  blasf77_dgemv(MagmaTransStr, &i__2, &i__, &c_one, &y[i__ + 1 + y_dim1],
327  &ldy, &a[i__ + (i__ + 1) * a_dim1], &lda, &c_zero, &x[
328  i__ * x_dim1 + 1], &c__1);
329
330  i__2 = m - i__;
331  blasf77_dgemv("N", &i__2, &i__, &c_neg_one, &a[i__ + 1 + a_dim1], &lda,
332  &x[i__ * x_dim1 + 1], &c__1, &c_zero, f, &c__1);
333  i__2 = i__ - 1;
334  i__3 = n - i__;
335  blasf77_dgemv("N", &i__2, &i__3, &c_one, &a[(i__ + 1) * a_dim1 + 1],
336  &lda, &a[i__ + (i__ + 1) * a_dim1], &lda,
337  &c_zero, &x[i__ * x_dim1 + 1], &c__1);
338
339  // 4. Synch to make sure the result is back ----------------
340  magma_queue_sync( stream );
341  if (i__!=0){
342  i__2 = m - i__;
343  blasf77_daxpy(&i__2, &c_one, f,&c__1, &x[i__+1+i__*x_dim1],&c__1);
344  }
345
346
347  i__2 = m - i__;
348  i__3 = i__ - 1;
349  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &x[i__ + 1 +
350  x_dim1], &ldx, &x[i__ * x_dim1 + 1], &c__1, &c_one, &x[
351  i__ + 1 + i__ * x_dim1], &c__1);
352  i__2 = m - i__;
353  blasf77_dscal(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
354
355  #if defined(PRECISION_z) || defined(PRECISION_c)
356  i__2 = n - i__;
357  lapackf77_dlacgv( &i__2, &a[i__+(i__+1)*a_dim1], &lda );
358  // 4. Send the block reflector A(i+1:m,i) to the GPU after DLACGV()
359  magma_dsetvector( i__2,
360  a + i__ + (i__ +1)* a_dim1, lda,
361  da+(i__-1)+((i__-1)+1)*(ldda), ldda );
362  #endif
363  }
364  }
365  }
366  else {
367
368  /* Reduce to lower bidiagonal form */
369
370  for (i__ = 1; i__ <= nb; ++i__) {
371
372  /* Update A(i,i:n) */
373  i__2 = n - i__ + 1;
374  i__3 = i__ - 1;
375  #if defined(PRECISION_z) || defined(PRECISION_c)
376  lapackf77_dlacgv(&i__2, &a[i__ + i__ * a_dim1], &lda);
377  lapackf77_dlacgv(&i__3, &a[i__ + a_dim1], &lda);
378  #endif
379  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one, &y[i__ + y_dim1], &ldy,
380  &a[i__ + a_dim1], &lda, &c_one, &a[i__ + i__ * a_dim1], &lda);
381  i__2 = i__ - 1;
382  #if defined(PRECISION_z) || defined(PRECISION_c)
383  lapackf77_dlacgv(&i__3, &a[i__ + a_dim1], &lda);
384  lapackf77_dlacgv(&i__3, &x[i__ + x_dim1], &ldx);
385  #endif
386  i__3 = n - i__ + 1;
387  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_neg_one, &a[i__ * a_dim1 + 1],
388  &lda, &x[i__ + x_dim1], &ldx, &c_one, &a[i__ + i__ * a_dim1], &lda);
389  #if defined(PRECISION_z) || defined(PRECISION_c)
390  lapackf77_dlacgv(&i__2, &x[i__ + x_dim1], &ldx);
391  #endif
392
393  /* Generate reflection P(i) to annihilate A(i,i+1:n) */
394  i__2 = n - i__ + 1;
395  /* Computing MIN */
396  i__3 = i__ + 1;
397  alpha = a[i__ + i__ * a_dim1];
398  lapackf77_dlarfg(&i__2, &alpha,
399  &a[i__ + min(i__3,n) * a_dim1], &lda, &taup[i__]);
400  d[i__] = MAGMA_D_REAL( alpha );
401  if (i__ < m) {
402  a[i__ + i__ * a_dim1] = c_one;
403
404  /* Compute X(i+1:m,i) */
405  i__2 = m - i__;
406  i__3 = n - i__ + 1;
407
408  // 1. Send the block reflector A(i,i+1:n) to the GPU ------
409  magma_dsetvector( i__3,
410  a + i__ + i__ * a_dim1, lda,
411  da+(i__-1)+(i__-1)* (ldda), ldda );
412
413  // 2. Multiply ---------------------------------------------
414  //magma_dcopy(i__3, da+(i__-1)+(i__-1)*(ldda), ldda,
415  // dy + 1 + lddy, 1);
416  magma_dgemv(MagmaNoTrans, i__2, i__3, c_one,
417  da + (i__-1)+1 + (i__-1) * ldda, ldda,
418  da + (i__-1) + (i__-1) * ldda, ldda,
419  // dy + 1 + lddy, 1,
420  c_zero,
421  dx + i__ + 1 + i__ * x_dim1, c__1);
422
423  // 3. Put the result back ----------------------------------
424  magma_dgetmatrix_async( i__2, 1,
425  dx+i__+1+i__*x_dim1, x_dim1,
426  x+i__+1+i__*x_dim1, x_dim1, stream );
427
428  i__2 = n - i__ + 1;
429  i__3 = i__ - 1;
430  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &y[i__ + y_dim1],
431  &ldy, &a[i__ + i__ * a_dim1], &lda, &c_zero,
432  &x[i__ * x_dim1 + 1], &c__1);
433  i__2 = m - i__;
434  i__3 = i__ - 1;
435  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
436  &a[i__ + 1 + a_dim1], &lda, &x[i__ * x_dim1 + 1], &c__1, &c_zero,
437  f, &c__1);
438
439  i__2 = i__ - 1;
440  i__3 = n - i__ + 1;
441  blasf77_dgemv("No transpose", &i__2, &i__3, &c_one,
442  &a[i__ * a_dim1 + 1], &lda, &a[i__ + i__ * a_dim1], &lda, &c_zero,
443  &x[i__ * x_dim1 + 1], &c__1);
444
445  // 4. Synch to make sure the result is back ----------------
446  magma_queue_sync( stream );
447  if (i__2!=0){
448  i__3 = m - i__;
449  blasf77_daxpy(&i__3, &c_one, f,&c__1, &x[i__+1+i__*x_dim1],&c__1);
450  }
451
452  i__2 = m - i__;
453  i__3 = i__ - 1;
454  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
455  &x[i__ + 1 + x_dim1], &ldx, &x[i__ * x_dim1 + 1], &c__1, &c_one,
456  &x[i__ + 1 + i__ * x_dim1], &c__1);
457  i__2 = m - i__;
458  blasf77_dscal(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
459  i__2 = n - i__ + 1;
460  #if defined(PRECISION_z) || defined(PRECISION_c)
461  lapackf77_dlacgv(&i__2, &a[i__ + i__ * a_dim1], &lda);
462  magma_dsetvector( i__2,
463  a + i__ + (i__ )* a_dim1, lda,
464  da+(i__-1)+ (i__-1)*(ldda), ldda );
465  #endif
466
467  /* Update A(i+1:m,i) */
468  i__2 = m - i__;
469  i__3 = i__ - 1;
470  #if defined(PRECISION_z) || defined(PRECISION_c)
471  lapackf77_dlacgv(&i__3, &y[i__ + y_dim1], &ldy);
472  #endif
473  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
474  &a[i__ + 1 + a_dim1], &lda, &y[i__ + y_dim1], &ldy, &c_one,
475  &a[i__ + 1 + i__ * a_dim1], &c__1);
476  i__2 = m - i__;
477  #if defined(PRECISION_z) || defined(PRECISION_c)
478  lapackf77_dlacgv(&i__3, &y[i__ + y_dim1], &ldy);
479  #endif
480  blasf77_dgemv("No transpose", &i__2, &i__, &c_neg_one,
481  &x[i__ + 1 + x_dim1], &ldx, &a[i__ * a_dim1 + 1], &c__1, &c_one,
482  &a[i__ + 1 + i__ * a_dim1], &c__1);
483
484  /* Generate reflection Q(i) to annihilate A(i+2:m,i) */
485  i__2 = m - i__;
486  i__3 = i__ + 2;
487  alpha = a[i__ + 1 + i__ * a_dim1];
488  lapackf77_dlarfg(&i__2, &alpha,
489  &a[min(i__3,m) + i__ * a_dim1], &c__1, &tauq[i__]);
490  e[i__] = MAGMA_D_REAL( alpha );
491  a[i__ + 1 + i__ * a_dim1] = c_one;
492
493  /* Compute Y(i+1:n,i) */
494  i__2 = m - i__;
495  i__3 = n - i__;
496
497  // 1. Send the block reflector A(i+1:m,i) to the GPU ------
498  magma_dsetvector( i__2,
499  a + i__ +1+ i__ * a_dim1, 1,
500  da+(i__-1)+1+ (i__-1)*(ldda), 1 );
501  // 2. Multiply ---------------------------------------------
502  magma_dgemv(MagmaTrans, i__2, i__3, c_one,
503  da + (i__-1)+1+ ((i__-1)+1) * ldda, ldda,
504  da + (i__-1)+1+ (i__-1) * ldda, c__1,
505  c_zero, dy + i__ + 1 + i__ * y_dim1, c__1);
506
507  // 3. Put the result back ----------------------------------
508  magma_dgetmatrix_async( i__3, 1,
509  dy+i__+1+i__*y_dim1, y_dim1,
510  y+i__+1+i__*y_dim1, y_dim1, stream );
511
512  i__2 = m - i__;
513  i__3 = i__ - 1;
514  blasf77_dgemv(MagmaTransStr, &i__2, &i__3, &c_one, &a[i__ + 1 + a_dim1],
515  &lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_zero,
516  &y[ i__ * y_dim1 + 1], &c__1);
517  i__2 = n - i__;
518  i__3 = i__ - 1;
519  blasf77_dgemv("No transpose", &i__2, &i__3, &c_neg_one,
520  &y[i__ + 1 + y_dim1], &ldy, &y[i__ * y_dim1 + 1], &c__1,
521  &c_zero, f, &c__1);
522
523  i__2 = m - i__;
524  blasf77_dgemv(MagmaTransStr, &i__2, &i__, &c_one, &x[i__ + 1 + x_dim1],
525  &ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_zero,
526  &y[i__ * y_dim1 + 1], &c__1);
527
528  // 4. Synch to make sure the result is back ----------------
529  magma_queue_sync( stream );
530  if (i__3!=0){
531  i__2 = n - i__;
532  blasf77_daxpy(&i__2, &c_one, f,&c__1, &y[i__+1+i__*y_dim1],&c__1);
533  }
534
535  i__2 = n - i__;
536  blasf77_dgemv(MagmaTransStr, &i__, &i__2, &c_neg_one,
537  &a[(i__ + 1) * a_dim1 + 1], &lda, &y[i__ * y_dim1 + 1],
538  &c__1, &c_one, &y[i__ + 1 + i__ * y_dim1], &c__1);
539  i__2 = n - i__;
540  blasf77_dscal(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
541  }
542  #if defined(PRECISION_z) || defined(PRECISION_c)
543  else {
544  i__2 = n - i__ + 1;
545  lapackf77_dlacgv(&i__2, &a[i__ + i__ * a_dim1], &lda);
546  magma_dsetvector( i__2,
547  a + i__ + (i__ )* a_dim1, lda,
548  da+(i__-1)+ (i__-1)*(ldda), ldda );
549  }
550  #endif
551  }
552  }
553
554  magma_queue_destroy( stream );
555  magma_free_cpu(f);
556
557  return MAGMA_SUCCESS;
558 } /* dlabrd */
559
#define MagmaTransStr
Definition: magma.h:81
void lapackf77_dlarfg(const magma_int_t *n, double *alpha, double *x, const magma_int_t *incx, double *tau)
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
magma_int_t magma_dlabrd_gpu(magma_int_t m, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *da, magma_int_t ldda, double *d, double *e, double *tauq, double *taup, double *x, magma_int_t ldx, double *dx, magma_int_t lddx, double *y, magma_int_t ldy, double *dy, magma_int_t lddy)
Definition: dlabrd_gpu.cpp:18
#define magma_queue_create(queuePtr)
Definition: magma.h:113
void blasf77_dgemv(const char *transa, const magma_int_t *m, const magma_int_t *n, const double *alpha, const double *A, const magma_int_t *lda, const double *x, const magma_int_t *incx, const double *beta, double *y, const magma_int_t *incy)
magma_int_t ldda
#define magma_dsetvector(n, hx_src, incx, dy_dst, incy)
Definition: magmablas_d.h:634
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_d.h:714
#define magma_queue_destroy(queue)
Definition: magma.h:116
void blasf77_daxpy(const magma_int_t *n, const double *alpha, const double *x, const magma_int_t *incx, double *y, const magma_int_t *incy)
void lapackf77_dlacgv(const magma_int_t *n, double *x, const magma_int_t *incx)
#define MAGMA_D_REAL(a)
Definition: magma.h:168
#define MagmaTrans
Definition: magma.h:58
static magma_err_t magma_dmalloc_cpu(double **ptrPtr, size_t n)
Definition: magma.h:84
#define MAGMA_SUCCESS
Definition: magma.h:106
void blasf77_dscal(const magma_int_t *n, const double *alpha, double *x, const magma_int_t *incx)
#define dx(d, j)
#define MAGMA_D_NEG_ONE
Definition: magma.h:178
#define MAGMA_D_ZERO
Definition: magma.h:175
#define MagmaNoTrans
Definition: magma.h:57
void magma_dgemv(magma_trans_t transA, magma_int_t m, magma_int_t n, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dx, magma_int_t incx, double beta, magmaDouble_ptr dy, magma_int_t incy)
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define magma_queue_sync(queue)
Definition: magma.h:119