MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dgebrd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated d Thu May 10 22:26:54 2012
9 
10 */
11 #include "common_magma.h"
12 
13 #define A(i, j) (a + (j)*lda + (i))
14 #define dA(i, j) (da+ (j)*ldda + (i))
15 
16 // === Define what BLAS to use ============================================
17 #define PRECISION_d
18 #if (defined(PRECISION_s) || defined(PRECISION_d))
19  #define magma_dgemm magmablas_dgemm
20 #endif
21 // === End defining what BLAS to use ======================================
22 
23 extern "C" magma_int_t
25  double *a, magma_int_t lda, double *d, double *e,
26  double *tauq, double *taup,
27  double *work, magma_int_t lwork,
28  magma_int_t *info)
29 {
30 /* -- MAGMA (version 1.2.0) --
31  Univ. of Tennessee, Knoxville
32  Univ. of California, Berkeley
33  Univ. of Colorado, Denver
34  May 2012
35 
36  Purpose
37  =======
38  DGEBRD reduces a general real M-by-N matrix A to upper or lower
39  bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
40 
41  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
42 
43  Arguments
44  =========
45  M (input) INTEGER
46  The number of rows in the matrix A. M >= 0.
47 
48  N (input) INTEGER
49  The number of columns in the matrix A. N >= 0.
50 
51  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
52  On entry, the M-by-N general matrix to be reduced.
53  On exit,
54  if m >= n, the diagonal and the first superdiagonal are
55  overwritten with the upper bidiagonal matrix B; the
56  elements below the diagonal, with the array TAUQ, represent
57  the orthogonal matrix Q as a product of elementary
58  reflectors, and the elements above the first superdiagonal,
59  with the array TAUP, represent the orthogonal matrix P as
60  a product of elementary reflectors;
61  if m < n, the diagonal and the first subdiagonal are
62  overwritten with the lower bidiagonal matrix B; the
63  elements below the first subdiagonal, with the array TAUQ,
64  represent the orthogonal matrix Q as a product of
65  elementary reflectors, and the elements above the diagonal,
66  with the array TAUP, represent the orthogonal matrix P as
67  a product of elementary reflectors.
68  See Further Details.
69 
70  LDA (input) INTEGER
71  The leading dimension of the array A. LDA >= max(1,M).
72 
73  D (output) double precision array, dimension (min(M,N))
74  The diagonal elements of the bidiagonal matrix B:
75  D(i) = A(i,i).
76 
77  E (output) double precision array, dimension (min(M,N)-1)
78  The off-diagonal elements of the bidiagonal matrix B:
79  if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
80  if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
81 
82  TAUQ (output) DOUBLE_PRECISION array dimension (min(M,N))
83  The scalar factors of the elementary reflectors which
84  represent the orthogonal matrix Q. See Further Details.
85 
86  TAUP (output) DOUBLE_PRECISION array, dimension (min(M,N))
87  The scalar factors of the elementary reflectors which
88  represent the orthogonal matrix P. See Further Details.
89 
90  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
91  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
92 
93  LWORK (input) INTEGER
94  The length of the array WORK. LWORK >= max(1,M,N).
95  For optimum performance LWORK >= (M+N)*NB, where NB
96  is the optimal blocksize.
97 
98  If LWORK = -1, then a workspace query is assumed; the routine
99  only calculates the optimal size of the WORK array, returns
100  this value as the first entry of the WORK array, and no error
101  message related to LWORK is issued by XERBLA.
102 
103  INFO (output) INTEGER
104  = 0: successful exit
105  < 0: if INFO = -i, the i-th argument had an illegal value.
106 
107  Further Details
108  ===============
109  The matrices Q and P are represented as products of elementary
110  reflectors:
111 
112  If m >= n,
113  Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
114  Each H(i) and G(i) has the form:
115  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
116  where tauq and taup are real scalars, and v and u are real vectors;
117  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
118  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
119  tauq is stored in TAUQ(i) and taup in TAUP(i).
120 
121  If m < n,
122  Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
123  Each H(i) and G(i) has the form:
124  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
125  where tauq and taup are real scalars, and v and u are real vectors;
126  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
127  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
128  tauq is stored in TAUQ(i) and taup in TAUP(i).
129 
130  The contents of A on exit are illustrated by the following examples:
131 
132  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
133 
134  ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
135  ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
136  ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
137  ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
138  ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
139  ( v1 v2 v3 v4 v5 )
140 
141  where d and e denote diagonal and off-diagonal elements of B, vi
142  denotes an element of the vector defining H(i), and ui an element of
143  the vector defining G(i).
144  ===================================================================== */
145 
146  double c_neg_one = MAGMA_D_NEG_ONE;
147  double c_one = MAGMA_D_ONE;
148  double *da, *dwork;
149 
150  magma_int_t ncol, nrow, jmax, nb, ldda;
151 
152  static magma_int_t i, j, nx;
153  static double ws;
154  static magma_int_t iinfo;
155 
156  magma_int_t minmn;
157  magma_int_t ldwrkx, ldwrky, lwkopt;
158  magma_int_t lquery;
159 
160  nb = magma_get_dgebrd_nb(n);
161  ldda = m;
162 
163  lwkopt = (m + n) * nb;
164  work[0] = MAGMA_D_MAKE( lwkopt, 0. );
165  lquery = (lwork == -1);
166 
167  /* Check arguments */
168  *info = 0;
169  if (m < 0) {
170  *info = -1;
171  } else if (n < 0) {
172  *info = -2;
173  } else if (lda < max(1,m)) {
174  *info = -4;
175  } else if ( (lwork < max( max(1, m), n)) && (! lquery) ) {
176  *info = -10;
177  }
178  if (*info < 0) {
179  magma_xerbla( __func__, -(*info) );
180  return *info;
181  }
182  else if (lquery)
183  return *info;
184 
185  /* Quick return if possible */
186  minmn = min(m,n);
187  if (minmn == 0) {
188  work[0] = c_one;
189  return *info;
190  }
191 
192  if (MAGMA_SUCCESS != magma_dmalloc( &da, n*ldda + (m + n)*nb )) {
193  fprintf (stderr, "!!!! device memory allocation error in dgebrd\n" );
194  *info = MAGMA_ERR_DEVICE_ALLOC;
195  return *info;
196  }
197  dwork = da + (n)*ldda;
198 
199  MAGMA_D_SET2REAL( ws, max(m,n) );
200  ldwrkx = m;
201  ldwrky = n;
202 
203  /* Set the block/unblock crossover point NX. */
204  nx = 128;
205 
206  /* Copy the matrix to the GPU */
207  if (minmn-nx>=1)
208  magma_dsetmatrix( m, n, a, lda, da, ldda );
209 
210  for (i=0; i< (minmn - nx); i += nb) {
211 
212  /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
213  the matrices X and Y which are needed to update the unreduced
214  part of the matrix */
215  nrow = m - i;
216  ncol = n - i;
217 
218  /* Get the current panel (no need for the 1st iteration) */
219  if ( i > 0 ) {
220  magma_dgetmatrix( nrow, nb, dA(i, i), ldda, A( i, i), lda );
221  magma_dgetmatrix( nb, ncol - nb,
222  dA(i, i+nb), ldda,
223  A( i, i+nb), lda );
224  }
225 
226  magma_dlabrd_gpu(nrow, ncol, nb,
227  A(i, i), lda, dA(i, i), ldda,
228  d+i, e+i, tauq+i, taup+i,
229  work, ldwrkx, dwork, ldwrkx, // x, dx
230  work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky); // y, dy
231 
232  /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
233  of the form A := A - V*Y' - X*U' */
234  nrow = m - i - nb;
235  ncol = n - i - nb;
236 
237  // Send Y back to the GPU
238  magma_dsetmatrix( nrow, nb, work + nb, ldwrkx, dwork + nb, ldwrkx );
239  magma_dsetmatrix( ncol, nb,
240  work + (ldwrkx+1)*nb, ldwrky,
241  dwork + (ldwrkx+1)*nb, ldwrky );
242 
244  nrow, ncol, nb,
245  c_neg_one, dA(i+nb, i ), ldda,
246  dwork+(ldwrkx+1)*nb, ldwrky,
247  c_one, dA(i+nb, i+nb), ldda);
248 
250  nrow, ncol, nb,
251  c_neg_one, dwork+nb, ldwrkx,
252  dA( i, i+nb ), ldda,
253  c_one, dA( i+nb, i+nb ), ldda);
254 
255  /* Copy diagonal and off-diagonal elements of B back into A */
256  if (m >= n) {
257  jmax = i + nb;
258  for (j = i; j < jmax; ++j) {
259  *A(j, j ) = MAGMA_D_MAKE( d[j], 0. );
260  *A(j, j+1) = MAGMA_D_MAKE( e[j], 0. );
261  }
262  } else {
263  jmax = i + nb;
264  for (j = i; j < jmax; ++j) {
265  *A(j, j ) = MAGMA_D_MAKE( d[j], 0. );
266  *A(j+1, j ) = MAGMA_D_MAKE( e[j], 0. );
267  /* L20: */
268  }
269  }
270  }
271 
272  /* Use unblocked code to reduce the remainder of the matrix */
273  nrow = m - i;
274  ncol = n - i;
275 
276  if ( 0 < (minmn-nx) )
277  magma_dgetmatrix( nrow, ncol, dA(i, i), ldda, A( i, i), lda );
278 
279  lapackf77_dgebrd( &nrow, &ncol,
280  A(i, i), &lda, d+i, e+i,
281  tauq+i, taup+i, work, &lwork, &iinfo);
282  work[0] = ws;
283 
284  magma_free( da );
285  return *info;
286 } /* dgebrd_ */
287