MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dgebrd.cpp File Reference
#include "common_magma.h"
Include dependency graph for dgebrd.cpp:

Go to the source code of this file.

Macros

#define A(i, j)   (a + (j)*lda + (i))
 
#define dA(i, j)   (da+ (j)*ldda + (i))
 

Functions

magma_int_t magma_dgebrd (magma_int_t m, magma_int_t n, double *a, magma_int_t lda, double *d, double *e, double *tauq, double *taup, double *work, magma_int_t lwork, magma_int_t *info)
 

Macro Definition Documentation

#define A (   i,
 
)    (a + (j)*lda + (i))

Definition at line 13 of file dgebrd.cpp.

#define dA (   i,
 
)    (da+ (j)*ldda + (i))

Definition at line 14 of file dgebrd.cpp.

Function Documentation

magma_int_t magma_dgebrd ( magma_int_t  m,
magma_int_t  n,
double *  a,
magma_int_t  lda,
double *  d,
double *  e,
double *  tauq,
double *  taup,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 17 of file dgebrd.cpp.

References __func__, A, dA, dwork, lapackf77_dgebrd(), MAGMA_D_MAKE, MAGMA_D_NEG_ONE, MAGMA_D_ONE, magma_dgemm(), magma_dgetmatrix, magma_dlabrd_gpu(), magma_dmalloc(), magma_dsetmatrix, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgebrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MagmaNoTrans, MagmaTrans, max, and min.

22 {
23 /* -- MAGMA (version 1.4.0) --
24  Univ. of Tennessee, Knoxville
25  Univ. of California, Berkeley
26  Univ. of Colorado, Denver
27  August 2013
28 
29  Purpose
30  =======
31  DGEBRD reduces a general real M-by-N matrix A to upper or lower
32  bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
33 
34  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
35 
36  Arguments
37  =========
38  M (input) INTEGER
39  The number of rows in the matrix A. M >= 0.
40 
41  N (input) INTEGER
42  The number of columns in the matrix A. N >= 0.
43 
44  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
45  On entry, the M-by-N general matrix to be reduced.
46  On exit,
47  if m >= n, the diagonal and the first superdiagonal are
48  overwritten with the upper bidiagonal matrix B; the
49  elements below the diagonal, with the array TAUQ, represent
50  the orthogonal matrix Q as a product of elementary
51  reflectors, and the elements above the first superdiagonal,
52  with the array TAUP, represent the orthogonal matrix P as
53  a product of elementary reflectors;
54  if m < n, the diagonal and the first subdiagonal are
55  overwritten with the lower bidiagonal matrix B; the
56  elements below the first subdiagonal, with the array TAUQ,
57  represent the orthogonal matrix Q as a product of
58  elementary reflectors, and the elements above the diagonal,
59  with the array TAUP, represent the orthogonal matrix P as
60  a product of elementary reflectors.
61  See Further Details.
62 
63  LDA (input) INTEGER
64  The leading dimension of the array A. LDA >= max(1,M).
65 
66  D (output) double precision array, dimension (min(M,N))
67  The diagonal elements of the bidiagonal matrix B:
68  D(i) = A(i,i).
69 
70  E (output) double precision array, dimension (min(M,N)-1)
71  The off-diagonal elements of the bidiagonal matrix B:
72  if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
73  if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
74 
75  TAUQ (output) DOUBLE_PRECISION array dimension (min(M,N))
76  The scalar factors of the elementary reflectors which
77  represent the orthogonal matrix Q. See Further Details.
78 
79  TAUP (output) DOUBLE_PRECISION array, dimension (min(M,N))
80  The scalar factors of the elementary reflectors which
81  represent the orthogonal matrix P. See Further Details.
82 
83  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
84  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
85 
86  LWORK (input) INTEGER
87  The length of the array WORK. LWORK >= (M+N)*NB, where NB
88  is the optimal blocksize.
89 
90  If LWORK = -1, then a workspace query is assumed; the routine
91  only calculates the optimal size of the WORK array, returns
92  this value as the first entry of the WORK array, and no error
93  message related to LWORK is issued by XERBLA.
94 
95  INFO (output) INTEGER
96  = 0: successful exit
97  < 0: if INFO = -i, the i-th argument had an illegal value.
98 
99  Further Details
100  ===============
101  The matrices Q and P are represented as products of elementary
102  reflectors:
103 
104  If m >= n,
105  Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
106  Each H(i) and G(i) has the form:
107  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
108  where tauq and taup are real scalars, and v and u are real vectors;
109  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
110  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
111  tauq is stored in TAUQ(i) and taup in TAUP(i).
112 
113  If m < n,
114  Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
115  Each H(i) and G(i) has the form:
116  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
117  where tauq and taup are real scalars, and v and u are real vectors;
118  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
119  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
120  tauq is stored in TAUQ(i) and taup in TAUP(i).
121 
122  The contents of A on exit are illustrated by the following examples:
123 
124  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
125 
126  ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
127  ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
128  ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
129  ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
130  ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
131  ( v1 v2 v3 v4 v5 )
132 
133  where d and e denote diagonal and off-diagonal elements of B, vi
134  denotes an element of the vector defining H(i), and ui an element of
135  the vector defining G(i).
136  ===================================================================== */
137 
138  double c_neg_one = MAGMA_D_NEG_ONE;
139  double c_one = MAGMA_D_ONE;
140  double *da, *dwork;
141 
142  magma_int_t ncol, nrow, jmax, nb, ldda;
143 
144  magma_int_t i, j, nx;
145  magma_int_t iinfo;
146 
147  magma_int_t minmn;
148  magma_int_t ldwrkx, ldwrky, lwkopt;
149  magma_int_t lquery;
150 
151  nb = magma_get_dgebrd_nb(n);
152  ldda = m;
153 
154  lwkopt = (m + n) * nb;
155  work[0] = MAGMA_D_MAKE( lwkopt, 0. );
156  lquery = (lwork == -1);
157 
158  /* Check arguments */
159  *info = 0;
160  if (m < 0) {
161  *info = -1;
162  } else if (n < 0) {
163  *info = -2;
164  } else if (lda < max(1,m)) {
165  *info = -4;
166  } else if (lwork < lwkopt && (! lquery) ) {
167  *info = -10;
168  }
169  if (*info < 0) {
170  magma_xerbla( __func__, -(*info) );
171  return *info;
172  }
173  else if (lquery)
174  return *info;
175 
176  /* Quick return if possible */
177  minmn = min(m,n);
178  if (minmn == 0) {
179  work[0] = c_one;
180  return *info;
181  }
182 
183  if (MAGMA_SUCCESS != magma_dmalloc( &da, n*ldda + (m + n)*nb )) {
184  fprintf (stderr, "!!!! device memory allocation error in dgebrd\n" );
185  *info = MAGMA_ERR_DEVICE_ALLOC;
186  return *info;
187  }
188  dwork = da + (n)*ldda;
189 
190  ldwrkx = m;
191  ldwrky = n;
192 
193  /* Set the block/unblock crossover point NX. */
194  nx = 128;
195 
196  /* Copy the matrix to the GPU */
197  if (minmn - nx >= 1) {
198  magma_dsetmatrix( m, n, a, lda, da, ldda );
199  }
200 
201  for (i=0; i< (minmn - nx); i += nb) {
202 
203  /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
204  the matrices X and Y which are needed to update the unreduced
205  part of the matrix */
206  nrow = m - i;
207  ncol = n - i;
208 
209  /* Get the current panel (no need for the 1st iteration) */
210  if ( i > 0 ) {
211  magma_dgetmatrix( nrow, nb, dA(i, i), ldda, A( i, i), lda );
212  magma_dgetmatrix( nb, ncol - nb,
213  dA(i, i+nb), ldda,
214  A( i, i+nb), lda );
215  }
216 
217  magma_dlabrd_gpu(nrow, ncol, nb,
218  A(i, i), lda, dA(i, i), ldda,
219  d+i, e+i, tauq+i, taup+i,
220  work, ldwrkx, dwork, ldwrkx, // x, dx
221  work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky); // y, dy
222 
223  /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
224  of the form A := A - V*Y' - X*U' */
225  nrow = m - i - nb;
226  ncol = n - i - nb;
227 
228  // Send Y back to the GPU
229  magma_dsetmatrix( nrow, nb, work + nb, ldwrkx, dwork + nb, ldwrkx );
230  magma_dsetmatrix( ncol, nb,
231  work + (ldwrkx+1)*nb, ldwrky,
232  dwork + (ldwrkx+1)*nb, ldwrky );
233 
235  nrow, ncol, nb,
236  c_neg_one, dA(i+nb, i ), ldda,
237  dwork+(ldwrkx+1)*nb, ldwrky,
238  c_one, dA(i+nb, i+nb), ldda);
239 
241  nrow, ncol, nb,
242  c_neg_one, dwork+nb, ldwrkx,
243  dA( i, i+nb ), ldda,
244  c_one, dA( i+nb, i+nb ), ldda);
245 
246  /* Copy diagonal and off-diagonal elements of B back into A */
247  if (m >= n) {
248  jmax = i + nb;
249  for (j = i; j < jmax; ++j) {
250  *A(j, j ) = MAGMA_D_MAKE( d[j], 0. );
251  *A(j, j+1) = MAGMA_D_MAKE( e[j], 0. );
252  }
253  } else {
254  jmax = i + nb;
255  for (j = i; j < jmax; ++j) {
256  *A(j, j ) = MAGMA_D_MAKE( d[j], 0. );
257  *A(j+1, j ) = MAGMA_D_MAKE( e[j], 0. );
258  }
259  }
260  }
261 
262  /* Use unblocked code to reduce the remainder of the matrix */
263  nrow = m - i;
264  ncol = n - i;
265 
266  if ( 0 < minmn - nx ) {
267  magma_dgetmatrix( nrow, ncol, dA(i, i), ldda, A(i, i), lda );
268  }
269 
270  lapackf77_dgebrd( &nrow, &ncol,
271  A(i, i), &lda, d+i, e+i,
272  tauq+i, taup+i, work, &lwork, &iinfo);
273  work[0] = MAGMA_D_MAKE( lwkopt, 0. );
274 
275  magma_free( da );
276  return *info;
277 } /* dgebrd */
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define A(i, j)
Definition: dgebrd.cpp:13
magma_int_t magma_dlabrd_gpu(magma_int_t m, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *da, magma_int_t ldda, double *d, double *e, double *tauq, double *taup, double *x, magma_int_t ldx, double *dx, magma_int_t lddx, double *y, magma_int_t ldy, double *dy, magma_int_t lddy)
Definition: dlabrd_gpu.cpp:18
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define dwork(dev, i, j)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaTrans
Definition: magma.h:58
void lapackf77_dgebrd(const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, double *d, double *e, double *tauq, double *taup, double *work, const magma_int_t *lwork, magma_int_t *info)
magma_int_t ldda
#define dA(i, j)
Definition: dgebrd.cpp:14
#define MAGMA_SUCCESS
Definition: magma.h:106
void magma_dgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc)
magma_int_t magma_get_dgebrd_nb(magma_int_t m)
Definition: get_nb.cpp:458
#define MAGMA_D_NEG_ONE
Definition: magma.h:178
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702

Here is the call graph for this function:

Here is the caller graph for this function: