MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dsytrd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Stan Tomov
9  @author Raffaele Solca
10 
11  @generated d Wed Aug 14 12:16:14 2013
12 
13 */
14 #include "common_magma.h"
15 
16 #define A(i, j) ( a+(j)*lda + (i))
17 #define dA(i, j) (da+(j)*ldda + (i))
18 
19 extern "C" magma_int_t
21  double *a, magma_int_t lda,
22  double *d, double *e, double *tau,
23  double *work, magma_int_t lwork,
24  magma_int_t *info)
25 {
26 /* -- MAGMA (version 1.4.0) --
27  Univ. of Tennessee, Knoxville
28  Univ. of California, Berkeley
29  Univ. of Colorado, Denver
30  August 2013
31 
32  Purpose
33  =======
34  DSYTRD reduces a real symmetric matrix A to real symmetric
35  tridiagonal form T by an orthogonal similarity transformation:
36  Q**T * A * Q = T.
37 
38  Arguments
39  =========
40  UPLO (input) CHARACTER*1
41  = 'U': Upper triangle of A is stored;
42  = 'L': Lower triangle of A is stored.
43 
44  N (input) INTEGER
45  The order of the matrix A. N >= 0.
46 
47  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
48  On entry, the symmetric matrix A. If UPLO = 'U', the leading
49  N-by-N upper triangular part of A contains the upper
50  triangular part of the matrix A, and the strictly lower
51  triangular part of A is not referenced. If UPLO = 'L', the
52  leading N-by-N lower triangular part of A contains the lower
53  triangular part of the matrix A, and the strictly upper
54  triangular part of A is not referenced.
55  On exit, if UPLO = 'U', the diagonal and first superdiagonal
56  of A are overwritten by the corresponding elements of the
57  tridiagonal matrix T, and the elements above the first
58  superdiagonal, with the array TAU, represent the orthogonal
59  matrix Q as a product of elementary reflectors; if UPLO
60  = 'L', the diagonal and first subdiagonal of A are over-
61  written by the corresponding elements of the tridiagonal
62  matrix T, and the elements below the first subdiagonal, with
63  the array TAU, represent the orthogonal matrix Q as a product
64  of elementary reflectors. See Further Details.
65 
66  LDA (input) INTEGER
67  The leading dimension of the array A. LDA >= max(1,N).
68 
69  D (output) DOUBLE_PRECISION array, dimension (N)
70  The diagonal elements of the tridiagonal matrix T:
71  D(i) = A(i,i).
72 
73  E (output) DOUBLE_PRECISION array, dimension (N-1)
74  The off-diagonal elements of the tridiagonal matrix T:
75  E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
76 
77  TAU (output) DOUBLE_PRECISION array, dimension (N-1)
78  The scalar factors of the elementary reflectors (see Further
79  Details).
80 
81  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
82  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
83 
84  LWORK (input) INTEGER
85  The dimension of the array WORK. LWORK >= N*NB, where NB is the
86  optimal blocksize given by magma_get_dsytrd_nb().
87 
88  If LWORK = -1, then a workspace query is assumed; the routine
89  only calculates the optimal size of the WORK array, returns
90  this value as the first entry of the WORK array, and no error
91  message related to LWORK is issued by XERBLA.
92 
93  INFO (output) INTEGER
94  = 0: successful exit
95  < 0: if INFO = -i, the i-th argument had an illegal value
96 
97  Further Details
98  ===============
99  If UPLO = 'U', the matrix Q is represented as a product of elementary
100  reflectors
101 
102  Q = H(n-1) . . . H(2) H(1).
103 
104  Each H(i) has the form
105 
106  H(i) = I - tau * v * v'
107 
108  where tau is a real scalar, and v is a real vector with
109  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
110  A(1:i-1,i+1), and tau in TAU(i).
111 
112  If UPLO = 'L', the matrix Q is represented as a product of elementary
113  reflectors
114 
115  Q = H(1) H(2) . . . H(n-1).
116 
117  Each H(i) has the form
118 
119  H(i) = I - tau * v * v'
120 
121  where tau is a real scalar, and v is a real vector with
122  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
123  and tau in TAU(i).
124 
125  The contents of A on exit are illustrated by the following examples
126  with n = 5:
127 
128  if UPLO = 'U': if UPLO = 'L':
129 
130  ( d e v2 v3 v4 ) ( d )
131  ( d e v3 v4 ) ( e d )
132  ( d e v4 ) ( v1 e d )
133  ( d e ) ( v1 v2 e d )
134  ( d ) ( v1 v2 v3 e d )
135 
136  where d and e denote diagonal and off-diagonal elements of T, and vi
137  denotes an element of the vector defining H(i).
138  ===================================================================== */
139 
140  char uplo_[2] = {uplo, 0};
141 
142  magma_int_t ldda = lda;
144 
145  double c_neg_one = MAGMA_D_NEG_ONE;
146  double c_one = MAGMA_D_ONE;
147  double d_one = MAGMA_D_ONE;
148 
149  magma_int_t kk, nx;
150  magma_int_t i, j, i_n;
151  magma_int_t iinfo;
152  magma_int_t ldwork, lddwork, lwkopt;
153  magma_int_t lquery;
154 
155  *info = 0;
156  int upper = lapackf77_lsame(uplo_, "U");
157  lquery = lwork == -1;
158  if (! upper && ! lapackf77_lsame(uplo_, "L")) {
159  *info = -1;
160  } else if (n < 0) {
161  *info = -2;
162  } else if (lda < max(1,n)) {
163  *info = -4;
164  } else if (lwork < nb*n && ! lquery) {
165  *info = -9;
166  }
167 
168  /* Determine the block size. */
169  ldwork = lddwork = n;
170  lwkopt = n * nb;
171  if (*info == 0) {
172  MAGMA_D_SET2REAL( work[0], lwkopt );
173  }
174 
175  if (*info != 0) {
176  magma_xerbla( __func__, -(*info) );
177  return *info;
178  }
179  else if (lquery)
180  return *info;
181 
182  /* Quick return if possible */
183  if (n == 0) {
184  work[0] = c_one;
185  return *info;
186  }
187 
188  double *da;
189  if (MAGMA_SUCCESS != magma_dmalloc( &da, n*ldda + 2*n*nb )) {
190  *info = MAGMA_ERR_DEVICE_ALLOC;
191  return *info;
192  }
193 
194  double *dwork = da + (n)*ldda;
195 
196  if (n < 2048)
197  nx = n;
198  else
199  nx = 512;
200 
201  if (upper) {
202  /* Copy the matrix to the GPU */
203  magma_dsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
204 
205  /* Reduce the upper triangle of A.
206  Columns 1:kk are handled by the unblocked method. */
207  kk = n - (n - nx + nb - 1) / nb * nb;
208 
209  for (i = n - nb; i >= kk; i -= nb) {
210  /* Reduce columns i:i+nb-1 to tridiagonal form and form the
211  matrix W which is needed to update the unreduced part of
212  the matrix */
213 
214  /* Get the current panel (no need for the 1st iteration) */
215  if (i!=n-nb)
216  magma_dgetmatrix( i+nb, nb, dA(0, i), ldda, A(0, i), lda );
217 
218  magma_dlatrd(uplo, i+nb, nb, A(0, 0), lda, e, tau,
219  work, ldwork, dA(0, 0), ldda, dwork, lddwork);
220 
221  /* Update the unreduced submatrix A(0:i-2,0:i-2), using an
222  update of the form: A := A - V*W' - W*V' */
223  magma_dsetmatrix( i + nb, nb, work, ldwork, dwork, lddwork );
224 
225  magma_dsyr2k(uplo, MagmaNoTrans, i, nb, c_neg_one,
226  dA(0, i), ldda, dwork,
227  lddwork, d_one, dA(0, 0), ldda);
228 
229  /* Copy superdiagonal elements back into A, and diagonal
230  elements into D */
231  for (j = i; j < i+nb; ++j) {
232  MAGMA_D_SET2REAL( *A(j-1, j), e[j - 1] );
233  d[j] = MAGMA_D_REAL( *A(j, j) );
234  }
235  }
236 
237  magma_dgetmatrix( kk, kk, dA(0, 0), ldda, A(0, 0), lda );
238 
239  /* Use unblocked code to reduce the last or only block */
240  lapackf77_dsytd2(uplo_, &kk, A(0, 0), &lda, d, e, tau, &iinfo);
241  }
242  else {
243  /* Copy the matrix to the GPU */
244  if (1<=n-nx)
245  magma_dsetmatrix( n, n, A(0,0), lda, dA(0,0), ldda );
246 
247  #ifdef FAST_HEMV
248  // TODO this leaks memory from da, above
249  double *dwork2;
250  if (MAGMA_SUCCESS != magma_dmalloc( &dwork2, n*n )) {
251  *info = MAGMA_ERR_DEVICE_ALLOC;
252  return *info;
253  }
254  #endif
255  /* Reduce the lower triangle of A */
256  for (i = 0; i < n-nx; i += nb) {
257  /* Reduce columns i:i+nb-1 to tridiagonal form and form the
258  matrix W which is needed to update the unreduced part of
259  the matrix */
260 
261  /* Get the current panel (no need for the 1st iteration) */
262  if (i!=0)
263  magma_dgetmatrix( n-i, nb, dA(i, i), ldda, A(i, i), lda );
264  #ifdef FAST_HEMV
265  magma_dlatrd2(uplo, n-i, nb, A(i, i), lda, &e[i],
266  &tau[i], work, ldwork,
267  dA(i, i), ldda,
268  dwork, lddwork, dwork2, n*n);
269  #else
270  magma_dlatrd(uplo, n-i, nb, A(i, i), lda, &e[i],
271  &tau[i], work, ldwork,
272  dA(i, i), ldda,
273  dwork, lddwork);
274  #endif
275  /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
276  an update of the form: A := A - V*W' - W*V' */
277  magma_dsetmatrix( n-i, nb, work, ldwork, dwork, lddwork );
278 
279  magma_dsyr2k(MagmaLower, MagmaNoTrans, n-i-nb, nb, c_neg_one,
280  dA(i+nb, i), ldda,
281  &dwork[nb], lddwork, d_one,
282  dA(i+nb, i+nb), ldda);
283 
284  /* Copy subdiagonal elements back into A, and diagonal
285  elements into D */
286  for (j = i; j < i+nb; ++j) {
287  MAGMA_D_SET2REAL( *A(j+1, j), e[j] );
288  d[j] = MAGMA_D_REAL( *A(j, j) );
289  }
290  }
291 
292  #ifdef FAST_HEMV
293  magma_free( dwork2 );
294  #endif
295 
296  /* Use unblocked code to reduce the last or only block */
297  if (1<=n-nx)
298  magma_dgetmatrix( n-i, n-i, dA(i, i), ldda, A(i, i), lda );
299  i_n = n-i;
300  lapackf77_dsytrd(uplo_, &i_n, A(i, i), &lda, &d[i], &e[i],
301  &tau[i], work, &lwork, &iinfo);
302 
303  }
304 
305  magma_free( da );
306  MAGMA_D_SET2REAL( work[0], lwkopt );
307 
308  return *info;
309 } /* magma_dsytrd */
magma_int_t magma_dsytrd(char uplo, magma_int_t n, double *A, magma_int_t lda, double *d, double *e, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
Definition: dsytrd.cpp:20
magma_int_t magma_get_dsytrd_nb(magma_int_t m)
Definition: get_nb.cpp:391
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
#define A(i, j)
Definition: dsytrd.cpp:16
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
magma_int_t ldda
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define dwork(dev, i, j)
#define MagmaLower
Definition: magma.h:62
#define dA(i, j)
Definition: dsytrd.cpp:17
magma_int_t magma_dlatrd2(char uplo, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *e, double *tau, double *w, magma_int_t ldw, double *da, magma_int_t ldda, double *dw, magma_int_t lddw, double *dwork, magma_int_t ldwork)
Definition: dlatrd2.cpp:29
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MAGMA_D_REAL(a)
Definition: magma.h:168
void magma_dsyr2k(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc)
#define MAGMA_SUCCESS
Definition: magma.h:106
void lapackf77_dsytd2(const char *uplo, const magma_int_t *n, double *A, const magma_int_t *lda, double *d, double *e, double *tau, magma_int_t *info)
#define MAGMA_D_SET2REAL(v, t)
Definition: magma.h:159
void lapackf77_dsytrd(const char *uplo, const magma_int_t *n, double *A, const magma_int_t *lda, double *d, double *e, double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)
#define MAGMA_D_NEG_ONE
Definition: magma.h:178
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
magma_int_t magma_dlatrd(char uplo, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *e, double *tau, double *w, magma_int_t ldw, double *da, magma_int_t ldda, double *dw, magma_int_t lddw)
Definition: dlatrd.cpp:28
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702