MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
dsyevd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7
8  @author Stan Tomov
9  @author Mark Gates
10
11  @precisions normal d -> s
12
13 */
14 #include "common_magma.h"
15
16 extern "C" magma_int_t
17 magma_dsyevd(char jobz, char uplo,
18  magma_int_t n,
19  double *a, magma_int_t lda,
20  double *w,
21  double *work, magma_int_t lwork,
22  magma_int_t *iwork, magma_int_t liwork,
23  magma_int_t *info)
24 {
25 /* -- MAGMA (version 1.4.0) --
26  Univ. of Tennessee, Knoxville
27  Univ. of California, Berkeley
28  Univ. of Colorado, Denver
29  August 2013
30
31  Purpose
32  =======
33  DSYEVD computes all eigenvalues and, optionally, eigenvectors of
34  a real symmetric matrix A. If eigenvectors are desired, it uses a
35  divide and conquer algorithm.
36
37  The divide and conquer algorithm makes very mild assumptions about
38  floating point arithmetic. It will work on machines with a guard
39  digit in add/subtract, or on those binary machines without guard
40  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
41  Cray-2. It could conceivably fail on hexadecimal or decimal machines
42  without guard digits, but we know of none.
43
44  Arguments
45  =========
46  JOBZ (input) CHARACTER*1
47  = 'N': Compute eigenvalues only;
48  = 'V': Compute eigenvalues and eigenvectors.
49
50  UPLO (input) CHARACTER*1
51  = 'U': Upper triangle of A is stored;
52  = 'L': Lower triangle of A is stored.
53
54  N (input) INTEGER
55  The order of the matrix A. N >= 0.
56
57  A (input/output) DOUBLE_PRECISION array, dimension (LDA, N)
58  On entry, the symmetric matrix A. If UPLO = 'U', the
59  leading N-by-N upper triangular part of A contains the
60  upper triangular part of the matrix A. If UPLO = 'L',
61  the leading N-by-N lower triangular part of A contains
62  the lower triangular part of the matrix A.
63  On exit, if JOBZ = 'V', then if INFO = 0, A contains the
64  orthonormal eigenvectors of the matrix A.
65  If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
66  or the upper triangle (if UPLO='U') of A, including the
67  diagonal, is destroyed.
68
69  LDA (input) INTEGER
70  The leading dimension of the array A. LDA >= max(1,N).
71
72  W (output) DOUBLE PRECISION array, dimension (N)
73  If INFO = 0, the eigenvalues in ascending order.
74
75  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
76  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
77
78  LWORK (input) INTEGER
79  The length of the array WORK.
80  If N <= 1, LWORK >= 1.
81  If JOBZ = 'N' and N > 1, LWORK >= 2*N + N*NB.
82  If JOBZ = 'V' and N > 1, LWORK >= max( 2*N + N*NB, 1 + 6*N + 2*N**2 ).
83  NB can be obtained through magma_get_dsytrd_nb(N).
84
85  If LWORK = -1, then a workspace query is assumed; the routine
86  only calculates the optimal sizes of the WORK and IWORK
87  arrays, returns these values as the first entries of the WORK
88  and IWORK arrays, and no error message related to LWORK or
89  LIWORK is issued by XERBLA.
90
91  IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
92  On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK.
93
94  LIWORK (input) INTEGER
95  The dimension of the array IWORK.
96  If N <= 1, LIWORK >= 1.
97  If JOBZ = 'N' and N > 1, LIWORK >= 1.
98  If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N.
99
100  If LIWORK = -1, then a workspace query is assumed; the
101  routine only calculates the optimal sizes of the WORK and
102  IWORK arrays, returns these values as the first entries of
103  the WORK and IWORK arrays, and no error message related to
104  LWORK or LIWORK is issued by XERBLA.
105
106  INFO (output) INTEGER
107  = 0: successful exit
108  < 0: if INFO = -i, the i-th argument had an illegal value
109  > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
110  to converge; i off-diagonal elements of an intermediate
111  tridiagonal form did not converge to zero;
112  if INFO = i and JOBZ = 'V', then the algorithm failed
113  to compute an eigenvalue while working on the submatrix
114  lying in rows and columns INFO/(N+1) through
115  mod(INFO,N+1).
116
117  Further Details
118  ===============
119  Based on contributions by
120  Jeff Rutter, Computer Science Division, University of California
121  at Berkeley, USA
122
123  Modified description of INFO. Sven, 16 Feb 05.
124  ===================================================================== */
125
126  char uplo_[2] = {uplo, 0};
127  char jobz_[2] = {jobz, 0};
128  magma_int_t ione = 1;
129  magma_int_t izero = 0;
130  double d_one = 1.;
131
132  double d__1;
133
134  double eps;
135  magma_int_t inde;
136  double anrm;
137  double rmin, rmax;
138  double sigma;
139  magma_int_t iinfo, lwmin;
140  magma_int_t lower;
141  magma_int_t wantz;
142  magma_int_t indwk2, llwrk2;
143  magma_int_t iscale;
144  double safmin;
145  double bignum;
146  magma_int_t indtau;
147  magma_int_t indwrk, liwmin;
148  magma_int_t llwork;
149  double smlnum;
150  magma_int_t lquery;
151
152  double* dwork;
153
154  wantz = lapackf77_lsame(jobz_, MagmaVecStr);
155  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
156  lquery = lwork == -1 || liwork == -1;
157
158  *info = 0;
159
160  if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVecStr))) {
161  *info = -1;
162  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
163  *info = -2;
164  } else if (n < 0) {
165  *info = -3;
166  } else if (lda < max(1,n)) {
167  *info = -5;
168  }
169
171  if ( n <= 1 ) {
172  lwmin = 1;
173  liwmin = 1;
174  }
175  else if ( wantz ) {
176  lwmin = max( 2*n + n*nb, 1 + 6*n + 2*n*n );
177  liwmin = 3 + 5*n;
178  }
179  else {
180  lwmin = 2*n + n*nb;
181  liwmin = 1;
182  }
183  // multiply by 1+eps to ensure length gets rounded up,
184  // if it cannot be exactly represented in floating point.
185  work[0] = lwmin * (1. + lapackf77_dlamch("Epsilon"));
186  iwork[0] = liwmin;
187
188  if ((lwork < lwmin) && !lquery) {
189  *info = -8;
190  } else if ((liwork < liwmin) && ! lquery) {
191  *info = -10;
192  }
193
194  if (*info != 0) {
195  magma_xerbla( __func__, -(*info) );
196  return *info;
197  }
198  else if (lquery) {
199  return *info;
200  }
201
202  /* Quick return if possible */
203  if (n == 0) {
204  return *info;
205  }
206
207  if (n == 1) {
208  w[0] = a[0];
209  if (wantz) {
210  a[0] = 1.;
211  }
212  return *info;
213  }
214
215  /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
216  if (n <= 128){
217  #ifdef ENABLE_DEBUG
218  printf("--------------------------------------------------------------\n");
219  printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
220  printf("--------------------------------------------------------------\n");
221  #endif
222  lapackf77_dsyevd(jobz_, uplo_,
223  &n, a, &lda,
224  w, work, &lwork,
225  iwork, &liwork, info);
226  return *info;
227  }
228
229  /* Get machine constants. */
230  safmin = lapackf77_dlamch("Safe minimum");
231  eps = lapackf77_dlamch("Precision");
232  smlnum = safmin / eps;
233  bignum = 1. / smlnum;
234  rmin = magma_dsqrt(smlnum);
235  rmax = magma_dsqrt(bignum);
236
237  /* Scale matrix to allowable range, if necessary. */
238  anrm = lapackf77_dlansy("M", uplo_, &n, a, &lda, work );
239  iscale = 0;
240  if (anrm > 0. && anrm < rmin) {
241  iscale = 1;
242  sigma = rmin / anrm;
243  } else if (anrm > rmax) {
244  iscale = 1;
245  sigma = rmax / anrm;
246  }
247  if (iscale == 1) {
248  lapackf77_dlascl(uplo_, &izero, &izero, &d_one, &sigma, &n, &n, a,
249  &lda, info);
250  }
251
252  /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */
253  // dsytrd work: e (n) + tau (n) + llwork (n*nb) ==> 2n + n*nb
254  // dstedx work: e (n) + tau (n) + z (n*n) + llwrk2 (1 + 4*n + n^2) ==> 1 + 6n + 2n^2
255  inde = 0;
256  indtau = inde + n;
257  indwrk = indtau + n;
258  indwk2 = indwrk + n*n;
259  llwork = lwork - indwrk;
260  llwrk2 = lwork - indwk2;
261
262 //
263 #ifdef ENABLE_TIMER
264  magma_timestr_t start, end;
265  start = get_current_time();
266 #endif
267
268  magma_dsytrd(uplo, n, a, lda, w, &work[inde],
269  &work[indtau], &work[indwrk], llwork, &iinfo);
270
271 #ifdef ENABLE_TIMER
272  end = get_current_time();
273  printf("time dsytrd = %6.2f\n", GetTimerValue(start,end)/1000.);
274 #endif
275
276  /* For eigenvalues only, call DSTERF. For eigenvectors, first call
277  DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
278  tridiagonal matrix, then call DORMTR to multiply it to the Householder
279  transformations represented as Householder vectors in A. */
280  if (! wantz) {
281  lapackf77_dsterf(&n, w, &work[inde], info);
282  } else {
283
284 #ifdef ENABLE_TIMER
285  start = get_current_time();
286 #endif
287
288  if (MAGMA_SUCCESS != magma_dmalloc( &dwork, 3*n*(n/2 + 1) )) {
289  *info = MAGMA_ERR_DEVICE_ALLOC;
290  return *info;
291  }
292
293  // TTT Possible bug for n < 128
294  magma_dstedx('A', n, 0., 0., 0, 0, w, &work[inde],
295  &work[indwrk], n, &work[indwk2],
296  llwrk2, iwork, liwork, dwork, info);
297
298  magma_free( dwork );
299
300 #ifdef ENABLE_TIMER
301  end = get_current_time();
302  printf("time dstedx = %6.2f\n", GetTimerValue(start,end)/1000.);
303  start = get_current_time();
304 #endif
305
306  magma_dormtr(MagmaLeft, uplo, MagmaNoTrans, n, n, a, lda, &work[indtau],
307  &work[indwrk], n, &work[indwk2], llwrk2, &iinfo);
308
309  lapackf77_dlacpy("A", &n, &n, &work[indwrk], &n, a, &lda);
310
311 #ifdef ENABLE_TIMER
312  end = get_current_time();
313  printf("time dormtr + copy = %6.2f\n", GetTimerValue(start,end)/1000.);
314 #endif
315  }
316
317  /* If matrix was scaled, then rescale eigenvalues appropriately. */
318  if (iscale == 1) {
319  d__1 = 1. / sigma;
320  blasf77_dscal(&n, &d__1, w, &ione);
321  }
322
323  work[0] = lwmin * (1. + lapackf77_dlamch("Epsilon")); // round up
324  iwork[0] = liwmin;
325
326  return *info;
327 } /* magma_dsyevd */
magma_int_t magma_dsytrd(char uplo, magma_int_t n, double *A, magma_int_t lda, double *d, double *e, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
Definition: dsytrd.cpp:20
#define magma_dsqrt
Definition: common_magma.h:98
void lapackf77_dlacpy(const char *uplo, const magma_int_t *m, const magma_int_t *n, const double *A, const magma_int_t *lda, double *B, const magma_int_t *ldb)
magma_int_t magma_get_dsytrd_nb(magma_int_t m)
Definition: get_nb.cpp:391
#define lapackf77_dsterf
Definition: magma_lapack.h:33
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define lapackf77_dlamch
Definition: magma_lapack.h:27
#define dwork(dev, i, j)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
magma_int_t magma_dstedx(char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *d, double *e, double *z, magma_int_t ldz, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, double *dwork, magma_int_t *info)
Definition: dstedx.cpp:18
#define MagmaLowerStr
Definition: magma.h:85
void lapackf77_dsyevd(const char *jobz, const char *uplo, const magma_int_t *n, double *A, const magma_int_t *lda, double *w, double *work, const magma_int_t *lwork, DWORKFORZ_AND_LD magma_int_t *iwork, const magma_int_t *liwork, magma_int_t *info)
void lapackf77_dlascl(const char *type, const magma_int_t *kl, const magma_int_t *ku, double *cfrom, double *cto, const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, magma_int_t *info)
magma_int_t magma_dsyevd(char jobz, char uplo, magma_int_t n, double *a, magma_int_t lda, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
Definition: dsyevd.cpp:17
#define MAGMA_SUCCESS
Definition: magma.h:106
void blasf77_dscal(const magma_int_t *n, const double *alpha, double *x, const magma_int_t *incx)
#define MagmaNoVecStr
Definition: magma_types.h:423
double lapackf77_dlansy(const char *norm, const char *uplo, const magma_int_t *n, const double *A, const magma_int_t *lda, double *work)
magma_int_t magma_dormtr(char side, char uplo, char trans, magma_int_t m, magma_int_t n, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
Definition: dormtr.cpp:17
double GetTimerValue(magma_timestr_t start, magma_timestr_t end)
Definition: timer.cpp:94
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define MagmaVecStr
Definition: magma_types.h:424
#define max(a, b)
Definition: common_magma.h:82
magma_timestr_t get_current_time(void)
Definition: timer.cpp:76