MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
cheevr.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Raffaele Solca
9 
10  @generated c Thu May 10 22:27:01 2012
11 
12 */
13 #include "common_magma.h"
14 
15 extern "C" {
16 #ifdef ADD_
17 # define lapackf77_ieeeck ieeeck_
18 #elif defined NOCHANGE
19 # define lapackf77_ieeeck ieeeck
20 #endif
21  magma_int_t lapackf77_ieeeck(magma_int_t* ispec, float* zero, float* one);
22 }
23 
24 extern "C" magma_int_t
25 magma_cheevr(char jobz, char range, char uplo, magma_int_t n,
26  cuFloatComplex *a, magma_int_t lda, float vl, float vu,
27  magma_int_t il, magma_int_t iu, float abstol, magma_int_t *m,
28  float *w, cuFloatComplex *z, magma_int_t ldz, magma_int_t *isuppz,
29  cuFloatComplex *work, magma_int_t lwork,
30  float *rwork, magma_int_t lrwork, magma_int_t *iwork,
31  magma_int_t liwork, magma_int_t *info)
32 {
33  /* -- MAGMA (version 1.2.0) --
34  Univ. of Tennessee, Knoxville
35  Univ. of California, Berkeley
36  Univ. of Colorado, Denver
37  May 2012
38 
39  Purpose
40  =======
41 
42  CHEEVR computes selected eigenvalues and, optionally, eigenvectors
43  of a complex Hermitian matrix T. Eigenvalues and eigenvectors can
44  be selected by specifying either a range of values or a range of
45  indices for the desired eigenvalues.
46 
47  Whenever possible, CHEEVR calls ZSTEGR to compute the
48  eigenspectrum using Relatively Robust Representations. ZSTEGR
49  computes eigenvalues by the dqds algorithm, while orthogonal
50  eigenvectors are computed from various "good" L D L^T representations
51  (also known as Relatively Robust Representations). Gram-Schmidt
52  orthogonalization is avoided as far as possible. More specifically,
53  the various steps of the algorithm are as follows. For the i-th
54  unreduced block of T,
55  (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
56  is a relatively robust representation,
57  (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
58  relative accuracy by the dqds algorithm,
59  (c) If there is a cluster of close eigenvalues, "choose" sigma_i
60  close to the cluster, and go to step (a),
61  (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
62  compute the corresponding eigenvector by forming a
63  rank-revealing twisted factorization.
64  The desired accuracy of the output can be specified by the input
65  parameter ABSTOL.
66 
67  For more details, see "A new O(n^2) algorithm for the symmetric
68  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
69  Computer Science Division Technical Report No. UCB//CSD-97-971,
70  UC Berkeley, May 1997.
71 
72 
73  Note 1 : CHEEVR calls ZSTEGR when the full spectrum is requested
74  on machines which conform to the ieee-754 floating point standard.
75  CHEEVR calls DSTEBZ and ZSTEIN on non-ieee machines and
76  when partial spectrum requests are made.
77 
78  Normal execution of ZSTEGR may create NaNs and infinities and
79  hence may abort due to a floating point exception in environments
80  which do not handle NaNs and infinities in the ieee standard default
81  manner.
82 
83  Arguments
84  =========
85  JOBZ (input) CHARACTER*1
86  = 'N': Compute eigenvalues only;
87  = 'V': Compute eigenvalues and eigenvectors.
88 
89  RANGE (input) CHARACTER*1
90  = 'A': all eigenvalues will be found.
91  = 'V': all eigenvalues in the half-open interval (VL,VU]
92  will be found.
93  = 'I': the IL-th through IU-th eigenvalues will be found.
94 
95  UPLO (input) CHARACTER*1
96  = 'U': Upper triangle of A is stored;
97  = 'L': Lower triangle of A is stored.
98 
99  N (input) INTEGER
100  The order of the matrix A. N >= 0.
101 
102  A (input/output) COMPLEX*16 array, dimension (LDA, N)
103  On entry, the Hermitian matrix A. If UPLO = 'U', the
104  leading N-by-N upper triangular part of A contains the
105  upper triangular part of the matrix A. If UPLO = 'L',
106  the leading N-by-N lower triangular part of A contains
107  the lower triangular part of the matrix A.
108  On exit, the lower triangle (if UPLO='L') or the upper
109  triangle (if UPLO='U') of A, including the diagonal, is
110  destroyed.
111 
112  LDA (input) INTEGER
113  The leading dimension of the array A. LDA >= max(1,N).
114 
115  VL (input) DOUBLE PRECISION
116  VU (input) DOUBLE PRECISION
117  If RANGE='V', the lower and upper bounds of the interval to
118  be searched for eigenvalues. VL < VU.
119  Not referenced if RANGE = 'A' or 'I'.
120 
121  IL (input) INTEGER
122  IU (input) INTEGER
123  If RANGE='I', the indices (in ascending order) of the
124  smallest and largest eigenvalues to be returned.
125  1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
126  Not referenced if RANGE = 'A' or 'V'.
127 
128  ABSTOL (input) DOUBLE PRECISION
129  The absolute error tolerance for the eigenvalues.
130  An approximate eigenvalue is accepted as converged
131  when it is determined to lie in an interval [a,b]
132  of width less than or equal to
133 
134  ABSTOL + EPS * max( |a|,|b| ) ,
135 
136  where EPS is the machine precision. If ABSTOL is less than
137  or equal to zero, then EPS*|T| will be used in its place,
138  where |T| is the 1-norm of the tridiagonal matrix obtained
139  by reducing A to tridiagonal form.
140 
141  See "Computing Small Singular Values of Bidiagonal Matrices
142  with Guaranteed High Relative Accuracy," by Demmel and
143  Kahan, LAPACK Working Note #3.
144 
145  If high relative accuracy is important, set ABSTOL to
146  DLAMCH( 'Safe minimum' ). Doing so will guarantee that
147  eigenvalues are computed to high relative accuracy when
148  possible in future releases. The current code does not
149  make any guarantees about high relative accuracy, but
150  furutre releases will. See J. Barlow and J. Demmel,
151  "Computing Accurate Eigensystems of Scaled Diagonally
152  Dominant Matrices", LAPACK Working Note #7, for a discussion
153  of which matrices define their eigenvalues to high relative
154  accuracy.
155 
156  M (output) INTEGER
157  The total number of eigenvalues found. 0 <= M <= N.
158  If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
159 
160  W (output) DOUBLE PRECISION array, dimension (N)
161  The first M elements contain the selected eigenvalues in
162  ascending order.
163 
164  Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M))
165  If JOBZ = 'V', then if INFO = 0, the first M columns of Z
166  contain the orthonormal eigenvectors of the matrix A
167  corresponding to the selected eigenvalues, with the i-th
168  column of Z holding the eigenvector associated with W(i).
169  If JOBZ = 'N', then Z is not referenced.
170  Note: the user must ensure that at least max(1,M) columns are
171  supplied in the array Z; if RANGE = 'V', the exact value of M
172  is not known in advance and an upper bound must be used.
173 
174  LDZ (input) INTEGER
175  The leading dimension of the array Z. LDZ >= 1, and if
176  JOBZ = 'V', LDZ >= max(1,N).
177 
178  ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
179  The support of the eigenvectors in Z, i.e., the indices
180  indicating the nonzero elements in Z. The i-th eigenvector
181  is nonzero only in elements ISUPPZ( 2*i-1 ) through
182  ISUPPZ( 2*i ).
183  ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
184 
185  WORK (workspace/output) COMPLEX*16 array, dimension (LWORK)
186  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
187 
188  LWORK (input) INTEGER
189  The length of the array WORK. LWORK >= max(1,2*N).
190  For optimal efficiency, LWORK >= (NB+1)*N,
191  where NB is the max of the blocksize for CHETRD and for
192  CUNMTR as returned by ILAENV.
193 
194  If LWORK = -1, then a workspace query is assumed; the routine
195  only calculates the optimal size of the WORK array, returns
196  this value as the first entry of the WORK array, and no error
197  message related to LWORK is issued by XERBLA.
198 
199  RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK)
200  On exit, if INFO = 0, RWORK(1) returns the optimal
201  (and minimal) LRWORK.
202 
203  LRWORK (input) INTEGER
204  The length of the array RWORK. LRWORK >= max(1,24*N).
205 
206  If LRWORK = -1, then a workspace query is assumed; the routine
207  only calculates the optimal size of the RWORK array, returns
208  this value as the first entry of the RWORK array, and no error
209  message related to LRWORK is issued by XERBLA.
210 
211  IWORK (workspace/output) INTEGER array, dimension (LIWORK)
212  On exit, if INFO = 0, IWORK(1) returns the optimal
213  (and minimal) LIWORK.
214 
215  LIWORK (input) INTEGER
216  The dimension of the array IWORK. LIWORK >= max(1,10*N).
217 
218  If LIWORK = -1, then a workspace query is assumed; the
219  routine only calculates the optimal size of the IWORK array,
220  returns this value as the first entry of the IWORK array, and
221  no error message related to LIWORK is issued by XERBLA.
222 
223  INFO (output) INTEGER
224  = 0: successful exit
225  < 0: if INFO = -i, the i-th argument had an illegal value
226  > 0: Internal error
227 
228  Further Details
229  ===============
230  Based on contributions by
231  Inderjit Dhillon, IBM Almaden, USA
232  Osni Marques, LBNL/NERSC, USA
233  Ken Stanley, Computer Science Division, University of
234  California at Berkeley, USA
235  ===================================================================== */
236 
237  char uplo_[2] = {uplo, 0};
238  char jobz_[2] = {jobz, 0};
239  char range_[2] = {range, 0};
240 
241  static magma_int_t izero = 0;
242  static magma_int_t ione = 1;
243  static float szero = 0.;
244  static float sone = 1.;
245 
246  static magma_int_t indrd, indre;
247  static magma_int_t imax;
248  static magma_int_t lopt, itmp1, indree, indrdd;
249  static magma_int_t lower, wantz, tryrac;
250  static magma_int_t i, j, jj, i__1;
251  static magma_int_t alleig, valeig, indeig;
252  static magma_int_t iscale, indibl, indifl;
253  static magma_int_t indiwo, indisp, indtau;
254  static magma_int_t indrwk, indwk;
255  static magma_int_t llwork, llrwork, nsplit;
256  static magma_int_t lquery, ieeeok;
257  static magma_int_t iinfo;
258  static magma_int_t lwmin, lrwmin, liwmin;
259  static float safmin;
260  static float bignum;
261  static float smlnum;
262  static float eps, tmp1;
263  static float anrm;
264  static float sigma, d__1;
265  static float rmin, rmax;
266 
267  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
268  wantz = lapackf77_lsame(jobz_, MagmaVectorsStr);
269  alleig = lapackf77_lsame(range_, "A");
270  valeig = lapackf77_lsame(range_, "V");
271  indeig = lapackf77_lsame(range_, "I");
272  lquery = lwork == -1 || lrwork == -1 || liwork == -1;
273 
274  *info = 0;
275  if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVectorsStr))) {
276  *info = -1;
277  } else if (! (alleig || valeig || indeig)) {
278  *info = -2;
279  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
280  *info = -3;
281  } else if (n < 0) {
282  *info = -4;
283  } else if (lda < max(1,n)) {
284  *info = -6;
285  } else if (ldz < 1 || wantz && ldz < n) {
286  *info = -15;
287  } else {
288  if (valeig) {
289  if (n > 0 && vu <= vl) {
290  *info = -8;
291  }
292  } else if (indeig) {
293  if (il < 1 || il > max(1,n)) {
294  *info = -9;
295  } else if (iu < min(n,il) || iu > n) {
296  *info = -10;
297  }
298  }
299  }
300 
302 
303  lwmin = n * (nb + 1);
304  lrwmin = 24 * n;
305  liwmin = 10 * n;
306 
307  MAGMA_C_SET2REAL(work[0],(float)lwmin);
308  rwork[0] = lrwmin;
309  iwork[0] = liwmin;
310 
311  if (lwork < lwmin && ! lquery) {
312  *info = -18;
313  } else if ((lrwork < lrwmin) && ! lquery) {
314  *info = -20;
315  } else if ((liwork < liwmin) && ! lquery) {
316  *info = -22;
317  }
318 
319  if (*info != 0) {
320  magma_xerbla(__func__, -(*info));
321  return *info;
322  } else if (lquery) {
323  return *info;
324  }
325 
326  /* Quick return if possible */
327 
328  *m = 0;
329  if (n == 0) {
330  return *info;
331  }
332 
333  if (n == 1) {
334  w[0] = MAGMA_C_REAL(a[0]);
335  if (alleig || indeig) {
336  *m = 1;
337  } else if (valeig) {
338  if (vl < w[0] && vu >= w[0]) {
339  *m = 1;
340  }
341  }
342  if (wantz) {
343  z[0]=MAGMA_C_ONE;
344  }
345  return *info;
346  }
347 
348  --w;
349  --work;
350  --rwork;
351  --iwork;
352  --isuppz;
353 
354  /* Get machine constants. */
355  safmin = lapackf77_slamch("Safe minimum");
356  eps = lapackf77_slamch("Precision");
357  smlnum = safmin / eps;
358  bignum = 1. / smlnum;
359  rmin = magma_ssqrt(smlnum);
360  rmax = magma_ssqrt(bignum);
361 
362  /* Scale matrix to allowable range, if necessary. */
363 
364 
365  anrm = lapackf77_clanhe("M", uplo_, &n, a, &lda, &rwork[1]);
366  iscale = 0;
367  if (anrm > 0. && anrm < rmin) {
368  iscale = 1;
369  sigma = rmin / anrm;
370  } else if (anrm > rmax) {
371  iscale = 1;
372  sigma = rmax / anrm;
373  }
374  if (iscale == 1) {
375  d__1 = 1.;
376  lapackf77_clascl(uplo_, &izero, &izero, &d__1, &sigma, &n, &n, a,
377  &lda, info);
378 
379  if (abstol > 0.) {
380  abstol *= sigma;
381  }
382  if (valeig) {
383  vl *= sigma;
384  vu *= sigma;
385  }
386  }
387 
388  /* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */
389 
390  indtau = 1;
391  indwk = indtau + n;
392 
393  indre = 1;
394  indrd = indre + n;
395  indree = indrd + n;
396  indrdd = indree + n;
397  indrwk = indrdd + n;
398  llwork = lwork - indwk + 1;
399  llrwork = lrwork - indrwk + 1;
400 
401  indifl = 1;
402  indibl = indifl + n;
403  indisp = indibl + n;
404  indiwo = indisp + n;
405 
406  magma_chetrd(uplo, n, a, lda, &rwork[indrd], &rwork[indre], &work[indtau], &work[indwk], llwork, &iinfo);
407 
408  lopt = n + (magma_int_t)MAGMA_C_REAL(work[indwk]);
409 
410 
411  /* If all eigenvalues are desired and ABSTOL is less than or equal to
412  zero, then call DSTERF
413  or ZUNGTR and ZSTEQR. If this fails for
414  some eigenvalue, then try DSTEBZ. */
415  ieeeok = lapackf77_ieeeck( &ione, &szero, &sone);
416 
417  /* If only the eigenvalues are required call DSTERF for all or DSTEBZ for a part */
418 
419  if (! wantz) {
420  blasf77_scopy(&n, &rwork[indrd], &ione, &w[1], &ione);
421  i__1 = n - 1;
422  if ((alleig || indeig && il == 1 && iu == n)){
423  lapackf77_ssterf(&n, &w[1], &rwork[indre], info);
424  *m = n;
425  } else {
426  lapackf77_sstebz(range_, "E", &n, &vl, &vu, &il, &iu, &abstol,
427  &rwork[indrd], &rwork[indre], m,
428  &nsplit, &w[1], &iwork[indibl], &iwork[indisp],
429  &rwork[indrwk], &iwork[indiwo], info);
430  }
431 
432  /* Otherwise call ZSTEMR if infinite and NaN arithmetic is supported */
433 
434  } else if (ieeeok==1){
435  i__1 = n - 1;
436 
437  blasf77_scopy(&i__1, &rwork[indre], &ione, &rwork[indree], &ione);
438  blasf77_scopy(&n, &rwork[indrd], &ione, &rwork[indrdd], &ione);
439 
440  if (abstol < 2*n*eps)
441  tryrac=1;
442  else
443  tryrac=0;
444 
445  lapackf77_cstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il,
446  &iu, m, &w[1], z, &ldz, &n, &isuppz[1], &tryrac, &rwork[indrwk],
447  &llrwork, &iwork[1], &liwork, info);
448 
449  if (*info == 0 && wantz) {
450  magma_cunmtr(MagmaLeft, uplo, MagmaNoTrans, n, *m, a, lda, &work[indtau],
451  z, ldz, &work[indwk], llwork, &iinfo);
452  }
453  }
454 
455 
456  /* Call DSTEBZ and ZSTEIN if infinite and NaN arithmetic is not supported or ZSTEMR didn't converge. */
457  if (wantz && (ieeeok ==0 || *info != 0)) {
458  *info = 0;
459 
460  lapackf77_sstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m,
461  &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwo], info);
462 
463  lapackf77_cstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp],
464  z, &ldz, &rwork[indrwk], &iwork[indiwo], &iwork[indifl], info);
465 
466  /* Apply unitary matrix used in reduction to tridiagonal
467  form to eigenvectors returned by ZSTEIN. */
468  magma_cunmtr(MagmaLeft, uplo, MagmaNoTrans, n, *m, a, lda, &work[indtau],
469  z, ldz, &work[indwk], llwork, &iinfo);
470  }
471 
472  /* If matrix was scaled, then rescale eigenvalues appropriately. */
473  if (iscale == 1) {
474  if (*info == 0) {
475  imax = *m;
476  } else {
477  imax = *info - 1;
478  }
479  d__1 = 1. / sigma;
480  sscal_(&imax, &d__1, &w[1], &ione);
481  }
482 
483  /* If eigenvalues are not in order, then sort them, along with
484  eigenvectors. */
485  if (wantz) {
486  for (j = 1; j <= *m-1; ++j) {
487  i = 0;
488  tmp1 = w[j];
489  for (jj = j + 1; jj <= *m; ++jj) {
490  if (w[jj] < tmp1) {
491  i = jj;
492  tmp1 = w[jj];
493  }
494  }
495 
496  if (i != 0) {
497  itmp1 = iwork[indibl + i - 1];
498  w[i] = w[j];
499  iwork[indibl + i - 1] = iwork[indibl + j - 1];
500  w[j] = tmp1;
501  iwork[indibl + j - 1] = itmp1;
502  blasf77_cswap(&n, z + (i-1)*ldz, &ione, z + (j-1)*ldz, &ione);
503  }
504  }
505  }
506 
507  /* Set WORK(1) to optimal complex workspace size. */
508 
509  work[1] = MAGMA_C_MAKE((float) lopt, 0.);
510  rwork[1] = (float) lrwmin;
511  iwork[1] = liwmin;
512 
513  return *info;
514 
515 } /* magma_cheevr */
516