MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zheevx_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Raffaele Solca
9 
10  @precisions normal z -> c
11 
12  */
13 #include "common_magma.h"
14 
15 /* These interfaces are used for TAU profiling */
16 extern"C"{
17  void Mylapackf77_zstein(magma_int_t *n, double *d, double *e, magma_int_t *m,
18  double *w, magma_int_t *iblock, magma_int_t *isplit,
19  cuDoubleComplex *z, magma_int_t *ldz, double *work,
20  magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
21  {
22  lapackf77_zstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info);
23  }
24 
25  void Mylapackf77_dstebz(char *range, char *order, magma_int_t *n, double *vl,
26  double *vu, magma_int_t *il, magma_int_t *iu, double *abstol,
27  double *d, double *e, magma_int_t *m, magma_int_t *nsplit,
28  double *w, magma_int_t *iblock, magma_int_t *isplit,
29  double *work, magma_int_t *iwork, magma_int_t *info)
30  {
31  lapackf77_dstebz(range, order, n, vl, vu, il, iu, abstol,
32  d, e, m, nsplit, w, iblock, isplit, work, iwork,info);
33  }
34 }
35 
36 extern "C" magma_int_t
37 magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n,
38  cuDoubleComplex *da, magma_int_t ldda, double vl, double vu,
39  magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m,
40  double *w, cuDoubleComplex *dz, magma_int_t lddz,
41  cuDoubleComplex *wa, magma_int_t ldwa,
42  cuDoubleComplex *wz, magma_int_t ldwz,
43  cuDoubleComplex *work, magma_int_t lwork,
44  double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
45 {
46 /* -- MAGMA (version 1.2.0) --
47  Univ. of Tennessee, Knoxville
48  Univ. of California, Berkeley
49  Univ. of Colorado, Denver
50  May 2012
51 
52  Purpose
53  =======
54  ZHEEVX computes selected eigenvalues and, optionally, eigenvectors
55  of a complex Hermitian matrix A. Eigenvalues and eigenvectors can
56  be selected by specifying either a range of values or a range of
57  indices for the desired eigenvalues.
58 
59  Arguments
60  =========
61  JOBZ (input) CHARACTER*1
62  = 'N': Compute eigenvalues only;
63  = 'V': Compute eigenvalues and eigenvectors.
64 
65  RANGE (input) CHARACTER*1
66  = 'A': all eigenvalues will be found.
67  = 'V': all eigenvalues in the half-open interval (VL,VU]
68  will be found.
69  = 'I': the IL-th through IU-th eigenvalues will be found.
70 
71  UPLO (input) CHARACTER*1
72  = 'U': Upper triangle of A is stored;
73  = 'L': Lower triangle of A is stored.
74 
75  N (input) INTEGER
76  The order of the matrix A. N >= 0.
77 
78  DA (device input/output) COMPLEX*16 array, dimension (LDDA, N)
79  On entry, the Hermitian matrix A. If UPLO = 'U', the
80  leading N-by-N upper triangular part of A contains the
81  upper triangular part of the matrix A. If UPLO = 'L',
82  the leading N-by-N lower triangular part of A contains
83  the lower triangular part of the matrix A.
84  On exit, the lower triangle (if UPLO='L') or the upper
85  triangle (if UPLO='U') of A, including the diagonal, is
86  destroyed.
87 
88  LDDA (input) INTEGER
89  The leading dimension of the array DA. LDDA >= max(1,N).
90 
91  VL (input) DOUBLE PRECISION
92  VU (input) DOUBLE PRECISION
93  If RANGE='V', the lower and upper bounds of the interval to
94  be searched for eigenvalues. VL < VU.
95  Not referenced if RANGE = 'A' or 'I'.
96 
97  IL (input) INTEGER
98  IU (input) INTEGER
99  If RANGE='I', the indices (in ascending order) of the
100  smallest and largest eigenvalues to be returned.
101  1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
102  Not referenced if RANGE = 'A' or 'V'.
103 
104  ABSTOL (input) DOUBLE PRECISION
105  The absolute error tolerance for the eigenvalues.
106  An approximate eigenvalue is accepted as converged
107  when it is determined to lie in an interval [a,b]
108  of width less than or equal to
109 
110  ABSTOL + EPS * max( |a|,|b| ) ,
111 
112  where EPS is the machine precision. If ABSTOL is less than
113  or equal to zero, then EPS*|T| will be used in its place,
114  where |T| is the 1-norm of the tridiagonal matrix obtained
115  by reducing A to tridiagonal form.
116 
117  Eigenvalues will be computed most accurately when ABSTOL is
118  set to twice the underflow threshold 2*DLAMCH('S'), not zero.
119  If this routine returns with INFO>0, indicating that some
120  eigenvectors did not converge, try setting ABSTOL to
121  2*DLAMCH('S').
122 
123  See "Computing Small Singular Values of Bidiagonal Matrices
124  with Guaranteed High Relative Accuracy," by Demmel and
125  Kahan, LAPACK Working Note #3.
126 
127  M (output) INTEGER
128  The total number of eigenvalues found. 0 <= M <= N.
129  If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
130 
131  W (output) DOUBLE PRECISION array, dimension (N)
132  On normal exit, the first M elements contain the selected
133  eigenvalues in ascending order.
134 
135  DZ (device output) COMPLEX*16 array, dimension (LDDZ, max(1,M))
136  If JOBZ = 'V', then if INFO = 0, the first M columns of Z
137  contain the orthonormal eigenvectors of the matrix A
138  corresponding to the selected eigenvalues, with the i-th
139  column of Z holding the eigenvector associated with W(i).
140  If an eigenvector fails to converge, then that column of Z
141  contains the latest approximation to the eigenvector, and the
142  index of the eigenvector is returned in IFAIL.
143  If JOBZ = 'N', then Z is not referenced.
144  Note: the user must ensure that at least max(1,M) columns are
145  supplied in the array Z; if RANGE = 'V', the exact value of M
146  is not known in advance and an upper bound must be used.
147 ********* (workspace) If FAST_HEMV is defined DZ should be (LDDZ, max(1,N)) in both cases.
148 
149  LDDZ (input) INTEGER
150  The leading dimension of the array DZ. LDDZ >= 1, and if
151  JOBZ = 'V', LDDZ >= max(1,N).
152 
153  WA (workspace) COMPLEX_16 array, dimension (LDWA, N)
154 
155  LDWA (input) INTEGER
156  The leading dimension of the array WA. LDWA >= max(1,N).
157 
158  WZ (workspace) COMPLEX_16 array, dimension (LDWZ, max(1,M))
159 
160  LDWZ (input) INTEGER
161  The leading dimension of the array DZ. LDWZ >= 1, and if
162  JOBZ = 'V', LDWZ >= max(1,N).
163 
164  WORK (workspace/output) COMPLEX*16 array, dimension (LWORK)
165  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
166 
167  LWORK (input) INTEGER
168  The length of the array WORK. LWORK >= (NB+1)*N,
169  where NB is the max of the blocksize for ZHETRD.
170 
171  If LWORK = -1, then a workspace query is assumed; the routine
172  only calculates the optimal size of the WORK array, returns
173  this value as the first entry of the WORK array, and no error
174  message related to LWORK is issued by XERBLA.
175 
176  RWORK (workspace) DOUBLE PRECISION array, dimension (7*N)
177 
178  IWORK (workspace) INTEGER array, dimension (5*N)
179 
180  IFAIL (output) INTEGER array, dimension (N)
181  If JOBZ = 'V', then if INFO = 0, the first M elements of
182  IFAIL are zero. If INFO > 0, then IFAIL contains the
183  indices of the eigenvectors that failed to converge.
184  If JOBZ = 'N', then IFAIL is not referenced.
185 
186  INFO (output) INTEGER
187  = 0: successful exit
188  < 0: if INFO = -i, the i-th argument had an illegal value
189  > 0: if INFO = i, then i eigenvectors failed to converge.
190  Their indices are stored in array IFAIL.
191  ===================================================================== */
192 
193  char uplo_[2] = {uplo, 0};
194  char jobz_[2] = {jobz, 0};
195  char range_[2] = {range, 0};
196 
197  static magma_int_t ione = 1;
198 
199  static char order[1];
200  static magma_int_t indd, inde;
201  static magma_int_t imax;
202  static magma_int_t lopt, itmp1, indee;
203  static magma_int_t lower, wantz;
204  static magma_int_t i, j, jj, i__1;
205  static magma_int_t alleig, valeig, indeig;
206  static magma_int_t iscale, indibl;
207  static magma_int_t indiwk, indisp, indtau;
208  static magma_int_t indrwk, indwrk;
209  static magma_int_t llwork, nsplit;
210  static magma_int_t lquery;
211  static magma_int_t iinfo;
212  static double safmin;
213  static double bignum;
214  static double smlnum;
215  static double eps, tmp1;
216  static double anrm;
217  static double sigma, d__1;
218  static double rmin, rmax;
219 
220  double *dwork;
221 
222  /* Function Body */
223  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
224  wantz = lapackf77_lsame(jobz_, MagmaVectorsStr);
225  alleig = lapackf77_lsame(range_, "A");
226  valeig = lapackf77_lsame(range_, "V");
227  indeig = lapackf77_lsame(range_, "I");
228  lquery = lwork == -1;
229 
230  *info = 0;
231  if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVectorsStr))) {
232  *info = -1;
233  } else if (! (alleig || valeig || indeig)) {
234  *info = -2;
235  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
236  *info = -3;
237  } else if (n < 0) {
238  *info = -4;
239  } else if (ldda < max(1,n)) {
240  *info = -6;
241  } else if (lddz < 1 || wantz && lddz < n) {
242  *info = -15;
243  } else if (ldwa < max(1,n)) {
244  *info = -17;
245  } else if (ldwz < 1 || wantz && ldwz < n) {
246  *info = -19;
247  } else {
248  if (valeig) {
249  if (n > 0 && vu <= vl) {
250  *info = -8;
251  }
252  } else if (indeig) {
253  if (il < 1 || il > max(1,n)) {
254  *info = -9;
255  } else if (iu < min(n,il) || iu > n) {
256  *info = -10;
257  }
258  }
259  }
260 
262 
263  lopt = n * (nb + 1);
264 
265  MAGMA_Z_SET2REAL(work[0],(double)lopt);
266 
267 
268  if (lwork < lopt && ! lquery) {
269  *info = -21;
270  }
271 
272  if (*info != 0) {
273  magma_xerbla( __func__, -(*info));
274  return *info;
275  } else if (lquery) {
276  return *info;
277  }
278 
279  /* Quick return if possible */
280  *m = 0;
281  if (n == 0) {
282  return *info;
283  }
284 
285  if (n == 1) {
286  cuDoubleComplex tmp;
287  magma_zgetvector( 1, da, 1, &tmp, 1 );
288  w[0] = MAGMA_Z_REAL(tmp);
289  if (alleig || indeig) {
290  *m = 1;
291  } else if (valeig) {
292  if (vl < w[0] && vu >= w[0]) {
293  *m = 1;
294  }
295  }
296  if (wantz) {
297  tmp = MAGMA_Z_ONE;
298  magma_zsetvector( 1, &tmp, 1, da, 1 );
299  }
300  return *info;
301  }
302 
303  if (MAGMA_SUCCESS != magma_dmalloc( &dwork, n )) {
304  fprintf (stderr, "!!!! device memory allocation error (magma_zheevx_gpu)\n");
305  *info = MAGMA_ERR_DEVICE_ALLOC;
306  return *info;
307  }
308 
309  --w;
310  --work;
311  --rwork;
312  --iwork;
313  --ifail;
314 
315  /* Get machine constants. */
316  safmin = lapackf77_dlamch("Safe minimum");
317  eps = lapackf77_dlamch("Precision");
318  smlnum = safmin / eps;
319  bignum = 1. / smlnum;
320  rmin = magma_dsqrt(smlnum);
321  rmax = magma_dsqrt(bignum);
322 
323  /* Scale matrix to allowable range, if necessary. */
324  anrm = magmablas_zlanhe('M', uplo, n, da, ldda, dwork);
325  iscale = 0;
326  if (anrm > 0. && anrm < rmin) {
327  iscale = 1;
328  sigma = rmin / anrm;
329  } else if (anrm > rmax) {
330  iscale = 1;
331  sigma = rmax / anrm;
332  }
333  if (iscale == 1) {
334  d__1 = 1.;
335  magmablas_zlascl(uplo, 0, 0, 1., sigma, n, n, da,
336  ldda, info);
337 
338  if (abstol > 0.) {
339  abstol *= sigma;
340  }
341  if (valeig) {
342  vl *= sigma;
343  vu *= sigma;
344  }
345  }
346 
347  /* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */
348 
349  indd = 1;
350  inde = indd + n;
351  indrwk = inde + n;
352  indtau = 1;
353  indwrk = indtau + n;
354  llwork = lwork - indwrk + 1;
355 
356 #ifdef FAST_HEMV
357  magma_zhetrd2_gpu(uplo, n, da, ldda, &rwork[indd], &rwork[inde],
358  &work[indtau], wa, ldwa, &work[indwrk], llwork, dz, lddz*n, &iinfo);
359 #else
360  magma_zhetrd_gpu (uplo, n, da, ldda, &rwork[indd], &rwork[inde],
361  &work[indtau], wa, ldwa, &work[indwrk], llwork, &iinfo);
362 #endif
363 
364  lopt = n + (magma_int_t)MAGMA_Z_REAL(work[indwrk]);
365 
366  /* If all eigenvalues are desired and ABSTOL is less than or equal to
367  zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for
368  some eigenvalue, then try DSTEBZ. */
369 
370  if ((alleig || indeig && il == 1 && iu == n) && abstol <= 0.) {
371  blasf77_dcopy(&n, &rwork[indd], &ione, &w[1], &ione);
372  indee = indrwk + 2*n;
373  if (! wantz) {
374  i__1 = n - 1;
375  blasf77_dcopy(&i__1, &rwork[inde], &ione, &rwork[indee], &ione);
376  lapackf77_dsterf(&n, &w[1], &rwork[indee], info);
377  } else {
378  lapackf77_zlacpy("A", &n, &n, wa, &ldwa, wz, &ldwz);
379  lapackf77_zungtr(uplo_, &n, wz, &ldwz, &work[indtau], &work[indwrk], &llwork, &iinfo);
380  i__1 = n - 1;
381  blasf77_dcopy(&i__1, &rwork[inde], &ione, &rwork[indee], &ione);
382  lapackf77_zsteqr(jobz_, &n, &w[1], &rwork[indee], wz, &ldwz, &rwork[indrwk], info);
383  if (*info == 0) {
384  for (i = 1; i <= n; ++i) {
385  ifail[i] = 0;
386  }
387  magma_zsetmatrix( n, n, wz, ldwz, dz, lddz );
388  }
389 
390  }
391  if (*info == 0) {
392  *m = n;
393  }
394  }
395  /* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */
396  if (*m == 0) {
397  *info = 0;
398  if (wantz) {
399  *(unsigned char *)order = 'B';
400  } else {
401  *(unsigned char *)order = 'E';
402  }
403  indibl = 1;
404  indisp = indibl + n;
405  indiwk = indisp + n;
406 
407  Mylapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
408  &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info);
409 
410  if (wantz) {
411 
412  Mylapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
413  wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info);
414 
415  magma_zsetmatrix( n, *m, wz, ldwz, dz, lddz );
416 
417  /* Apply unitary matrix used in reduction to tridiagonal
418  form to eigenvectors returned by ZSTEIN. */
419  magma_zunmtr_gpu(MagmaLeft, uplo, MagmaNoTrans, n, *m, da, ldda, &work[indtau],
420  dz, lddz, wa, ldwa, &iinfo);
421  }
422  }
423  /* If matrix was scaled, then rescale eigenvalues appropriately. */
424  if (iscale == 1) {
425  if (*info == 0) {
426  imax = *m;
427  } else {
428  imax = *info - 1;
429  }
430  d__1 = 1. / sigma;
431  dscal_(&imax, &d__1, &w[1], &ione);
432  }
433 
434  /* If eigenvalues are not in order, then sort them, along with
435  eigenvectors. */
436 
437  if (wantz) {
438  for (j = 1; j <= *m-1; ++j) {
439  i = 0;
440  tmp1 = w[j];
441  for (jj = j + 1; jj <= *m; ++jj) {
442  if (w[jj] < tmp1) {
443  i = jj;
444  tmp1 = w[jj];
445  }
446  }
447 
448  if (i != 0) {
449  itmp1 = iwork[indibl + i - 1];
450  w[i] = w[j];
451  iwork[indibl + i - 1] = iwork[indibl + j - 1];
452  w[j] = tmp1;
453  iwork[indibl + j - 1] = itmp1;
454  magma_zswap(n, dz + (i-1)*lddz, ione, dz + (j-1)*lddz, ione);
455  if (*info != 0) {
456  itmp1 = ifail[i];
457  ifail[i] = ifail[j];
458  ifail[j] = itmp1;
459  }
460  }
461  }
462  }
463 
464  /* Set WORK(1) to optimal complex workspace size. */
465 
466  work[1] = MAGMA_Z_MAKE((double) lopt, 0.);
467 
468  return *info;
469 
470 } /* magma_zheevx_gpu_ */
471