MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zheevx_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Raffaele Solca
9  @author Azzam Haidar
10 
11  @precisions normal z -> c
12 
13  */
14 #include "common_magma.h"
15 
16 extern "C" magma_int_t
17 magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n,
18  magmaDoubleComplex *da, magma_int_t ldda, double vl, double vu,
19  magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m,
20  double *w, magmaDoubleComplex *dz, magma_int_t lddz,
21  magmaDoubleComplex *wa, magma_int_t ldwa,
22  magmaDoubleComplex *wz, magma_int_t ldwz,
23  magmaDoubleComplex *work, magma_int_t lwork,
24  double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
25 {
26 /* -- MAGMA (version 1.4.0) --
27  Univ. of Tennessee, Knoxville
28  Univ. of California, Berkeley
29  Univ. of Colorado, Denver
30  August 2013
31 
32  Purpose
33  =======
34  ZHEEVX computes selected eigenvalues and, optionally, eigenvectors
35  of a complex Hermitian matrix A. Eigenvalues and eigenvectors can
36  be selected by specifying either a range of values or a range of
37  indices for the desired eigenvalues.
38 
39  Arguments
40  =========
41  JOBZ (input) CHARACTER*1
42  = 'N': Compute eigenvalues only;
43  = 'V': Compute eigenvalues and eigenvectors.
44 
45  RANGE (input) CHARACTER*1
46  = 'A': all eigenvalues will be found.
47  = 'V': all eigenvalues in the half-open interval (VL,VU]
48  will be found.
49  = 'I': the IL-th through IU-th eigenvalues will be found.
50 
51  UPLO (input) CHARACTER*1
52  = 'U': Upper triangle of A is stored;
53  = 'L': Lower triangle of A is stored.
54 
55  N (input) INTEGER
56  The order of the matrix A. N >= 0.
57 
58  DA (device input/output) COMPLEX_16 array, dimension (LDDA, N)
59  On entry, the Hermitian matrix A. If UPLO = 'U', the
60  leading N-by-N upper triangular part of A contains the
61  upper triangular part of the matrix A. If UPLO = 'L',
62  the leading N-by-N lower triangular part of A contains
63  the lower triangular part of the matrix A.
64  On exit, the lower triangle (if UPLO='L') or the upper
65  triangle (if UPLO='U') of A, including the diagonal, is
66  destroyed.
67 
68  LDDA (input) INTEGER
69  The leading dimension of the array DA. LDDA >= max(1,N).
70 
71  VL (input) DOUBLE PRECISION
72  VU (input) DOUBLE PRECISION
73  If RANGE='V', the lower and upper bounds of the interval to
74  be searched for eigenvalues. VL < VU.
75  Not referenced if RANGE = 'A' or 'I'.
76 
77  IL (input) INTEGER
78  IU (input) INTEGER
79  If RANGE='I', the indices (in ascending order) of the
80  smallest and largest eigenvalues to be returned.
81  1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
82  Not referenced if RANGE = 'A' or 'V'.
83 
84  ABSTOL (input) DOUBLE PRECISION
85  The absolute error tolerance for the eigenvalues.
86  An approximate eigenvalue is accepted as converged
87  when it is determined to lie in an interval [a,b]
88  of width less than or equal to
89 
90  ABSTOL + EPS * max( |a|,|b| ) ,
91 
92  where EPS is the machine precision. If ABSTOL is less than
93  or equal to zero, then EPS*|T| will be used in its place,
94  where |T| is the 1-norm of the tridiagonal matrix obtained
95  by reducing A to tridiagonal form.
96 
97  Eigenvalues will be computed most accurately when ABSTOL is
98  set to twice the underflow threshold 2*DLAMCH('S'), not zero.
99  If this routine returns with INFO>0, indicating that some
100  eigenvectors did not converge, try setting ABSTOL to
101  2*DLAMCH('S').
102 
103  See "Computing Small Singular Values of Bidiagonal Matrices
104  with Guaranteed High Relative Accuracy," by Demmel and
105  Kahan, LAPACK Working Note #3.
106 
107  M (output) INTEGER
108  The total number of eigenvalues found. 0 <= M <= N.
109  If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
110 
111  W (output) DOUBLE PRECISION array, dimension (N)
112  On normal exit, the first M elements contain the selected
113  eigenvalues in ascending order.
114 
115  DZ (device output) COMPLEX_16 array, dimension (LDDZ, max(1,M))
116  If JOBZ = 'V', then if INFO = 0, the first M columns of Z
117  contain the orthonormal eigenvectors of the matrix A
118  corresponding to the selected eigenvalues, with the i-th
119  column of Z holding the eigenvector associated with W(i).
120  If an eigenvector fails to converge, then that column of Z
121  contains the latest approximation to the eigenvector, and the
122  index of the eigenvector is returned in IFAIL.
123  If JOBZ = 'N', then Z is not referenced.
124  Note: the user must ensure that at least max(1,M) columns are
125  supplied in the array Z; if RANGE = 'V', the exact value of M
126  is not known in advance and an upper bound must be used.
127 ********* (workspace) If FAST_HEMV is defined DZ should be (LDDZ, max(1,N)) in both cases.
128 
129  LDDZ (input) INTEGER
130  The leading dimension of the array DZ. LDDZ >= 1, and if
131  JOBZ = 'V', LDDZ >= max(1,N).
132 
133  WA (workspace) COMPLEX_16 array, dimension (LDWA, N)
134 
135  LDWA (input) INTEGER
136  The leading dimension of the array WA. LDWA >= max(1,N).
137 
138  WZ (workspace) COMPLEX_16 array, dimension (LDWZ, max(1,M))
139 
140  LDWZ (input) INTEGER
141  The leading dimension of the array DZ. LDWZ >= 1, and if
142  JOBZ = 'V', LDWZ >= max(1,N).
143 
144  WORK (workspace/output) COMPLEX_16 array, dimension (LWORK)
145  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
146 
147  LWORK (input) INTEGER
148  The length of the array WORK. LWORK >= (NB+1)*N,
149  where NB is the max of the blocksize for ZHETRD.
150 
151  If LWORK = -1, then a workspace query is assumed; the routine
152  only calculates the optimal size of the WORK array, returns
153  this value as the first entry of the WORK array, and no error
154  message related to LWORK is issued by XERBLA.
155 
156  RWORK (workspace) DOUBLE PRECISION array, dimension (7*N)
157 
158  IWORK (workspace) INTEGER array, dimension (5*N)
159 
160  IFAIL (output) INTEGER array, dimension (N)
161  If JOBZ = 'V', then if INFO = 0, the first M elements of
162  IFAIL are zero. If INFO > 0, then IFAIL contains the
163  indices of the eigenvectors that failed to converge.
164  If JOBZ = 'N', then IFAIL is not referenced.
165 
166  INFO (output) INTEGER
167  = 0: successful exit
168  < 0: if INFO = -i, the i-th argument had an illegal value
169  > 0: if INFO = i, then i eigenvectors failed to converge.
170  Their indices are stored in array IFAIL.
171  ===================================================================== */
172 
173  char uplo_[2] = {uplo, 0};
174  char jobz_[2] = {jobz, 0};
175  char range_[2] = {range, 0};
176 
177  magma_int_t ione = 1;
178 
179  char order[1];
180  magma_int_t indd, inde;
181  magma_int_t imax;
182  magma_int_t lopt, itmp1, indee;
183  magma_int_t lower, wantz;
184  magma_int_t i, j, jj, i__1;
185  magma_int_t alleig, valeig, indeig;
186  magma_int_t iscale, indibl;
187  magma_int_t indiwk, indisp, indtau;
188  magma_int_t indrwk, indwrk;
189  magma_int_t llwork, nsplit;
190  magma_int_t lquery;
191  magma_int_t iinfo;
192  double safmin;
193  double bignum;
194  double smlnum;
195  double eps, tmp1;
196  double anrm;
197  double sigma, d__1;
198  double rmin, rmax;
199 
200  double *dwork;
201 
202  /* Function Body */
203  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
204  wantz = lapackf77_lsame(jobz_, MagmaVecStr);
205  alleig = lapackf77_lsame(range_, "A");
206  valeig = lapackf77_lsame(range_, "V");
207  indeig = lapackf77_lsame(range_, "I");
208  lquery = lwork == -1;
209 
210  *info = 0;
211  if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVecStr))) {
212  *info = -1;
213  } else if (! (alleig || valeig || indeig)) {
214  *info = -2;
215  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
216  *info = -3;
217  } else if (n < 0) {
218  *info = -4;
219  } else if (ldda < max(1,n)) {
220  *info = -6;
221  } else if (lddz < 1 || (wantz && lddz < n)) {
222  *info = -15;
223  } else if (ldwa < max(1,n)) {
224  *info = -17;
225  } else if (ldwz < 1 || (wantz && ldwz < n)) {
226  *info = -19;
227  } else {
228  if (valeig) {
229  if (n > 0 && vu <= vl) {
230  *info = -8;
231  }
232  } else if (indeig) {
233  if (il < 1 || il > max(1,n)) {
234  *info = -9;
235  } else if (iu < min(n,il) || iu > n) {
236  *info = -10;
237  }
238  }
239  }
240 
242 
243  lopt = n * (nb + 1);
244 
245  MAGMA_Z_SET2REAL(work[0],(double)lopt);
246 
247  if (lwork < lopt && ! lquery) {
248  *info = -21;
249  }
250 
251  if (*info != 0) {
252  magma_xerbla( __func__, -(*info));
253  return *info;
254  } else if (lquery) {
255  return *info;
256  }
257 
258  *m = 0;
259  /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
260  if (n <= 128) {
261  #ifdef ENABLE_DEBUG
262  printf("--------------------------------------------------------------\n");
263  printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
264  printf("--------------------------------------------------------------\n");
265  #endif
266  magmaDoubleComplex *a = (magmaDoubleComplex *) malloc( n * n * sizeof(magmaDoubleComplex) );
267  magma_zgetmatrix(n, n, da, ldda, a, n);
268  lapackf77_zheevx(jobz_, range_, uplo_,
269  &n, a, &n, &vl, &vu, &il, &iu, &abstol, m,
270  w, wz, &ldwz, work, &lwork,
271  rwork, iwork, ifail, info);
272  magma_zsetmatrix( n, n, a, n, da, ldda);
273  magma_zsetmatrix( n, *m, wz, ldwz, dz, lddz);
274  free(a);
275  return *info;
276  }
277 
278  if (MAGMA_SUCCESS != magma_dmalloc( &dwork, n )) {
279  fprintf (stderr, "!!!! device memory allocation error (magma_zheevx_gpu)\n");
280  *info = MAGMA_ERR_DEVICE_ALLOC;
281  return *info;
282  }
283 
284  --w;
285  --work;
286  --rwork;
287  --iwork;
288  --ifail;
289 
290  /* Get machine constants. */
291  safmin = lapackf77_dlamch("Safe minimum");
292  eps = lapackf77_dlamch("Precision");
293  smlnum = safmin / eps;
294  bignum = 1. / smlnum;
295  rmin = magma_dsqrt(smlnum);
296  rmax = magma_dsqrt(bignum);
297 
298  /* Scale matrix to allowable range, if necessary. */
299  anrm = magmablas_zlanhe('M', uplo, n, da, ldda, dwork);
300  iscale = 0;
301  sigma = 1;
302  if (anrm > 0. && anrm < rmin) {
303  iscale = 1;
304  sigma = rmin / anrm;
305  } else if (anrm > rmax) {
306  iscale = 1;
307  sigma = rmax / anrm;
308  }
309  if (iscale == 1) {
310  d__1 = 1.;
311  magmablas_zlascl(uplo, 0, 0, 1., sigma, n, n, da,
312  ldda, info);
313 
314  if (abstol > 0.) {
315  abstol *= sigma;
316  }
317  if (valeig) {
318  vl *= sigma;
319  vu *= sigma;
320  }
321  }
322 
323  /* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */
324  indd = 1;
325  inde = indd + n;
326  indrwk = inde + n;
327  indtau = 1;
328  indwrk = indtau + n;
329  llwork = lwork - indwrk + 1;
330 
331 #ifdef FAST_HEMV
332  magma_zhetrd2_gpu(uplo, n, da, ldda, &rwork[indd], &rwork[inde],
333  &work[indtau], wa, ldwa, &work[indwrk], llwork, dz, lddz*n, &iinfo);
334 #else
335  magma_zhetrd_gpu (uplo, n, da, ldda, &rwork[indd], &rwork[inde],
336  &work[indtau], wa, ldwa, &work[indwrk], llwork, &iinfo);
337 #endif
338 
339  lopt = n + (magma_int_t)MAGMA_Z_REAL(work[indwrk]);
340 
341  /* If all eigenvalues are desired and ABSTOL is less than or equal to
342  zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for
343  some eigenvalue, then try DSTEBZ. */
344  if ((alleig || (indeig && il == 1 && iu == n)) && abstol <= 0.) {
345  blasf77_dcopy(&n, &rwork[indd], &ione, &w[1], &ione);
346  indee = indrwk + 2*n;
347  if (! wantz) {
348  i__1 = n - 1;
349  blasf77_dcopy(&i__1, &rwork[inde], &ione, &rwork[indee], &ione);
350  lapackf77_dsterf(&n, &w[1], &rwork[indee], info);
351  }
352  else {
353  lapackf77_zlacpy("A", &n, &n, wa, &ldwa, wz, &ldwz);
354  lapackf77_zungtr(uplo_, &n, wz, &ldwz, &work[indtau], &work[indwrk], &llwork, &iinfo);
355  i__1 = n - 1;
356  blasf77_dcopy(&i__1, &rwork[inde], &ione, &rwork[indee], &ione);
357  lapackf77_zsteqr(jobz_, &n, &w[1], &rwork[indee], wz, &ldwz, &rwork[indrwk], info);
358  if (*info == 0) {
359  for (i = 1; i <= n; ++i) {
360  ifail[i] = 0;
361  }
362  magma_zsetmatrix( n, n, wz, ldwz, dz, lddz );
363  }
364  }
365  if (*info == 0) {
366  *m = n;
367  }
368  }
369 
370  /* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */
371  if (*m == 0) {
372  *info = 0;
373  if (wantz) {
374  *(unsigned char *)order = 'B';
375  } else {
376  *(unsigned char *)order = 'E';
377  }
378  indibl = 1;
379  indisp = indibl + n;
380  indiwk = indisp + n;
381 
382  lapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
383  &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info);
384 
385  if (wantz) {
386 
387  lapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
388  wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info);
389 
390  magma_zsetmatrix( n, *m, wz, ldwz, dz, lddz );
391 
392  /* Apply unitary matrix used in reduction to tridiagonal
393  form to eigenvectors returned by ZSTEIN. */
394  magma_zunmtr_gpu(MagmaLeft, uplo, MagmaNoTrans, n, *m, da, ldda, &work[indtau],
395  dz, lddz, wa, ldwa, &iinfo);
396  }
397  }
398  /* If matrix was scaled, then rescale eigenvalues appropriately. */
399  if (iscale == 1) {
400  if (*info == 0) {
401  imax = *m;
402  } else {
403  imax = *info - 1;
404  }
405  d__1 = 1. / sigma;
406  blasf77_dscal(&imax, &d__1, &w[1], &ione);
407  }
408 
409  /* If eigenvalues are not in order, then sort them, along with
410  eigenvectors. */
411  if (wantz) {
412  for (j = 1; j <= *m-1; ++j) {
413  i = 0;
414  tmp1 = w[j];
415  for (jj = j + 1; jj <= *m; ++jj) {
416  if (w[jj] < tmp1) {
417  i = jj;
418  tmp1 = w[jj];
419  }
420  }
421 
422  if (i != 0) {
423  itmp1 = iwork[indibl + i - 1];
424  w[i] = w[j];
425  iwork[indibl + i - 1] = iwork[indibl + j - 1];
426  w[j] = tmp1;
427  iwork[indibl + j - 1] = itmp1;
428  magma_zswap(n, dz + (i-1)*lddz, ione, dz + (j-1)*lddz, ione);
429  if (*info != 0) {
430  itmp1 = ifail[i];
431  ifail[i] = ifail[j];
432  ifail[j] = itmp1;
433  }
434  }
435  }
436  }
437 
438  /* Set WORK(1) to optimal complex workspace size. */
439  work[1] = MAGMA_Z_MAKE((double) lopt, 0.);
440 
441  return *info;
442 
443 } /* magma_zheevx_gpu */
#define magma_dsqrt
Definition: common_magma.h:98
#define min(a, b)
Definition: common_magma.h:86
#define lapackf77_dsterf
Definition: magma_lapack.h:33
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
magma_int_t magma_zhetrd_gpu(char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *wa, magma_int_t ldwa, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
void lapackf77_zheevx(const char *jobz, const char *range, const char *uplo, const magma_int_t *n, magmaDoubleComplex *A, const magma_int_t *lda, double *vl, double *vu, magma_int_t *il, magma_int_t *iu, double *abstol, magma_int_t *m, double *w, magmaDoubleComplex *z__, magma_int_t *ldz, magmaDoubleComplex *work, magma_int_t *lwork, double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
magma_int_t magma_get_zhetrd_nb(magma_int_t m)
Definition: get_nb.cpp:423
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define lapackf77_zungtr
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_zunmtr_gpu(char side, char uplo, char trans, magma_int_t m, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dc, magma_int_t lddc, cuDoubleComplex *wa, magma_int_t ldwa, magma_int_t *info)
#define lapackf77_zlacpy
Definition: magma_zlapack.h:73
void lapackf77_zstein(const magma_int_t *n, const double *d, const double *e, const magma_int_t *m, const double *w, const magma_int_t *iblock, const magma_int_t *isplit, magmaDoubleComplex *Z, const magma_int_t *ldz, double *work, magma_int_t *iwork, magma_int_t *ifailv, magma_int_t *info)
void blasf77_dcopy(const magma_int_t *n, const double *x, const magma_int_t *incx, double *y, const magma_int_t *incy)
#define lapackf77_dlamch
Definition: magma_lapack.h:27
#define dwork(dev, i, j)
#define vl(i, j)
magma_int_t magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *dz, magma_int_t lddz, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *wz, magma_int_t ldwz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
Definition: zheevx_gpu.cpp:17
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
void magma_zswap(magma_int_t n, magmaDoubleComplex_ptr dx, magma_int_t incx, magmaDoubleComplex_ptr dy, magma_int_t incy)
#define MAGMA_Z_SET2REAL(v, t)
Definition: magma.h:115
#define lapackf77_dstebz
Definition: magma_dlapack.h:51
magma_int_t ldda
#define MagmaLowerStr
Definition: magma.h:85
double magmablas_zlanhe(char norm, char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, double *WORK)
#define lapackf77_zsteqr
Definition: magma_zlapack.h:94
#define MAGMA_SUCCESS
Definition: magma.h:106
void blasf77_dscal(const magma_int_t *n, const double *alpha, double *x, const magma_int_t *incx)
#define MagmaNoVecStr
Definition: magma_types.h:423
void magmablas_zlascl(char type, int kl, int ku, double cfrom, double cto, int m, int n, cuDoubleComplex *A, int lda, int *info)
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define magma_zsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_z.h:702
#define MagmaVecStr
Definition: magma_types.h:424
#define max(a, b)
Definition: common_magma.h:82
magma_int_t magma_zhetrd2_gpu(char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *wa, magma_int_t ldwa, cuDoubleComplex *work, magma_int_t lwork, cuDoubleComplex *dwork, magma_int_t ldwork, magma_int_t *info)
#define MAGMA_Z_REAL(a)
Definition: magma.h:124