MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
zhegvx.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @author Raffaele Solca
9
10  @precisions normal z -> c
11
12 */
13 #include "common_magma.h"
14
15 extern "C" magma_int_t
16 magma_zhegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
18  double vl, double vu, magma_int_t il, magma_int_t iu, double abstol,
19  magma_int_t *m, double *w, magmaDoubleComplex *z, magma_int_t ldz,
20  magmaDoubleComplex *work, magma_int_t lwork, double *rwork,
21  magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
22 {
23 /* -- MAGMA (version 1.4.0) --
24  Univ. of Tennessee, Knoxville
25  Univ. of California, Berkeley
27  August 2013
28
29  Purpose
30  =======
31  ZHEGVX computes selected eigenvalues, and optionally, eigenvectors
32  of a complex generalized Hermitian-definite eigenproblem, of the form
33  A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and
34  B are assumed to be Hermitian and B is also positive definite.
35  Eigenvalues and eigenvectors can be selected by specifying either a
36  range of values or a range of indices for the desired eigenvalues.
37
38  Arguments
39  =========
40  ITYPE (input) INTEGER
41  Specifies the problem type to be solved:
42  = 1: A*x = (lambda)*B*x
43  = 2: A*B*x = (lambda)*x
44  = 3: B*A*x = (lambda)*x
45
46  JOBZ (input) CHARACTER*1
47  = 'N': Compute eigenvalues only;
48  = 'V': Compute eigenvalues and eigenvectors.
49
50  RANGE (input) CHARACTER*1
51  = 'A': all eigenvalues will be found.
52  = 'V': all eigenvalues in the half-open interval (VL,VU]
53  will be found.
54  = 'I': the IL-th through IU-th eigenvalues will be found.
55
56  UPLO (input) CHARACTER*1
57  = 'U': Upper triangles of A and B are stored;
58  = 'L': Lower triangles of A and B are stored.
59
60  N (input) INTEGER
61  The order of the matrices A and B. N >= 0.
62
63  A (input/output) COMPLEX_16 array, dimension (LDA, N)
64  On entry, the Hermitian matrix A. If UPLO = 'U', the
65  leading N-by-N upper triangular part of A contains the
66  upper triangular part of the matrix A. If UPLO = 'L',
67  the leading N-by-N lower triangular part of A contains
68  the lower triangular part of the matrix A.
69
70  On exit, the lower triangle (if UPLO='L') or the upper
71  triangle (if UPLO='U') of A, including the diagonal, is
72  destroyed.
73
74  LDA (input) INTEGER
75  The leading dimension of the array A. LDA >= max(1,N).
76
77  B (input/output) COMPLEX_16 array, dimension (LDB, N)
78  On entry, the Hermitian matrix B. If UPLO = 'U', the
79  leading N-by-N upper triangular part of B contains the
80  upper triangular part of the matrix B. If UPLO = 'L',
81  the leading N-by-N lower triangular part of B contains
82  the lower triangular part of the matrix B.
83
84  On exit, if INFO <= N, the part of B containing the matrix is
85  overwritten by the triangular factor U or L from the Cholesky
86  factorization B = U**H*U or B = L*L**H.
87
88  LDB (input) INTEGER
89  The leading dimension of the array B. LDB >= max(1,N).
90
91  VL (input) DOUBLE PRECISION
92  VU (input) DOUBLE PRECISION
93  If RANGE='V', the lower and upper bounds of the interval to
94  be searched for eigenvalues. VL < VU.
95  Not referenced if RANGE = 'A' or 'I'.
96
97  IL (input) INTEGER
98  IU (input) INTEGER
99  If RANGE='I', the indices (in ascending order) of the
100  smallest and largest eigenvalues to be returned.
101  1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
102  Not referenced if RANGE = 'A' or 'V'.
103
104  ABSTOL (input) DOUBLE PRECISION
105  The absolute error tolerance for the eigenvalues.
106  An approximate eigenvalue is accepted as converged
107  when it is determined to lie in an interval [a,b]
108  of width less than or equal to
109
110  ABSTOL + EPS * max( |a|,|b| ) ,
111
112  where EPS is the machine precision. If ABSTOL is less than
113  or equal to zero, then EPS*|T| will be used in its place,
114  where |T| is the 1-norm of the tridiagonal matrix obtained
115  by reducing A to tridiagonal form.
116
117  Eigenvalues will be computed most accurately when ABSTOL is
118  set to twice the underflow threshold 2*DLAMCH('S'), not zero.
119  If this routine returns with INFO>0, indicating that some
120  eigenvectors did not converge, try setting ABSTOL to
121  2*DLAMCH('S').
122
123  M (output) INTEGER
124  The total number of eigenvalues found. 0 <= M <= N.
125  If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
126
127  W (output) DOUBLE PRECISION array, dimension (N)
128  The first M elements contain the selected
129  eigenvalues in ascending order.
130
131  Z (output) COMPLEX_16 array, dimension (LDZ, max(1,M))
132  If JOBZ = 'N', then Z is not referenced.
133  If JOBZ = 'V', then if INFO = 0, the first M columns of Z
134  contain the orthonormal eigenvectors of the matrix A
135  corresponding to the selected eigenvalues, with the i-th
136  column of Z holding the eigenvector associated with W(i).
137  The eigenvectors are normalized as follows:
138  if ITYPE = 1 or 2, Z**T*B*Z = I;
139  if ITYPE = 3, Z**T*inv(B)*Z = I.
140
141  If an eigenvector fails to converge, then that column of Z
142  contains the latest approximation to the eigenvector, and the
143  index of the eigenvector is returned in IFAIL.
144  Note: the user must ensure that at least max(1,M) columns are
145  supplied in the array Z; if RANGE = 'V', the exact value of M
146  is not known in advance and an upper bound must be used.
147
148  LDZ (input) INTEGER
149  The leading dimension of the array Z. LDZ >= 1, and if
150  JOBZ = 'V', LDZ >= max(1,N).
151
152  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
153  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
154
155  LWORK (input) INTEGER
156  The length of the array WORK. LWORK >= max(1,2*N).
157  For optimal efficiency, LWORK >= (NB+1)*N,
158  where NB is the blocksize for ZHETRD returned by ILAENV.
159
160  If LWORK = -1, then a workspace query is assumed; the routine
161  only calculates the optimal size of the WORK array, returns
162  this value as the first entry of the WORK array, and no error
163  message related to LWORK is issued by XERBLA.
164
165  RWORK (workspace) DOUBLE PRECISION array, dimension (7*N)
166
167  IWORK (workspace) INTEGER array, dimension (5*N)
168
169  IFAIL (output) INTEGER array, dimension (N)
170  If JOBZ = 'V', then if INFO = 0, the first M elements of
171  IFAIL are zero. If INFO > 0, then IFAIL contains the
172  indices of the eigenvectors that failed to converge.
173  If JOBZ = 'N', then IFAIL is not referenced.
174
175  INFO (output) INTEGER
176  = 0: successful exit
177  < 0: if INFO = -i, the i-th argument had an illegal value
178  > 0: ZPOTRF or ZHEEVX returned an error code:
179  <= N: if INFO = i, ZHEEVX failed to converge;
180  i eigenvectors failed to converge. Their indices
181  are stored in array IFAIL.
182  > N: if INFO = N + i, for 1 <= i <= N, then the leading
183  minor of order i of B is not positive definite.
184  The factorization of B could not be completed and
185  no eigenvalues or eigenvectors were computed.
186
187  Further Details
188  ===============
189  Based on contributions by
190  Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
191  ===================================================================== */
192
193  char uplo_[2] = {uplo, 0};
194  char jobz_[2] = {jobz, 0};
195  char range_[2] = {range, 0};
196
198
202  magma_int_t ldda = n;
203  magma_int_t lddb = n;
204  magma_int_t lddz = n;
205
206  magma_int_t lower;
207  char trans[1];
208  magma_int_t wantz;
209  magma_int_t lquery;
210  magma_int_t alleig, valeig, indeig;
211
212  magma_int_t lwmin;
213
214  magma_queue_t stream;
215  magma_queue_create( &stream );
216
217  wantz = lapackf77_lsame(jobz_, MagmaVecStr);
218  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
219  alleig = lapackf77_lsame(range_, "A");
220  valeig = lapackf77_lsame(range_, "V");
221  indeig = lapackf77_lsame(range_, "I");
222  lquery = lwork == -1;
223
224  *info = 0;
225  if (itype < 1 || itype > 3) {
226  *info = -1;
227  } else if (! (alleig || valeig || indeig)) {
228  *info = -2;
229  } else if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVecStr))) {
230  *info = -3;
231  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
232  *info = -4;
233  } else if (n < 0) {
234  *info = -5;
235  } else if (lda < max(1,n)) {
236  *info = -7;
237  } else if (ldb < max(1,n)) {
238  *info = -9;
239  } else if (ldz < 1 || (wantz && ldz < n)) {
240  *info = -18;
241  } else {
242  if (valeig) {
243  if (n > 0 && vu <= vl) {
244  *info = -11;
245  }
246  } else if (indeig) {
247  if (il < 1 || il > max(1,n)) {
248  *info = -12;
249  } else if (iu < min(n,il) || iu > n) {
250  *info = -13;
251  }
252  }
253  }
254
256
257  lwmin = n * (nb + 1);
258
259  MAGMA_Z_SET2REAL(work[0],(double)lwmin);
260
261
262  if (lwork < lwmin && ! lquery) {
263  *info = -20;
264  }
265
266  if (*info != 0) {
267  magma_xerbla( __func__, -(*info));
268  return *info;
269  } else if (lquery) {
270  return *info;
271  }
272
273  /* Quick return if possible */
274  if (n == 0) {
275  return *info;
276  }
277
278  if (MAGMA_SUCCESS != magma_zmalloc( &da, n*ldda ) ||
279  MAGMA_SUCCESS != magma_zmalloc( &db, n*lddb ) ||
280  MAGMA_SUCCESS != magma_zmalloc( &dz, n*lddz )) {
281  *info = MAGMA_ERR_DEVICE_ALLOC;
282  return *info;
283  }
284
285  /* Form a Cholesky factorization of B. */
286
287  magma_zsetmatrix( n, n, b, ldb, db, lddb );
288
290  a, lda,
291  da, ldda, stream );
292
293  magma_zpotrf_gpu(uplo_[0], n, db, lddb, info);
294  if (*info != 0) {
295  *info = n + *info;
296  return *info;
297  }
298
299  magma_queue_sync( stream );
300
302  db, lddb,
303  b, ldb, stream );
304
305  /* Transform problem to standard eigenvalue problem and solve. */
306  magma_zhegst_gpu(itype, uplo, n, da, ldda, db, lddb, info);
307  magma_zheevx_gpu(jobz, range, uplo, n, da, ldda, vl, vu, il, iu, abstol, m, w, dz, lddz, a, lda, z, ldz, work, lwork, rwork, iwork, ifail, info);
308
309  if (wantz && *info == 0) {
310  /* Backtransform eigenvectors to the original problem. */
311  if (itype == 1 || itype == 2) {
312  /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
313  backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */
314  if (lower) {
315  *(unsigned char *)trans = MagmaConjTrans;
316  } else {
317  *(unsigned char *)trans = MagmaNoTrans;
318  }
319  magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
320  }
321  else if (itype == 3) {
322  /* For B*A*x=(lambda)*x;
323  backtransform eigenvectors: x = L*y or U'*y */
324  if (lower) {
325  *(unsigned char *)trans = MagmaNoTrans;
326  } else {
327  *(unsigned char *)trans = MagmaConjTrans;
328  }
329  magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
330  }
331
332  magma_zgetmatrix( n, *m, dz, lddz, z, ldz );
333  }
334
335  magma_queue_sync( stream );
336  magma_queue_destroy( stream );
337
338  magma_free( da );
339  magma_free( db );
340  magma_free( dz );
341
342  return *info;
343 } /* zhegvx */
void magma_ztrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define min(a, b)
Definition: common_magma.h:86
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
magma_int_t magma_get_zhetrd_nb(magma_int_t m)
Definition: get_nb.cpp:423
static magma_err_t magma_zmalloc(magmaDoubleComplex_ptr *ptrPtr, size_t n)
Definition: magma.h:80
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
magma_int_t magma_zhegst_gpu(magma_int_t itype, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
void magma_ztrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define vl(i, j)
magma_int_t magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *dz, magma_int_t lddz, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *wz, magma_int_t ldwz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
Definition: zheevx_gpu.cpp:17
magma_int_t magma_zpotrf_gpu(char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
#define MAGMA_Z_SET2REAL(v, t)
Definition: magma.h:115
magma_int_t ldda
#define MagmaLowerStr
Definition: magma.h:85
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
magma_int_t magma_zhegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *z, magma_int_t ldz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
Definition: zhegvx.cpp:16
#define MagmaNoVecStr
Definition: magma_types.h:423
#define MAGMA_Z_ONE
Definition: magma.h:132
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define magma_zsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_z.h:702
#define MagmaVecStr
Definition: magma_types.h:424
#define max(a, b)
Definition: common_magma.h:82
#define magma_queue_sync(queue)
Definition: magma.h:119