MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zhegvx.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Raffaele Solca
9 
10  @precisions normal z -> c
11 
12  */
13 #include "common_magma.h"
14 
15 void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
16  cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
17  cuDoubleComplex *dz, magma_int_t lddz)
18 {
19  magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
21 }
22 
23 void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
24  cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
25  cuDoubleComplex *dz, magma_int_t lddz)
26 {
27  magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
29 }
30 
31 extern "C" magma_int_t
32 magma_zhegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
33  cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
34  double vl, double vu, magma_int_t il, magma_int_t iu, double abstol,
35  magma_int_t *m, double *w, cuDoubleComplex *z, magma_int_t ldz,
36  cuDoubleComplex *work, magma_int_t lwork, double *rwork,
37  magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
38 {
39 /*
40  -- MAGMA (version 1.2.0) --
41  Univ. of Tennessee, Knoxville
42  Univ. of California, Berkeley
43  Univ. of Colorado, Denver
44  May 2012
45 
46  ITYPE (input) INTEGER
47  Specifies the problem type to be solved:
48  = 1: A*x = (lambda)*B*x
49  = 2: A*B*x = (lambda)*x
50  = 3: B*A*x = (lambda)*x
51 
52  JOBZ (input) CHARACTER*1
53  = 'N': Compute eigenvalues only;
54  = 'V': Compute eigenvalues and eigenvectors.
55 
56  RANGE (input) CHARACTER*1
57  = 'A': all eigenvalues will be found.
58  = 'V': all eigenvalues in the half-open interval (VL,VU]
59  will be found.
60  = 'I': the IL-th through IU-th eigenvalues will be found.
61 
62  UPLO (input) CHARACTER*1
63  = 'U': Upper triangles of A and B are stored;
64  = 'L': Lower triangles of A and B are stored.
65 
66  N (input) INTEGER
67  The order of the matrices A and B. N >= 0.
68 
69  A (input/output) COMPLEX*16 array, dimension (LDA, N)
70  On entry, the Hermitian matrix A. If UPLO = 'U', the
71  leading N-by-N upper triangular part of A contains the
72  upper triangular part of the matrix A. If UPLO = 'L',
73  the leading N-by-N lower triangular part of A contains
74  the lower triangular part of the matrix A.
75 
76  On exit, the lower triangle (if UPLO='L') or the upper
77  triangle (if UPLO='U') of A, including the diagonal, is
78  destroyed.
79 
80  LDA (input) INTEGER
81  The leading dimension of the array A. LDA >= max(1,N).
82 
83  B (input/output) COMPLEX*16 array, dimension (LDB, N)
84  On entry, the Hermitian matrix B. If UPLO = 'U', the
85  leading N-by-N upper triangular part of B contains the
86  upper triangular part of the matrix B. If UPLO = 'L',
87  the leading N-by-N lower triangular part of B contains
88  the lower triangular part of the matrix B.
89 
90  On exit, if INFO <= N, the part of B containing the matrix is
91  overwritten by the triangular factor U or L from the Cholesky
92  factorization B = U**H*U or B = L*L**H.
93 
94  LDB (input) INTEGER
95  The leading dimension of the array B. LDB >= max(1,N).
96 
97  VL (input) DOUBLE PRECISION
98  VU (input) DOUBLE PRECISION
99  If RANGE='V', the lower and upper bounds of the interval to
100  be searched for eigenvalues. VL < VU.
101  Not referenced if RANGE = 'A' or 'I'.
102 
103  IL (input) INTEGER
104  IU (input) INTEGER
105  If RANGE='I', the indices (in ascending order) of the
106  smallest and largest eigenvalues to be returned.
107  1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
108  Not referenced if RANGE = 'A' or 'V'.
109 
110  ABSTOL (input) DOUBLE PRECISION
111  The absolute error tolerance for the eigenvalues.
112  An approximate eigenvalue is accepted as converged
113  when it is determined to lie in an interval [a,b]
114  of width less than or equal to
115 
116  ABSTOL + EPS * max( |a|,|b| ) ,
117 
118  where EPS is the machine precision. If ABSTOL is less than
119  or equal to zero, then EPS*|T| will be used in its place,
120  where |T| is the 1-norm of the tridiagonal matrix obtained
121  by reducing A to tridiagonal form.
122 
123  Eigenvalues will be computed most accurately when ABSTOL is
124  set to twice the underflow threshold 2*DLAMCH('S'), not zero.
125  If this routine returns with INFO>0, indicating that some
126  eigenvectors did not converge, try setting ABSTOL to
127  2*DLAMCH('S').
128 
129  M (output) INTEGER
130  The total number of eigenvalues found. 0 <= M <= N.
131  If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
132 
133  W (output) DOUBLE PRECISION array, dimension (N)
134  The first M elements contain the selected
135  eigenvalues in ascending order.
136 
137  Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M))
138  If JOBZ = 'N', then Z is not referenced.
139  If JOBZ = 'V', then if INFO = 0, the first M columns of Z
140  contain the orthonormal eigenvectors of the matrix A
141  corresponding to the selected eigenvalues, with the i-th
142  column of Z holding the eigenvector associated with W(i).
143  The eigenvectors are normalized as follows:
144  if ITYPE = 1 or 2, Z**T*B*Z = I;
145  if ITYPE = 3, Z**T*inv(B)*Z = I.
146 
147  If an eigenvector fails to converge, then that column of Z
148  contains the latest approximation to the eigenvector, and the
149  index of the eigenvector is returned in IFAIL.
150  Note: the user must ensure that at least max(1,M) columns are
151  supplied in the array Z; if RANGE = 'V', the exact value of M
152  is not known in advance and an upper bound must be used.
153 
154  LDZ (input) INTEGER
155  The leading dimension of the array Z. LDZ >= 1, and if
156  JOBZ = 'V', LDZ >= max(1,N).
157 
158  WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
159  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
160 
161  LWORK (input) INTEGER
162  The length of the array WORK. LWORK >= max(1,2*N).
163  For optimal efficiency, LWORK >= (NB+1)*N,
164  where NB is the blocksize for ZHETRD returned by ILAENV.
165 
166  If LWORK = -1, then a workspace query is assumed; the routine
167  only calculates the optimal size of the WORK array, returns
168  this value as the first entry of the WORK array, and no error
169  message related to LWORK is issued by XERBLA.
170 
171  RWORK (workspace) DOUBLE PRECISION array, dimension (7*N)
172 
173  IWORK (workspace) INTEGER array, dimension (5*N)
174 
175  IFAIL (output) INTEGER array, dimension (N)
176  If JOBZ = 'V', then if INFO = 0, the first M elements of
177  IFAIL are zero. If INFO > 0, then IFAIL contains the
178  indices of the eigenvectors that failed to converge.
179  If JOBZ = 'N', then IFAIL is not referenced.
180 
181  INFO (output) INTEGER
182  = 0: successful exit
183  < 0: if INFO = -i, the i-th argument had an illegal value
184  > 0: ZPOTRF or ZHEEVX returned an error code:
185  <= N: if INFO = i, ZHEEVX failed to converge;
186  i eigenvectors failed to converge. Their indices
187  are stored in array IFAIL.
188  > N: if INFO = N + i, for 1 <= i <= N, then the leading
189  minor of order i of B is not positive definite.
190  The factorization of B could not be completed and
191  no eigenvalues or eigenvectors were computed.
192 
193  Further Details
194  ===============
195 
196  Based on contributions by
197  Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
198 ===================================================================== */
199 
200  char uplo_[2] = {uplo, 0};
201  char jobz_[2] = {jobz, 0};
202  char range_[2] = {range, 0};
203 
204  cuDoubleComplex c_one = MAGMA_Z_ONE;
205 
206  cuDoubleComplex *da;
207  cuDoubleComplex *db;
208  cuDoubleComplex *dz;
209  magma_int_t ldda = n;
210  magma_int_t lddb = n;
211  magma_int_t lddz = n;
212 
213  static magma_int_t lower;
214  static char trans[1];
215  static magma_int_t wantz;
216  static magma_int_t lquery;
217  static magma_int_t alleig, valeig, indeig;
218 
219  static magma_int_t lwmin;
220 
221  static cudaStream_t stream;
222  magma_queue_create( &stream );
223 
224  wantz = lapackf77_lsame(jobz_, MagmaVectorsStr);
225  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
226  alleig = lapackf77_lsame(range_, "A");
227  valeig = lapackf77_lsame(range_, "V");
228  indeig = lapackf77_lsame(range_, "I");
229  lquery = lwork == -1;
230 
231  *info = 0;
232  if (itype < 1 || itype > 3) {
233  *info = -1;
234  } else if (! (alleig || valeig || indeig)) {
235  *info = -2;
236  } else if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVectorsStr))) {
237  *info = -3;
238  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
239  *info = -4;
240  } else if (n < 0) {
241  *info = -5;
242  } else if (lda < max(1,n)) {
243  *info = -7;
244  } else if (ldb < max(1,n)) {
245  *info = -9;
246  } else if (ldz < 1 || wantz && ldz < n) {
247  *info = -18;
248  } else {
249  if (valeig) {
250  if (n > 0 && vu <= vl) {
251  *info = -11;
252  }
253  } else if (indeig) {
254  if (il < 1 || il > max(1,n)) {
255  *info = -12;
256  } else if (iu < min(n,il) || iu > n) {
257  *info = -13;
258  }
259  }
260  }
261 
263 
264  lwmin = n * (nb + 1);
265 
266  MAGMA_Z_SET2REAL(work[0],(double)lwmin);
267 
268 
269  if (lwork < lwmin && ! lquery) {
270  *info = -20;
271  }
272 
273  if (*info != 0) {
274  magma_xerbla( __func__, -(*info));
275  return *info;
276  } else if (lquery) {
277  return *info;
278  }
279 
280  /* Quick return if possible */
281  if (n == 0) {
282  return *info;
283  }
284 
285  if (MAGMA_SUCCESS != magma_zmalloc( &da, n*ldda ) ||
286  MAGMA_SUCCESS != magma_zmalloc( &db, n*lddb ) ||
287  MAGMA_SUCCESS != magma_zmalloc( &dz, n*lddz )) {
288  *info = MAGMA_ERR_DEVICE_ALLOC;
289  return *info;
290  }
291 
292  /* Form a Cholesky factorization of B. */
293 
294  magma_zsetmatrix( n, n, b, ldb, db, lddb );
295 
297  a, lda,
298  da, ldda, stream );
299 
300  magma_zpotrf_gpu(uplo_[0], n, db, lddb, info);
301  if (*info != 0) {
302  *info = n + *info;
303  return *info;
304  }
305 
306  magma_queue_sync( stream );
307 
309  db, lddb,
310  b, ldb, stream );
311 
312  /* Transform problem to standard eigenvalue problem and solve. */
313 
314  magma_zhegst_gpu(itype, uplo, n, da, ldda, db, lddb, info);
315 
316  magma_zheevx_gpu(jobz, range, uplo, n, da, ldda, vl, vu, il, iu, abstol, m, w, dz, lddz, a, lda, z, ldz, work, lwork, rwork, iwork, ifail, info);
317 
318  if (wantz && *info == 0) {
319 
320  /* Backtransform eigenvectors to the original problem. */
321 
322  if (itype == 1 || itype == 2) {
323 
324  /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
325  backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */
326 
327  if (lower) {
328  *(unsigned char *)trans = MagmaConjTrans;
329  } else {
330  *(unsigned char *)trans = MagmaNoTrans;
331  }
332 
333  Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
334 
335  } else if (itype == 3) {
336 
337  /* For B*A*x=(lambda)*x;
338  backtransform eigenvectors: x = L*y or U'*y */
339 
340  if (lower) {
341  *(unsigned char *)trans = MagmaNoTrans;
342  } else {
343  *(unsigned char *)trans = MagmaConjTrans;
344  }
345 
346  Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
347 
348  }
349 
350  magma_zgetmatrix( n, *m, dz, lddz, z, ldz );
351 
352  }
353 
354  magma_queue_sync( stream );
355 
356  magma_queue_destroy( stream );
357 
358  magma_free( da );
359  magma_free( db );
360  magma_free( dz );
361 
362  return *info;
363 
364 } /* zhegvx */