MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zhegvd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Raffaele Solca
9  @author Stan Tomov
10 
11  @precisions normal z -> c
12 
13 */
14 #include "common_magma.h"
15 
16 /* This ztrmm interface is used for TAU profiling */
17 void Mymagma_ztrmm(char side, char uplo, char trans, char unit,
19  cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
20  cuDoubleComplex *dz, magma_int_t lddz)
21 {
22  magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
24 }
25 
26 /* This ztrsm interface is used for TAU profiling */
27 void Mymagma_ztrsm(char side, char uplo, char trans, char unit,
29  cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
30  cuDoubleComplex *dz, magma_int_t lddz)
31 {
32  magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
34 }
35 
36 extern "C" magma_int_t
37 magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n,
38  cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
39  double *w, cuDoubleComplex *work, magma_int_t lwork,
40  double *rwork, magma_int_t lrwork,
41  magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
42 {
43 /* -- MAGMA (version 1.2.0) --
44  Univ. of Tennessee, Knoxville
45  Univ. of California, Berkeley
46  Univ. of Colorado, Denver
47  May 2012
48 
49  Purpose
50  =======
51  ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors
52  of a complex generalized Hermitian-definite eigenproblem, of the form
53  A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and
54  B are assumed to be Hermitian and B is also positive definite.
55  If eigenvectors are desired, it uses a divide and conquer algorithm.
56 
57  The divide and conquer algorithm makes very mild assumptions about
58  floating point arithmetic. It will work on machines with a guard
59  digit in add/subtract, or on those binary machines without guard
60  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
61  Cray-2. It could conceivably fail on hexadecimal or decimal machines
62  without guard digits, but we know of none.
63 
64  Arguments
65  =========
66  ITYPE (input) INTEGER
67  Specifies the problem type to be solved:
68  = 1: A*x = (lambda)*B*x
69  = 2: A*B*x = (lambda)*x
70  = 3: B*A*x = (lambda)*x
71 
72  JOBZ (input) CHARACTER*1
73  = 'N': Compute eigenvalues only;
74  = 'V': Compute eigenvalues and eigenvectors.
75 
76  UPLO (input) CHARACTER*1
77  = 'U': Upper triangles of A and B are stored;
78  = 'L': Lower triangles of A and B are stored.
79 
80  N (input) INTEGER
81  The order of the matrices A and B. N >= 0.
82 
83  A (input/output) COMPLEX*16 array, dimension (LDA, N)
84  On entry, the Hermitian matrix A. If UPLO = 'U', the
85  leading N-by-N upper triangular part of A contains the
86  upper triangular part of the matrix A. If UPLO = 'L',
87  the leading N-by-N lower triangular part of A contains
88  the lower triangular part of the matrix A.
89 
90  On exit, if JOBZ = 'V', then if INFO = 0, A contains the
91  matrix Z of eigenvectors. The eigenvectors are normalized
92  as follows:
93  if ITYPE = 1 or 2, Z**H*B*Z = I;
94  if ITYPE = 3, Z**H*inv(B)*Z = I.
95  If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
96  or the lower triangle (if UPLO='L') of A, including the
97  diagonal, is destroyed.
98 
99  LDA (input) INTEGER
100  The leading dimension of the array A. LDA >= max(1,N).
101 
102  B (input/output) COMPLEX*16 array, dimension (LDB, N)
103  On entry, the Hermitian matrix B. If UPLO = 'U', the
104  leading N-by-N upper triangular part of B contains the
105  upper triangular part of the matrix B. If UPLO = 'L',
106  the leading N-by-N lower triangular part of B contains
107  the lower triangular part of the matrix B.
108 
109  On exit, if INFO <= N, the part of B containing the matrix is
110  overwritten by the triangular factor U or L from the Cholesky
111  factorization B = U**H*U or B = L*L**H.
112 
113  LDB (input) INTEGER
114  The leading dimension of the array B. LDB >= max(1,N).
115 
116  W (output) DOUBLE PRECISION array, dimension (N)
117  If INFO = 0, the eigenvalues in ascending order.
118 
119  WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
120  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
121 
122  LWORK (input) INTEGER
123  The length of the array WORK.
124  If N <= 1, LWORK >= 1.
125  If JOBZ = 'N' and N > 1, LWORK >= N + 1.
126  If JOBZ = 'V' and N > 1, LWORK >= 2*N*nb + N**2.
127 
128  If LWORK = -1, then a workspace query is assumed; the routine
129  only calculates the optimal sizes of the WORK, RWORK and
130  IWORK arrays, returns these values as the first entries of
131  the WORK, RWORK and IWORK arrays, and no error message
132  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
133 
134  RWORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LRWORK))
135  On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
136 
137  LRWORK (input) INTEGER
138  The dimension of the array RWORK.
139  If N <= 1, LRWORK >= 1.
140  If JOBZ = 'N' and N > 1, LRWORK >= N.
141  If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
142 
143  If LRWORK = -1, then a workspace query is assumed; the
144  routine only calculates the optimal sizes of the WORK, RWORK
145  and IWORK arrays, returns these values as the first entries
146  of the WORK, RWORK and IWORK arrays, and no error message
147  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
148 
149  IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
150  On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
151 
152  LIWORK (input) INTEGER
153  The dimension of the array IWORK.
154  If N <= 1, LIWORK >= 1.
155  If JOBZ = 'N' and N > 1, LIWORK >= 1.
156  If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N.
157 
158  If LIWORK = -1, then a workspace query is assumed; the
159  routine only calculates the optimal sizes of the WORK, RWORK
160  and IWORK arrays, returns these values as the first entries
161  of the WORK, RWORK and IWORK arrays, and no error message
162  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
163 
164  INFO (output) INTEGER
165  = 0: successful exit
166  < 0: if INFO = -i, the i-th argument had an illegal value
167  > 0: ZPOTRF or ZHEEVD returned an error code:
168  <= N: if INFO = i and JOBZ = 'N', then the algorithm
169  failed to converge; i off-diagonal elements of an
170  intermediate tridiagonal form did not converge to
171  zero;
172  if INFO = i and JOBZ = 'V', then the algorithm
173  failed to compute an eigenvalue while working on
174  the submatrix lying in rows and columns INFO/(N+1)
175  through mod(INFO,N+1);
176  > N: if INFO = N + i, for 1 <= i <= N, then the leading
177  minor of order i of B is not positive definite.
178  The factorization of B could not be completed and
179  no eigenvalues or eigenvectors were computed.
180 
181  Further Details
182  ===============
183 
184  Based on contributions by
185  Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
186 
187  Modified so that no backsubstitution is performed if ZHEEVD fails to
188  converge (NEIG in old code could be greater than N causing out of
189  bounds reference to A - reported by Ralf Meyer). Also corrected the
190  description of INFO and the test on ITYPE. Sven, 16 Feb 05.
191  ===================================================================== */
192 
193  char uplo_[2] = {uplo, 0};
194  char jobz_[2] = {jobz, 0};
195 
196  cuDoubleComplex c_one = MAGMA_Z_ONE;
197 
198  cuDoubleComplex *da;
199  cuDoubleComplex *db;
200  magma_int_t ldda = n;
201  magma_int_t lddb = n;
202 
203  static magma_int_t lower;
204  static char trans[1];
205  static magma_int_t wantz;
206  static magma_int_t lquery;
207 
208  // static magma_int_t lopt;
209  static magma_int_t lwmin;
210  // static magma_int_t liopt;
211  static magma_int_t liwmin;
212  // static magma_int_t lropt;
213  static magma_int_t lrwmin;
214 
215  static cudaStream_t stream;
216  magma_queue_create( &stream );
217 
218  wantz = lapackf77_lsame(jobz_, MagmaVectorsStr);
219  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
220  lquery = lwork == -1 || lrwork == -1 || liwork == -1;
221 
222  *info = 0;
223  if (itype < 1 || itype > 3) {
224  *info = -1;
225  } else if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVectorsStr))) {
226  *info = -2;
227  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
228  *info = -3;
229  } else if (n < 0) {
230  *info = -4;
231  } else if (lda < max(1,n)) {
232  *info = -6;
233  } else if (ldb < max(1,n)) {
234  *info = -8;
235  }
236 
238 
239  if (wantz) {
240  lwmin = 2 * n + n * n;
241  lrwmin = 1 + 5 * n + 2 * n * n;
242  liwmin = 5 * n + 3;
243  } else {
244  lwmin = n * (nb + 1);
245  lrwmin = n;
246  liwmin = 1;
247  }
248 
249  MAGMA_Z_SET2REAL(work[0],(double)lwmin);
250  rwork[0] = lrwmin;
251  iwork[0] = liwmin;
252 
253  if (lwork < lwmin && ! lquery) {
254  *info = -11;
255  } else if (lrwork < lrwmin && ! lquery) {
256  *info = -13;
257  } else if (liwork < liwmin && ! lquery) {
258  *info = -15;
259  }
260 
261  if (*info != 0) {
262  magma_xerbla( __func__, -(*info) );
263  return *info;
264  }
265  else if (lquery) {
266  return *info;
267  }
268 
269  /* Quick return if possible */
270  if (n == 0) {
271  return *info;
272  }
273 
274  if (MAGMA_SUCCESS != magma_zmalloc( &da, n*ldda ) ||
275  MAGMA_SUCCESS != magma_zmalloc( &db, n*lddb )) {
276  *info = MAGMA_ERR_DEVICE_ALLOC;
277  return *info;
278  }
279 
280  /* Form a Cholesky factorization of B. */
281  magma_zsetmatrix( n, n, b, ldb, db, lddb );
282 
284  a, lda,
285  da, ldda, stream );
286 
287  magma_zpotrf_gpu(uplo_[0], n, db, lddb, info);
288  if (*info != 0) {
289  *info = n + *info;
290  return *info;
291  }
292 
293  magma_queue_sync( stream );
294 
296  db, lddb,
297  b, ldb, stream );
298 
299  /* Transform problem to standard eigenvalue problem and solve. */
300  magma_zhegst_gpu(itype, uplo_[0], n, da, ldda, db, lddb, info);
301 
302  magma_zheevd_gpu(jobz_[0], uplo_[0], n, da, ldda, w, a, lda,
303  work, lwork, rwork, lrwork, iwork, liwork, info);
304  /* Computing MAX */
305  // d__1 = (double) lopt, d__2 = work[1].r;
306  // lopt = (magma_int_t) max(d__1,d__2);
307  /* Computing MAX */
308  // d__1 = (double) lropt;
309  // lropt = (magma_int_t) max(d__1,rwork[1]);
310  /* Computing MAX */
311  // d__1 = (double) liopt, d__2 = (doublereal) iwork[1];
312  // liopt = (magma_int_t) max(d__1,d__2);
313 
314  if (wantz && *info == 0)
315  {
316  /* Backtransform eigenvectors to the original problem. */
317  if (itype == 1 || itype == 2)
318  {
319  /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
320  backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */
321  if (lower) {
322  *(unsigned char *)trans = MagmaConjTrans;
323  } else {
324  *(unsigned char *)trans = MagmaNoTrans;
325  }
326 
327  Mymagma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
328  n, n, c_one, db, lddb, da, ldda);
329 
330  } else if (itype == 3)
331  {
332  /* For B*A*x=(lambda)*x;
333  backtransform eigenvectors: x = L*y or U'*y */
334  if (lower) {
335  *(unsigned char *)trans = MagmaNoTrans;
336  } else {
337  *(unsigned char *)trans = MagmaConjTrans;
338  }
339 
340  Mymagma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
341  n, n, c_one, db, lddb, da, ldda);
342  }
343 
344  magma_zgetmatrix( n, n, da, ldda, a, lda );
345 
346  }
347 
348  magma_queue_sync( stream );
349  magma_queue_destroy( stream );
350 
351  /*work[0].r = (doublereal) lopt, work[0].i = 0.;
352  rwork[0] = (doublereal) lropt;
353  iwork[0] = liopt;*/
354 
355  magma_free( da );
356  magma_free( db );
357 
358  return *info;
359 } /* magma_zhegvd */