MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zhegvd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Raffaele Solca
9  @author Azzam Haidar
10  @author Stan Tomov
11 
12  @precisions normal z -> c
13 
14 */
15 #include "common_magma.h"
16 #define PRECISION_z
17 
18 extern "C" magma_int_t
19 magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n,
20  magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb,
21  double *w, magmaDoubleComplex *work, magma_int_t lwork,
22  double *rwork, magma_int_t lrwork,
23  magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
24 {
25 /* -- MAGMA (version 1.4.0) --
26  Univ. of Tennessee, Knoxville
27  Univ. of California, Berkeley
28  Univ. of Colorado, Denver
29  August 2013
30 
31  Purpose
32  =======
33  ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors
34  of a complex generalized Hermitian-definite eigenproblem, of the form
35  A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and
36  B are assumed to be Hermitian and B is also positive definite.
37  If eigenvectors are desired, it uses a divide and conquer algorithm.
38 
39  The divide and conquer algorithm makes very mild assumptions about
40  floating point arithmetic. It will work on machines with a guard
41  digit in add/subtract, or on those binary machines without guard
42  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
43  Cray-2. It could conceivably fail on hexadecimal or decimal machines
44  without guard digits, but we know of none.
45 
46  Arguments
47  =========
48  ITYPE (input) INTEGER
49  Specifies the problem type to be solved:
50  = 1: A*x = (lambda)*B*x
51  = 2: A*B*x = (lambda)*x
52  = 3: B*A*x = (lambda)*x
53 
54  JOBZ (input) CHARACTER*1
55  = 'N': Compute eigenvalues only;
56  = 'V': Compute eigenvalues and eigenvectors.
57 
58  UPLO (input) CHARACTER*1
59  = 'U': Upper triangles of A and B are stored;
60  = 'L': Lower triangles of A and B are stored.
61 
62  N (input) INTEGER
63  The order of the matrices A and B. N >= 0.
64 
65  A (input/output) COMPLEX_16 array, dimension (LDA, N)
66  On entry, the Hermitian matrix A. If UPLO = 'U', the
67  leading N-by-N upper triangular part of A contains the
68  upper triangular part of the matrix A. If UPLO = 'L',
69  the leading N-by-N lower triangular part of A contains
70  the lower triangular part of the matrix A.
71 
72  On exit, if JOBZ = 'V', then if INFO = 0, A contains the
73  matrix Z of eigenvectors. The eigenvectors are normalized
74  as follows:
75  if ITYPE = 1 or 2, Z**H*B*Z = I;
76  if ITYPE = 3, Z**H*inv(B)*Z = I.
77  If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
78  or the lower triangle (if UPLO='L') of A, including the
79  diagonal, is destroyed.
80 
81  LDA (input) INTEGER
82  The leading dimension of the array A. LDA >= max(1,N).
83 
84  B (input/output) COMPLEX_16 array, dimension (LDB, N)
85  On entry, the Hermitian matrix B. If UPLO = 'U', the
86  leading N-by-N upper triangular part of B contains the
87  upper triangular part of the matrix B. If UPLO = 'L',
88  the leading N-by-N lower triangular part of B contains
89  the lower triangular part of the matrix B.
90 
91  On exit, if INFO <= N, the part of B containing the matrix is
92  overwritten by the triangular factor U or L from the Cholesky
93  factorization B = U**H*U or B = L*L**H.
94 
95  LDB (input) INTEGER
96  The leading dimension of the array B. LDB >= max(1,N).
97 
98  W (output) DOUBLE PRECISION array, dimension (N)
99  If INFO = 0, the eigenvalues in ascending order.
100 
101  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
102  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
103 
104  LWORK (input) INTEGER
105  The length of the array WORK.
106  If N <= 1, LWORK >= 1.
107  If JOBZ = 'N' and N > 1, LWORK >= N + N*NB.
108  If JOBZ = 'V' and N > 1, LWORK >= max( N + N*NB, 2*N + N**2 ).
109  NB can be obtained through magma_get_zhetrd_nb(N).
110 
111  If LWORK = -1, then a workspace query is assumed; the routine
112  only calculates the optimal sizes of the WORK, RWORK and
113  IWORK arrays, returns these values as the first entries of
114  the WORK, RWORK and IWORK arrays, and no error message
115  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
116 
117  RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK)
118  On exit, if INFO = 0, RWORK[0] returns the optimal LRWORK.
119 
120  LRWORK (input) INTEGER
121  The dimension of the array RWORK.
122  If N <= 1, LRWORK >= 1.
123  If JOBZ = 'N' and N > 1, LRWORK >= N.
124  If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
125 
126  If LRWORK = -1, then a workspace query is assumed; the
127  routine only calculates the optimal sizes of the WORK, RWORK
128  and IWORK arrays, returns these values as the first entries
129  of the WORK, RWORK and IWORK arrays, and no error message
130  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
131 
132  IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
133  On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK.
134 
135  LIWORK (input) INTEGER
136  The dimension of the array IWORK.
137  If N <= 1, LIWORK >= 1.
138  If JOBZ = 'N' and N > 1, LIWORK >= 1.
139  If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N.
140 
141  If LIWORK = -1, then a workspace query is assumed; the
142  routine only calculates the optimal sizes of the WORK, RWORK
143  and IWORK arrays, returns these values as the first entries
144  of the WORK, RWORK and IWORK arrays, and no error message
145  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
146 
147  INFO (output) INTEGER
148  = 0: successful exit
149  < 0: if INFO = -i, the i-th argument had an illegal value
150  > 0: ZPOTRF or ZHEEVD returned an error code:
151  <= N: if INFO = i and JOBZ = 'N', then the algorithm
152  failed to converge; i off-diagonal elements of an
153  intermediate tridiagonal form did not converge to
154  zero;
155  if INFO = i and JOBZ = 'V', then the algorithm
156  failed to compute an eigenvalue while working on
157  the submatrix lying in rows and columns INFO/(N+1)
158  through mod(INFO,N+1);
159  > N: if INFO = N + i, for 1 <= i <= N, then the leading
160  minor of order i of B is not positive definite.
161  The factorization of B could not be completed and
162  no eigenvalues or eigenvectors were computed.
163 
164  Further Details
165  ===============
166  Based on contributions by
167  Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
168 
169  Modified so that no backsubstitution is performed if ZHEEVD fails to
170  converge (NEIG in old code could be greater than N causing out of
171  bounds reference to A - reported by Ralf Meyer). Also corrected the
172  description of INFO and the test on ITYPE. Sven, 16 Feb 05.
173  ===================================================================== */
174 
175  char uplo_[2] = {uplo, 0};
176  char jobz_[2] = {jobz, 0};
177 
178  magmaDoubleComplex c_one = MAGMA_Z_ONE;
179 
180  magmaDoubleComplex *da;
181  magmaDoubleComplex *db;
182  magma_int_t ldda = n;
183  magma_int_t lddb = n;
184 
185  magma_int_t lower;
186  char trans[1];
187  magma_int_t wantz;
188  magma_int_t lquery;
189 
190  magma_int_t lwmin;
191  magma_int_t liwmin;
192  magma_int_t lrwmin;
193 
194  magma_queue_t stream;
195  magma_queue_create( &stream );
196 
197  wantz = lapackf77_lsame(jobz_, MagmaVecStr);
198  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
199  lquery = lwork == -1 || lrwork == -1 || liwork == -1;
200 
201  *info = 0;
202  if (itype < 1 || itype > 3) {
203  *info = -1;
204  } else if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVecStr))) {
205  *info = -2;
206  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
207  *info = -3;
208  } else if (n < 0) {
209  *info = -4;
210  } else if (lda < max(1,n)) {
211  *info = -6;
212  } else if (ldb < max(1,n)) {
213  *info = -8;
214  }
215 
217  if ( n <= 1 ) {
218  lwmin = 1;
219  lrwmin = 1;
220  liwmin = 1;
221  }
222  else if ( wantz ) {
223  lwmin = max( n + n*nb, 2*n + n*n );
224  lrwmin = 1 + 5*n + 2*n*n;
225  liwmin = 3 + 5*n;
226  }
227  else {
228  lwmin = n + n*nb;
229  lrwmin = n;
230  liwmin = 1;
231  }
232 
233  work[0] = MAGMA_Z_MAKE( lwmin * (1. + lapackf77_dlamch("Epsilon")), 0.); // round up
234  rwork[0] = lrwmin * (1. + lapackf77_dlamch("Epsilon"));
235  iwork[0] = liwmin;
236 
237  if (lwork < lwmin && ! lquery) {
238  *info = -11;
239  } else if (lrwork < lrwmin && ! lquery) {
240  *info = -13;
241  } else if (liwork < liwmin && ! lquery) {
242  *info = -15;
243  }
244 
245  if (*info != 0) {
246  magma_xerbla( __func__, -(*info) );
247  return *info;
248  }
249  else if (lquery) {
250  return *info;
251  }
252 
253 /* Quick return if possible */
254  if (n == 0) {
255  return *info;
256  }
257 
258  /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
259  if (n <= 128){
260  #ifdef ENABLE_DEBUG
261  printf("--------------------------------------------------------------\n");
262  printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
263  printf("--------------------------------------------------------------\n");
264  #endif
265  lapackf77_zhegvd(&itype, jobz_, uplo_,
266  &n, a, &lda, b, &ldb,
267  w, work, &lwork,
268 #if defined(PRECISION_z) || defined(PRECISION_c)
269  rwork, &lrwork,
270 #endif
271  iwork, &liwork, info);
272  return *info;
273  }
274 
275  if (MAGMA_SUCCESS != magma_zmalloc( &da, n*ldda ) ||
276  MAGMA_SUCCESS != magma_zmalloc( &db, n*lddb )) {
277  *info = MAGMA_ERR_DEVICE_ALLOC;
278  return *info;
279  }
280 
281 /* Form a Cholesky factorization of B. */
282  magma_zsetmatrix( n, n, b, ldb, db, lddb );
283 
285  a, lda,
286  da, ldda, stream );
287 
288 #ifdef ENABLE_TIMER
289  magma_timestr_t start, end;
290  start = get_current_time();
291 #endif
292  magma_zpotrf_gpu(uplo, n, db, lddb, info);
293  if (*info != 0) {
294  *info = n + *info;
295  return *info;
296  }
297 #ifdef ENABLE_TIMER
298  end = get_current_time();
299  printf("time zpotrf_gpu = %6.2f\n", GetTimerValue(start,end)/1000.);
300 #endif
301 
302  magma_queue_sync( stream );
304  db, lddb,
305  b, ldb, stream );
306 
307 #ifdef ENABLE_TIMER
308  start = get_current_time();
309 #endif
310 /* Transform problem to standard eigenvalue problem and solve. */
311  magma_zhegst_gpu(itype, uplo, n, da, ldda, db, lddb, info);
312 #ifdef ENABLE_TIMER
313  end = get_current_time();
314  printf("time zhegst_gpu = %6.2f\n", GetTimerValue(start,end)/1000.);
315 #endif
316 
317  /* simple fix to be able to run bigger size.
318  * need to have a dwork here that will be used
319  * a db and then passed to dsyevd.
320  * */
321  if(n > 5000){
322  magma_queue_sync( stream );
323  magma_free( db );
324  }
325 
326 #ifdef ENABLE_TIMER
327  start = get_current_time();
328 #endif
329  magma_zheevd_gpu(jobz, uplo, n, da, ldda, w, a, lda,
330  work, lwork, rwork, lrwork, iwork, liwork, info);
331 #ifdef ENABLE_TIMER
332  end = get_current_time();
333  printf("time zheevd_gpu = %6.2f\n", GetTimerValue(start,end)/1000.);
334 #endif
335 
336  if (wantz && *info == 0)
337  {
338 #ifdef ENABLE_TIMER
339  start = get_current_time();
340 #endif
341  /* allocate and copy db back */
342  if(n > 5000){
343  if (MAGMA_SUCCESS != magma_zmalloc( &db, n*lddb ) ){
344  *info = MAGMA_ERR_DEVICE_ALLOC;
345  return *info;
346  }
347  magma_zsetmatrix( n, n, b, ldb, db, lddb );
348  }
349 /* Backtransform eigenvectors to the original problem. */
350  if (itype == 1 || itype == 2)
351  {
352 /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
353  backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */
354  if (lower) {
355  *(unsigned char *)trans = MagmaConjTrans;
356  } else {
357  *(unsigned char *)trans = MagmaNoTrans;
358  }
359 
360  magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit,
361  n, n, c_one, db, lddb, da, ldda);
362  }
363  else if (itype == 3)
364  {
365 /* For B*A*x=(lambda)*x;
366  backtransform eigenvectors: x = L*y or U'*y */
367  if (lower) {
368  *(unsigned char *)trans = MagmaNoTrans;
369  } else {
370  *(unsigned char *)trans = MagmaConjTrans;
371  }
372 
373  magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit,
374  n, n, c_one, db, lddb, da, ldda);
375  }
376 
377  magma_zgetmatrix( n, n, da, ldda, a, lda );
378 #ifdef ENABLE_TIMER
379  end = get_current_time();
380  printf("time ztrsm/mm + getmatrix = %6.2f\n", GetTimerValue(start,end)/1000.);
381 #endif
382  /* free db */
383  if(n > 5000){
384  magma_free( db );
385  }
386  }
387 
388  magma_queue_sync( stream );
389  magma_queue_destroy( stream );
390 
391  work[0] = MAGMA_Z_MAKE( lwmin * (1. + lapackf77_dlamch("Epsilon")), 0.); // round up
392  rwork[0] = lrwmin * (1. + lapackf77_dlamch("Epsilon"));
393  iwork[0] = liwmin;
394 
395  magma_free( da );
396  if(n <= 5000){
397  magma_free( db );
398  }
399 
400  return *info;
401 } /* magma_zhegvd */
void magma_ztrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
magma_int_t magma_get_zhetrd_nb(magma_int_t m)
Definition: get_nb.cpp:423
static magma_err_t magma_zmalloc(magmaDoubleComplex_ptr *ptrPtr, size_t n)
Definition: magma.h:80
#define PRECISION_c
Definition: cprint.cpp:14
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
magma_int_t magma_zhegst_gpu(magma_int_t itype, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
#define lapackf77_dlamch
Definition: magma_lapack.h:27
void magma_ztrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
magma_int_t magma_zpotrf_gpu(char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
magma_int_t magma_zheevd_gpu(char jobz, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, double *w, cuDoubleComplex *wa, magma_int_t ldwa, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
magma_int_t ldda
#define MagmaLowerStr
Definition: magma.h:85
#define PRECISION_z
Definition: zhegvd.cpp:16
magma_int_t magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, double *w, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define lapackf77_zhegvd
Definition: magma_zlapack.h:69
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
#define MagmaNoVecStr
Definition: magma_types.h:423
#define MAGMA_Z_ONE
Definition: magma.h:132
double GetTimerValue(magma_timestr_t start, magma_timestr_t end)
Definition: timer.cpp:94
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define magma_zsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_z.h:702
#define MagmaVecStr
Definition: magma_types.h:424
#define max(a, b)
Definition: common_magma.h:82
magma_timestr_t get_current_time(void)
Definition: timer.cpp:76
#define magma_queue_sync(queue)
Definition: magma.h:119