MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
zhegvd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @author Raffaele Solca
9  @author Azzam Haidar
10  @author Stan Tomov
11
12  @precisions normal z -> c
13
14 */
15 #include "common_magma.h"
16 #define PRECISION_z
17
18 extern "C" magma_int_t
19 magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n,
21  double *w, magmaDoubleComplex *work, magma_int_t lwork,
22  double *rwork, magma_int_t lrwork,
23  magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
24 {
25 /* -- MAGMA (version 1.4.0) --
26  Univ. of Tennessee, Knoxville
27  Univ. of California, Berkeley
29  August 2013
30
31  Purpose
32  =======
33  ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors
34  of a complex generalized Hermitian-definite eigenproblem, of the form
35  A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and
36  B are assumed to be Hermitian and B is also positive definite.
37  If eigenvectors are desired, it uses a divide and conquer algorithm.
38
39  The divide and conquer algorithm makes very mild assumptions about
40  floating point arithmetic. It will work on machines with a guard
41  digit in add/subtract, or on those binary machines without guard
42  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
43  Cray-2. It could conceivably fail on hexadecimal or decimal machines
44  without guard digits, but we know of none.
45
46  Arguments
47  =========
48  ITYPE (input) INTEGER
49  Specifies the problem type to be solved:
50  = 1: A*x = (lambda)*B*x
51  = 2: A*B*x = (lambda)*x
52  = 3: B*A*x = (lambda)*x
53
54  JOBZ (input) CHARACTER*1
55  = 'N': Compute eigenvalues only;
56  = 'V': Compute eigenvalues and eigenvectors.
57
58  UPLO (input) CHARACTER*1
59  = 'U': Upper triangles of A and B are stored;
60  = 'L': Lower triangles of A and B are stored.
61
62  N (input) INTEGER
63  The order of the matrices A and B. N >= 0.
64
65  A (input/output) COMPLEX_16 array, dimension (LDA, N)
66  On entry, the Hermitian matrix A. If UPLO = 'U', the
67  leading N-by-N upper triangular part of A contains the
68  upper triangular part of the matrix A. If UPLO = 'L',
69  the leading N-by-N lower triangular part of A contains
70  the lower triangular part of the matrix A.
71
72  On exit, if JOBZ = 'V', then if INFO = 0, A contains the
73  matrix Z of eigenvectors. The eigenvectors are normalized
74  as follows:
75  if ITYPE = 1 or 2, Z**H*B*Z = I;
76  if ITYPE = 3, Z**H*inv(B)*Z = I.
77  If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
78  or the lower triangle (if UPLO='L') of A, including the
79  diagonal, is destroyed.
80
81  LDA (input) INTEGER
82  The leading dimension of the array A. LDA >= max(1,N).
83
84  B (input/output) COMPLEX_16 array, dimension (LDB, N)
85  On entry, the Hermitian matrix B. If UPLO = 'U', the
86  leading N-by-N upper triangular part of B contains the
87  upper triangular part of the matrix B. If UPLO = 'L',
88  the leading N-by-N lower triangular part of B contains
89  the lower triangular part of the matrix B.
90
91  On exit, if INFO <= N, the part of B containing the matrix is
92  overwritten by the triangular factor U or L from the Cholesky
93  factorization B = U**H*U or B = L*L**H.
94
95  LDB (input) INTEGER
96  The leading dimension of the array B. LDB >= max(1,N).
97
98  W (output) DOUBLE PRECISION array, dimension (N)
99  If INFO = 0, the eigenvalues in ascending order.
100
101  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
102  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
103
104  LWORK (input) INTEGER
105  The length of the array WORK.
106  If N <= 1, LWORK >= 1.
107  If JOBZ = 'N' and N > 1, LWORK >= N + N*NB.
108  If JOBZ = 'V' and N > 1, LWORK >= max( N + N*NB, 2*N + N**2 ).
109  NB can be obtained through magma_get_zhetrd_nb(N).
110
111  If LWORK = -1, then a workspace query is assumed; the routine
112  only calculates the optimal sizes of the WORK, RWORK and
113  IWORK arrays, returns these values as the first entries of
114  the WORK, RWORK and IWORK arrays, and no error message
115  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
116
117  RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK)
118  On exit, if INFO = 0, RWORK[0] returns the optimal LRWORK.
119
120  LRWORK (input) INTEGER
121  The dimension of the array RWORK.
122  If N <= 1, LRWORK >= 1.
123  If JOBZ = 'N' and N > 1, LRWORK >= N.
124  If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
125
126  If LRWORK = -1, then a workspace query is assumed; the
127  routine only calculates the optimal sizes of the WORK, RWORK
128  and IWORK arrays, returns these values as the first entries
129  of the WORK, RWORK and IWORK arrays, and no error message
130  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
131
132  IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
133  On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK.
134
135  LIWORK (input) INTEGER
136  The dimension of the array IWORK.
137  If N <= 1, LIWORK >= 1.
138  If JOBZ = 'N' and N > 1, LIWORK >= 1.
139  If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N.
140
141  If LIWORK = -1, then a workspace query is assumed; the
142  routine only calculates the optimal sizes of the WORK, RWORK
143  and IWORK arrays, returns these values as the first entries
144  of the WORK, RWORK and IWORK arrays, and no error message
145  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
146
147  INFO (output) INTEGER
148  = 0: successful exit
149  < 0: if INFO = -i, the i-th argument had an illegal value
150  > 0: ZPOTRF or ZHEEVD returned an error code:
151  <= N: if INFO = i and JOBZ = 'N', then the algorithm
152  failed to converge; i off-diagonal elements of an
153  intermediate tridiagonal form did not converge to
154  zero;
155  if INFO = i and JOBZ = 'V', then the algorithm
156  failed to compute an eigenvalue while working on
157  the submatrix lying in rows and columns INFO/(N+1)
158  through mod(INFO,N+1);
159  > N: if INFO = N + i, for 1 <= i <= N, then the leading
160  minor of order i of B is not positive definite.
161  The factorization of B could not be completed and
162  no eigenvalues or eigenvectors were computed.
163
164  Further Details
165  ===============
166  Based on contributions by
167  Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
168
169  Modified so that no backsubstitution is performed if ZHEEVD fails to
170  converge (NEIG in old code could be greater than N causing out of
171  bounds reference to A - reported by Ralf Meyer). Also corrected the
172  description of INFO and the test on ITYPE. Sven, 16 Feb 05.
173  ===================================================================== */
174
175  char uplo_[2] = {uplo, 0};
176  char jobz_[2] = {jobz, 0};
177
179
182  magma_int_t ldda = n;
183  magma_int_t lddb = n;
184
185  magma_int_t lower;
186  char trans[1];
187  magma_int_t wantz;
188  magma_int_t lquery;
189
190  magma_int_t lwmin;
191  magma_int_t liwmin;
192  magma_int_t lrwmin;
193
194  magma_queue_t stream;
195  magma_queue_create( &stream );
196
197  wantz = lapackf77_lsame(jobz_, MagmaVecStr);
198  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
199  lquery = lwork == -1 || lrwork == -1 || liwork == -1;
200
201  *info = 0;
202  if (itype < 1 || itype > 3) {
203  *info = -1;
204  } else if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVecStr))) {
205  *info = -2;
206  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
207  *info = -3;
208  } else if (n < 0) {
209  *info = -4;
210  } else if (lda < max(1,n)) {
211  *info = -6;
212  } else if (ldb < max(1,n)) {
213  *info = -8;
214  }
215
217  if ( n <= 1 ) {
218  lwmin = 1;
219  lrwmin = 1;
220  liwmin = 1;
221  }
222  else if ( wantz ) {
223  lwmin = max( n + n*nb, 2*n + n*n );
224  lrwmin = 1 + 5*n + 2*n*n;
225  liwmin = 3 + 5*n;
226  }
227  else {
228  lwmin = n + n*nb;
229  lrwmin = n;
230  liwmin = 1;
231  }
232
233  work[0] = MAGMA_Z_MAKE( lwmin * (1. + lapackf77_dlamch("Epsilon")), 0.); // round up
234  rwork[0] = lrwmin * (1. + lapackf77_dlamch("Epsilon"));
235  iwork[0] = liwmin;
236
237  if (lwork < lwmin && ! lquery) {
238  *info = -11;
239  } else if (lrwork < lrwmin && ! lquery) {
240  *info = -13;
241  } else if (liwork < liwmin && ! lquery) {
242  *info = -15;
243  }
244
245  if (*info != 0) {
246  magma_xerbla( __func__, -(*info) );
247  return *info;
248  }
249  else if (lquery) {
250  return *info;
251  }
252
253 /* Quick return if possible */
254  if (n == 0) {
255  return *info;
256  }
257
258  /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
259  if (n <= 128){
260  #ifdef ENABLE_DEBUG
261  printf("--------------------------------------------------------------\n");
262  printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
263  printf("--------------------------------------------------------------\n");
264  #endif
265  lapackf77_zhegvd(&itype, jobz_, uplo_,
266  &n, a, &lda, b, &ldb,
267  w, work, &lwork,
268 #if defined(PRECISION_z) || defined(PRECISION_c)
269  rwork, &lrwork,
270 #endif
271  iwork, &liwork, info);
272  return *info;
273  }
274
275  if (MAGMA_SUCCESS != magma_zmalloc( &da, n*ldda ) ||
276  MAGMA_SUCCESS != magma_zmalloc( &db, n*lddb )) {
277  *info = MAGMA_ERR_DEVICE_ALLOC;
278  return *info;
279  }
280
281 /* Form a Cholesky factorization of B. */
282  magma_zsetmatrix( n, n, b, ldb, db, lddb );
283
285  a, lda,
286  da, ldda, stream );
287
288 #ifdef ENABLE_TIMER
289  magma_timestr_t start, end;
290  start = get_current_time();
291 #endif
292  magma_zpotrf_gpu(uplo, n, db, lddb, info);
293  if (*info != 0) {
294  *info = n + *info;
295  return *info;
296  }
297 #ifdef ENABLE_TIMER
298  end = get_current_time();
299  printf("time zpotrf_gpu = %6.2f\n", GetTimerValue(start,end)/1000.);
300 #endif
301
302  magma_queue_sync( stream );
304  db, lddb,
305  b, ldb, stream );
306
307 #ifdef ENABLE_TIMER
308  start = get_current_time();
309 #endif
310 /* Transform problem to standard eigenvalue problem and solve. */
311  magma_zhegst_gpu(itype, uplo, n, da, ldda, db, lddb, info);
312 #ifdef ENABLE_TIMER
313  end = get_current_time();
314  printf("time zhegst_gpu = %6.2f\n", GetTimerValue(start,end)/1000.);
315 #endif
316
317  /* simple fix to be able to run bigger size.
318  * need to have a dwork here that will be used
319  * a db and then passed to dsyevd.
320  * */
321  if(n > 5000){
322  magma_queue_sync( stream );
323  magma_free( db );
324  }
325
326 #ifdef ENABLE_TIMER
327  start = get_current_time();
328 #endif
329  magma_zheevd_gpu(jobz, uplo, n, da, ldda, w, a, lda,
330  work, lwork, rwork, lrwork, iwork, liwork, info);
331 #ifdef ENABLE_TIMER
332  end = get_current_time();
333  printf("time zheevd_gpu = %6.2f\n", GetTimerValue(start,end)/1000.);
334 #endif
335
336  if (wantz && *info == 0)
337  {
338 #ifdef ENABLE_TIMER
339  start = get_current_time();
340 #endif
341  /* allocate and copy db back */
342  if(n > 5000){
343  if (MAGMA_SUCCESS != magma_zmalloc( &db, n*lddb ) ){
344  *info = MAGMA_ERR_DEVICE_ALLOC;
345  return *info;
346  }
347  magma_zsetmatrix( n, n, b, ldb, db, lddb );
348  }
349 /* Backtransform eigenvectors to the original problem. */
350  if (itype == 1 || itype == 2)
351  {
352 /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
353  backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */
354  if (lower) {
355  *(unsigned char *)trans = MagmaConjTrans;
356  } else {
357  *(unsigned char *)trans = MagmaNoTrans;
358  }
359
360  magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit,
361  n, n, c_one, db, lddb, da, ldda);
362  }
363  else if (itype == 3)
364  {
365 /* For B*A*x=(lambda)*x;
366  backtransform eigenvectors: x = L*y or U'*y */
367  if (lower) {
368  *(unsigned char *)trans = MagmaNoTrans;
369  } else {
370  *(unsigned char *)trans = MagmaConjTrans;
371  }
372
373  magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit,
374  n, n, c_one, db, lddb, da, ldda);
375  }
376
377  magma_zgetmatrix( n, n, da, ldda, a, lda );
378 #ifdef ENABLE_TIMER
379  end = get_current_time();
380  printf("time ztrsm/mm + getmatrix = %6.2f\n", GetTimerValue(start,end)/1000.);
381 #endif
382  /* free db */
383  if(n > 5000){
384  magma_free( db );
385  }
386  }
387
388  magma_queue_sync( stream );
389  magma_queue_destroy( stream );
390
391  work[0] = MAGMA_Z_MAKE( lwmin * (1. + lapackf77_dlamch("Epsilon")), 0.); // round up
392  rwork[0] = lrwmin * (1. + lapackf77_dlamch("Epsilon"));
393  iwork[0] = liwmin;
394
395  magma_free( da );
396  if(n <= 5000){
397  magma_free( db );
398  }
399
400  return *info;
401 } /* magma_zhegvd */
void magma_ztrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
magma_int_t magma_get_zhetrd_nb(magma_int_t m)
Definition: get_nb.cpp:423
static magma_err_t magma_zmalloc(magmaDoubleComplex_ptr *ptrPtr, size_t n)
Definition: magma.h:80
#define PRECISION_c
Definition: cprint.cpp:14
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
magma_int_t magma_zhegst_gpu(magma_int_t itype, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
#define lapackf77_dlamch
Definition: magma_lapack.h:27
void magma_ztrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
magma_int_t magma_zpotrf_gpu(char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
magma_int_t magma_zheevd_gpu(char jobz, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, double *w, cuDoubleComplex *wa, magma_int_t ldwa, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
magma_int_t ldda
#define MagmaLowerStr
Definition: magma.h:85
#define PRECISION_z
Definition: zhegvd.cpp:16
magma_int_t magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, double *w, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define lapackf77_zhegvd
Definition: magma_zlapack.h:69
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
#define MagmaNoVecStr
Definition: magma_types.h:423
#define MAGMA_Z_ONE
Definition: magma.h:132
double GetTimerValue(magma_timestr_t start, magma_timestr_t end)
Definition: timer.cpp:94
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define magma_zsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_z.h:702
#define MagmaVecStr
Definition: magma_types.h:424
#define max(a, b)
Definition: common_magma.h:82
magma_timestr_t get_current_time(void)
Definition: timer.cpp:76
#define magma_queue_sync(queue)
Definition: magma.h:119