MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zheevd_m.cpp File Reference
#include "common_magma.h"
Include dependency graph for zheevd_m.cpp:

Go to the source code of this file.

Macros

#define PRECISION_z
 

Functions

magma_int_t magma_zheevd_m (magma_int_t nrgpu, char jobz, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 

Macro Definition Documentation

#define PRECISION_z

Definition at line 16 of file zheevd_m.cpp.

Function Documentation

magma_int_t magma_zheevd_m ( magma_int_t  nrgpu,
char  jobz,
char  uplo,
magma_int_t  n,
magmaDoubleComplex *  a,
magma_int_t  lda,
double *  w,
magmaDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t  lrwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Definition at line 19 of file zheevd_m.cpp.

References __func__, blasf77_dscal(), dwork, get_current_time(), GetTimerValue(), lapackf77_dlamch, lapackf77_dsterf, lapackf77_lsame, lapackf77_zheevd, lapackf77_zlacpy, lapackf77_zlanhe, lapackf77_zlascl, magma_dmalloc(), magma_dsqrt, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zhetrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, MAGMA_Z_REAL, magma_zhetrd_mgpu(), magma_zstedx(), magma_zstedx_m(), magma_zunmtr_m(), MagmaLeft, MagmaLowerStr, MagmaNoTrans, MagmaNoVecStr, MagmaUpperStr, MagmaVecStr, max, PRECISION_c, and PRECISION_z.

27 {
28 /* -- MAGMA (version 1.4.0) --
29  Univ. of Tennessee, Knoxville
30  Univ. of California, Berkeley
31  Univ. of Colorado, Denver
32  August 2013
33 
34  Purpose
35  =======
36  ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a
37  complex Hermitian matrix A. If eigenvectors are desired, it uses a
38  divide and conquer algorithm.
39 
40  The divide and conquer algorithm makes very mild assumptions about
41  floating point arithmetic. It will work on machines with a guard
42  digit in add/subtract, or on those binary machines without guard
43  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
44  Cray-2. It could conceivably fail on hexadecimal or decimal machines
45  without guard digits, but we know of none.
46 
47  Arguments
48  =========
49  JOBZ (input) CHARACTER*1
50  = 'N': Compute eigenvalues only;
51  = 'V': Compute eigenvalues and eigenvectors.
52 
53  UPLO (input) CHARACTER*1
54  = 'U': Upper triangle of A is stored;
55  = 'L': Lower triangle of A is stored.
56 
57  N (input) INTEGER
58  The order of the matrix A. N >= 0.
59 
60  A (input/output) COMPLEX_16 array, dimension (LDA, N)
61  On entry, the Hermitian matrix A. If UPLO = 'U', the
62  leading N-by-N upper triangular part of A contains the
63  upper triangular part of the matrix A. If UPLO = 'L',
64  the leading N-by-N lower triangular part of A contains
65  the lower triangular part of the matrix A.
66  On exit, if JOBZ = 'V', then if INFO = 0, A contains the
67  orthonormal eigenvectors of the matrix A.
68  If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
69  or the upper triangle (if UPLO='U') of A, including the
70  diagonal, is destroyed.
71 
72  LDA (input) INTEGER
73  The leading dimension of the array A. LDA >= max(1,N).
74 
75  W (output) DOUBLE PRECISION array, dimension (N)
76  If INFO = 0, the eigenvalues in ascending order.
77 
78  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
79  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
80 
81  LWORK (input) INTEGER
82  The length of the array WORK.
83  If N <= 1, LWORK >= 1.
84  If JOBZ = 'N' and N > 1, LWORK >= N * (NB + 1).
85  If JOBZ = 'V' and N > 1, LWORK >= 2*N + N**2.
86 
87  If LWORK = -1, then a workspace query is assumed; the routine
88  only calculates the optimal sizes of the WORK, RWORK and
89  IWORK arrays, returns these values as the first entries of
90  the WORK, RWORK and IWORK arrays, and no error message
91  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
92 
93  RWORK (workspace/output) DOUBLE PRECISION array,
94  dimension (LRWORK)
95  On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
96 
97  LRWORK (input) INTEGER
98  The dimension of the array RWORK.
99  If N <= 1, LRWORK >= 1.
100  If JOBZ = 'N' and N > 1, LRWORK >= N.
101  If JOBZ = 'V' and N > 1, LRWORK >=
102  1 + 5*N + 2*N**2.
103 
104  If LRWORK = -1, then a workspace query is assumed; the
105  routine only calculates the optimal sizes of the WORK, RWORK
106  and IWORK arrays, returns these values as the first entries
107  of the WORK, RWORK and IWORK arrays, and no error message
108  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
109 
110  IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
111  On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
112 
113  LIWORK (input) INTEGER
114  The dimension of the array IWORK.
115  If N <= 1, LIWORK >= 1.
116  If JOBZ = 'N' and N > 1, LIWORK >= 1.
117  If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N.
118 
119  If LIWORK = -1, then a workspace query is assumed; the
120  routine only calculates the optimal sizes of the WORK, RWORK
121  and IWORK arrays, returns these values as the first entries
122  of the WORK, RWORK and IWORK arrays, and no error message
123  related to LWORK or LRWORK or LIWORK is issued by XERBLA.
124 
125  INFO (output) INTEGER
126  = 0: successful exit
127  < 0: if INFO = -i, the i-th argument had an illegal value
128  > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
129  to converge; i off-diagonal elements of an intermediate
130  tridiagonal form did not converge to zero;
131  if INFO = i and JOBZ = 'V', then the algorithm failed
132  to compute an eigenvalue while working on the submatrix
133  lying in rows and columns INFO/(N+1) through
134  mod(INFO,N+1).
135 
136  Further Details
137  ===============
138  Based on contributions by
139  Jeff Rutter, Computer Science Division, University of California
140  at Berkeley, USA
141 
142  Modified description of INFO. Sven, 16 Feb 05.
143  ===================================================================== */
144 
145  char uplo_[2] = {uplo, 0};
146  char jobz_[2] = {jobz, 0};
147  magma_int_t ione = 1;
148  magma_int_t izero = 0;
149  double d_one = 1.;
150 
151  double d__1;
152 
153  double eps;
154  magma_int_t inde;
155  double anrm;
156  magma_int_t imax;
157  double rmin, rmax;
158  double sigma;
159  magma_int_t iinfo, lwmin;
160  magma_int_t lower;
161  magma_int_t llrwk;
162  magma_int_t wantz;
163  magma_int_t indwk2, llwrk2;
164  magma_int_t iscale;
165  double safmin;
166  double bignum;
167  magma_int_t indtau;
168  magma_int_t indrwk, indwrk, liwmin;
169  magma_int_t lrwmin, llwork;
170  double smlnum;
171  magma_int_t lquery;
172 
173  wantz = lapackf77_lsame(jobz_, MagmaVecStr);
174  lower = lapackf77_lsame(uplo_, MagmaLowerStr);
175  lquery = lwork == -1 || lrwork == -1 || liwork == -1;
176 
177  *info = 0;
178  if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVecStr))) {
179  *info = -1;
180  } else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
181  *info = -2;
182  } else if (n < 0) {
183  *info = -3;
184  } else if (lda < max(1,n)) {
185  *info = -5;
186  }
187 
189  if ( n <= 1 ) {
190  lwmin = 1;
191  lrwmin = 1;
192  liwmin = 1;
193  }
194  else if ( wantz ) {
195  lwmin = 2*n + n*n;
196  lrwmin = 1 + 5*n + 2*n*n;
197  liwmin = 3 + 5*n;
198  }
199  else {
200  lwmin = n + n*nb;
201  lrwmin = n;
202  liwmin = 1;
203  }
204  // multiply by 1+eps to ensure length gets rounded up,
205  // if it cannot be exactly represented in floating point.
206  work[0] = MAGMA_Z_MAKE( lwmin * (1. + lapackf77_dlamch("Epsilon")), 0.);
207  rwork[0] = lrwmin * (1. + lapackf77_dlamch("Epsilon"));
208  iwork[0] = liwmin;
209 
210  if ((lwork < lwmin) && !lquery) {
211  *info = -8;
212  } else if ((lrwork < lrwmin) && ! lquery) {
213  *info = -10;
214  } else if ((liwork < liwmin) && ! lquery) {
215  *info = -12;
216  }
217 
218  if (*info != 0) {
219  magma_xerbla( __func__, -(*info) );
220  return *info;
221  }
222  else if (lquery) {
223  return *info;
224  }
225 
226  /* Quick return if possible */
227  if (n == 0) {
228  return *info;
229  }
230 
231  if (n == 1) {
232  w[0] = MAGMA_Z_REAL(a[0]);
233  if (wantz) {
234  a[0] = MAGMA_Z_ONE;
235  }
236  return *info;
237  }
238 
239  /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
240  if (n <= 128){
241  #ifdef ENABLE_DEBUG
242  printf("--------------------------------------------------------------\n");
243  printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
244  printf("--------------------------------------------------------------\n");
245  #endif
246  lapackf77_zheevd(jobz_, uplo_,
247  &n, a, &lda,
248  w, work, &lwork,
249 #if defined(PRECISION_z) || defined(PRECISION_c)
250  rwork, &lrwork,
251 #endif
252  iwork, &liwork, info);
253  return *info;
254  }
255 
256  /* Get machine constants. */
257  safmin = lapackf77_dlamch("Safe minimum");
258  eps = lapackf77_dlamch("Precision");
259  smlnum = safmin / eps;
260  bignum = 1. / smlnum;
261  rmin = magma_dsqrt(smlnum);
262  rmax = magma_dsqrt(bignum);
263 
264  /* Scale matrix to allowable range, if necessary. */
265  anrm = lapackf77_zlanhe("M", uplo_, &n, a, &lda, rwork);
266  iscale = 0;
267  if (anrm > 0. && anrm < rmin) {
268  iscale = 1;
269  sigma = rmin / anrm;
270  } else if (anrm > rmax) {
271  iscale = 1;
272  sigma = rmax / anrm;
273  }
274  if (iscale == 1) {
275  lapackf77_zlascl(uplo_, &izero, &izero, &d_one, &sigma, &n, &n, a,
276  &lda, info);
277  }
278 
279  /* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */
280  // zhetrd rwork: e (n)
281  // zstedx rwork: e (n) + llrwk (1 + 4*N + 2*N**2) ==> 1 + 5n + 2n^2
282  inde = 0;
283  indrwk = inde + n;
284  llrwk = lrwork - indrwk;
285 
286  // zhetrd work: tau (n) + llwork (n*nb) ==> n + n*nb
287  // zstedx work: tau (n) + z (n^2)
288  // zunmtr work: tau (n) + z (n^2) + llwrk2 (n or n*nb) ==> 2n + n^2, or n + n*nb + n^2
289  indtau = 0;
290  indwrk = indtau + n;
291  indwk2 = indwrk + n*n;
292  llwork = lwork - indwrk;
293  llwrk2 = lwork - indwk2;
294 
295 //
296 #ifdef ENABLE_TIMER
297  magma_timestr_t start, end;
298  start = get_current_time();
299 #endif
300 
301  magma_zhetrd_mgpu(nrgpu, 1, uplo_[0], n, a, lda, w, &rwork[inde],
302  &work[indtau], &work[indwrk], llwork, &iinfo);
303 
304 #ifdef ENABLE_TIMER
305  end = get_current_time();
306  printf("time zhetrd = %6.2f\n", GetTimerValue(start,end)/1000.);
307 #endif
308 
309  /* For eigenvalues only, call DSTERF. For eigenvectors, first call
310  ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
311  tridiagonal matrix, then call ZUNMTR to multiply it to the Householder
312  transformations represented as Householder vectors in A. */
313  if (! wantz) {
314  lapackf77_dsterf(&n, w, &rwork[inde], info);
315  } else {
316 
317 #ifdef ENABLE_TIMER
318  start = get_current_time();
319 #endif
320 
321 #ifdef USE_SINGLE_GPU
322  if (MAGMA_SUCCESS != magma_dmalloc( &dwork, 3*n*(n/2 + 1) )) {
323  *info = MAGMA_ERR_DEVICE_ALLOC;
324  return *info;
325  }
326 
327  magma_zstedx('A', n, 0, 0, 0, 0, w, &rwork[inde],
328  &work[indwrk], n, &rwork[indrwk],
329  llrwk, iwork, liwork, dwork, info);
330 
331  magma_free( dwork );
332 #else
333  magma_zstedx_m(nrgpu, 'A', n, 0, 0, 0, 0, w, &rwork[inde],
334  &work[indwrk], n, &rwork[indrwk],
335  llrwk, iwork, liwork, info);
336 #endif
337 
338 #ifdef ENABLE_TIMER
339  end = get_current_time();
340  printf("time zstedc = %6.2f\n", GetTimerValue(start,end)/1000.);
341  start = get_current_time();
342 #endif
343 
344  magma_zunmtr_m(nrgpu, MagmaLeft, uplo, MagmaNoTrans, n, n, a, lda, &work[indtau],
345  &work[indwrk], n, &work[indwk2], llwrk2, &iinfo);
346 
347  lapackf77_zlacpy("A", &n, &n, &work[indwrk], &n, a, &lda);
348 
349 #ifdef ENABLE_TIMER
350  end = get_current_time();
351  printf("time zunmtr + copy = %6.2f\n", GetTimerValue(start,end)/1000.);
352 #endif
353 
354  }
355 
356  /* If matrix was scaled, then rescale eigenvalues appropriately. */
357  if (iscale == 1) {
358  if (*info == 0) {
359  imax = n;
360  } else {
361  imax = *info - 1;
362  }
363  d__1 = 1. / sigma;
364  blasf77_dscal(&imax, &d__1, w, &ione);
365  }
366 
367  work[0] = MAGMA_Z_MAKE( lwmin * (1. + lapackf77_dlamch("Epsilon")), 0.); // round up
368  rwork[0] = lrwmin * (1. + lapackf77_dlamch("Epsilon"));
369  iwork[0] = liwmin;
370 
371  return *info;
372 } /* magma_zheevd_m */
#define magma_dsqrt
Definition: common_magma.h:98
#define lapackf77_dsterf
Definition: magma_lapack.h:33
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
magma_int_t magma_get_zhetrd_nb(magma_int_t m)
Definition: get_nb.cpp:423
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define PRECISION_c
Definition: cprint.cpp:14
#define lapackf77_zlanhe
Definition: magma_zlapack.h:76
magma_int_t magma_zhetrd_mgpu(magma_int_t num_gpus, magma_int_t k, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double *d, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define lapackf77_zlacpy
Definition: magma_zlapack.h:73
magma_int_t magma_zstedx_m(magma_int_t nrgpu, char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *D, double *E, magmaDoubleComplex *Z, magma_int_t ldz, double *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
Definition: zstedx_m.cpp:15
#define lapackf77_dlamch
Definition: magma_lapack.h:27
#define dwork(dev, i, j)
magma_int_t magma_zunmtr_m(magma_int_t nrgpu, char side, char uplo, char trans, magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *c, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
Definition: zunmtr_m.cpp:17
#define lapackf77_zheevd
Definition: magma_zlapack.h:67
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaLowerStr
Definition: magma.h:85
#define MAGMA_SUCCESS
Definition: magma.h:106
void blasf77_dscal(const magma_int_t *n, const double *alpha, double *x, const magma_int_t *incx)
magma_int_t magma_zstedx(char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *D, double *E, magmaDoubleComplex *Z, magma_int_t ldz, double *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, double *dwork, magma_int_t *info)
Definition: zstedx.cpp:16
#define MagmaNoVecStr
Definition: magma_types.h:423
#define PRECISION_z
Definition: zheevd_m.cpp:16
#define MAGMA_Z_ONE
Definition: magma.h:132
double GetTimerValue(magma_timestr_t start, magma_timestr_t end)
Definition: timer.cpp:94
#define lapackf77_zlascl
Definition: magma_zlapack.h:83
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define MagmaVecStr
Definition: magma_types.h:424
#define max(a, b)
Definition: common_magma.h:82
magma_timestr_t get_current_time(void)
Definition: timer.cpp:76
#define MAGMA_Z_REAL(a)
Definition: magma.h:124

Here is the call graph for this function:

Here is the caller graph for this function: