MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zheevd_m.cpp File Reference
#include "common_magma.h"
Include dependency graph for zheevd_m.cpp:

Go to the source code of this file.

Macros

#define DWORKFORZ_AND_LD   double *rwork, magma_int_t *ldrwork,
#define ENABLE_TIMER

Functions

magma_int_t magma_zhetrd_mgpu (int num_gpus, int k, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
void magma_zstedx_ (char *range, magma_int_t *n, double *vl, double *vu, magma_int_t *il, magma_int_t *iu, double *D, double *E, cuDoubleComplex *Z, magma_int_t *ldz, double *rwork, magma_int_t *ldrwork, magma_int_t *iwork, magma_int_t *liwork, double *dwork, magma_int_t *info)
magma_int_t magma_zstedx (char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *D, double *E, cuDoubleComplex *Z, magma_int_t ldz, double *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, double *dwork, magma_int_t *info)
magma_int_t magma_zstedx_m (magma_int_t nrgpu, char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *D, double *E, cuDoubleComplex *Z, magma_int_t ldz, double *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
magma_int_t magma_zunmtr_m (magma_int_t nrgpu, char side, char uplo, char trans, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *c, magma_int_t ldc, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
magma_int_t magma_zheevd_m (magma_int_t nrgpu, char jobz, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, double *w, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)

Macro Definition Documentation

#define DWORKFORZ_AND_LD   double *rwork, magma_int_t *ldrwork,

Definition at line 15 of file zheevd_m.cpp.

#define ENABLE_TIMER

Function Documentation

magma_int_t magma_zheevd_m ( magma_int_t  nrgpu,
char  jobz,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
double *  w,
cuDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t  lrwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Definition at line 53 of file zheevd_m.cpp.

References __func__, blasf77_dscal(), dwork, get_current_time(), GetTimerValue(), lapackf77_dlamch, lapackf77_dsterf, lapackf77_lsame, lapackf77_zlacpy, lapackf77_zlanhe, lapackf77_zlascl, magma_dmalloc(), magma_dsqrt, MAGMA_ERR_DEVICE_ALLOC, magma_free(), magma_get_zhetrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, MAGMA_Z_REAL, MAGMA_Z_SET2REAL, magma_zhetrd_mgpu(), magma_zstedx(), magma_zstedx_m(), magma_zunmtr_m(), MagmaLeft, MagmaLowerStr, MagmaNoTrans, MagmaNoVectorsStr, MagmaUpperStr, MagmaVectorsStr, max, and uplo.

{
/* -- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
Purpose
=======
ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a
complex Hermitian matrix A. If eigenvectors are desired, it uses a
divide and conquer algorithm.
The divide and conquer algorithm makes very mild assumptions about
floating point arithmetic. It will work on machines with a guard
digit in add/subtract, or on those binary machines without guard
digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
Cray-2. It could conceivably fail on hexadecimal or decimal machines
without guard digits, but we know of none.
Arguments
=========
JOBZ (input) CHARACTER*1
= 'N': Compute eigenvalues only;
= 'V': Compute eigenvalues and eigenvectors.
UPLO (input) CHARACTER*1
= 'U': Upper triangle of A is stored;
= 'L': Lower triangle of A is stored.
N (input) INTEGER
The order of the matrix A. N >= 0.
A (input/output) COMPLEX_16 array, dimension (LDA, N)
On entry, the Hermitian matrix A. If UPLO = 'U', the
leading N-by-N upper triangular part of A contains the
upper triangular part of the matrix A. If UPLO = 'L',
the leading N-by-N lower triangular part of A contains
the lower triangular part of the matrix A.
On exit, if JOBZ = 'V', then if INFO = 0, A contains the
orthonormal eigenvectors of the matrix A.
If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
or the upper triangle (if UPLO='U') of A, including the
diagonal, is destroyed.
LDA (input) INTEGER
The leading dimension of the array A. LDA >= max(1,N).
W (output) DOUBLE PRECISION array, dimension (N)
If INFO = 0, the eigenvalues in ascending order.
WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The length of the array WORK.
If N <= 1, LWORK must be at least 1.
If JOBZ = 'N' and N > 1, LWORK must be at least N * (NB + 1).
If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2.
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal sizes of the WORK, RWORK and
IWORK arrays, returns these values as the first entries of
the WORK, RWORK and IWORK arrays, and no error message
related to LWORK or LRWORK or LIWORK is issued by XERBLA.
RWORK (workspace/output) DOUBLE PRECISION array,
dimension (LRWORK)
On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
LRWORK (input) INTEGER
The dimension of the array RWORK.
If N <= 1, LRWORK must be at least 1.
If JOBZ = 'N' and N > 1, LRWORK must be at least N.
If JOBZ = 'V' and N > 1, LRWORK must be at least
1 + 5*N + 2*N**2.
If LRWORK = -1, then a workspace query is assumed; the
routine only calculates the optimal sizes of the WORK, RWORK
and IWORK arrays, returns these values as the first entries
of the WORK, RWORK and IWORK arrays, and no error message
related to LWORK or LRWORK or LIWORK is issued by XERBLA.
IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
LIWORK (input) INTEGER
The dimension of the array IWORK.
If N <= 1, LIWORK must be at least 1.
If JOBZ = 'N' and N > 1, LIWORK must be at least 1.
If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
If LIWORK = -1, then a workspace query is assumed; the
routine only calculates the optimal sizes of the WORK, RWORK
and IWORK arrays, returns these values as the first entries
of the WORK, RWORK and IWORK arrays, and no error message
related to LWORK or LRWORK or LIWORK is issued by XERBLA.
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
> 0: if INFO = i and JOBZ = 'N', then the algorithm failed
to converge; i off-diagonal elements of an intermediate
tridiagonal form did not converge to zero;
if INFO = i and JOBZ = 'V', then the algorithm failed
to compute an eigenvalue while working on the submatrix
lying in rows and columns INFO/(N+1) through
mod(INFO,N+1).
Further Details
===============
Based on contributions by
Jeff Rutter, Computer Science Division, University of California
at Berkeley, USA
Modified description of INFO. Sven, 16 Feb 05.
===================================================================== */
char uplo_[2] = {uplo, 0};
char jobz_[2] = {jobz, 0};
magma_int_t c__1 = 1;
magma_int_t c_n1 = -1;
magma_int_t c__0 = 0;
double c_b18 = 1.;
double d__1;
double eps;
double anrm;
double rmin, rmax;
double sigma;
magma_int_t iinfo, lwmin;
magma_int_t lower;
magma_int_t llrwk;
magma_int_t wantz;
magma_int_t indwk2, llwrk2;
magma_int_t iscale;
double safmin;
double bignum;
magma_int_t indtau;
magma_int_t indrwk, indwrk, liwmin;
magma_int_t lrwmin, llwork;
double smlnum;
magma_int_t lquery;
double* dwork;
lower = lapackf77_lsame(uplo_, MagmaLowerStr);
lquery = lwork == -1 || lrwork == -1 || liwork == -1;
*info = 0;
if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVectorsStr))) {
*info = -1;
} else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
*info = -2;
} else if (n < 0) {
*info = -3;
} else if (lda < max(1,n)) {
*info = -5;
}
if (wantz) {
lwmin = 2 * n + n * n;
lrwmin = 1 + 5 * n + 2 * n * n;
liwmin = 5 * n + 3;
} else {
lwmin = n * (nb + 1);
lrwmin = n;
liwmin = 1;
}
MAGMA_Z_SET2REAL(work[0],(double)lwmin);
rwork[0] = lrwmin;
iwork[0] = liwmin;
if ((lwork < lwmin) && !lquery) {
*info = -8;
} else if ((lrwork < lrwmin) && ! lquery) {
*info = -10;
} else if ((liwork < liwmin) && ! lquery) {
*info = -12;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
else if (lquery) {
return *info;
}
/* Quick return if possible */
if (n == 0) {
return *info;
}
if (n == 1) {
w[0] = MAGMA_Z_REAL(a[0]);
if (wantz) {
a[0] = MAGMA_Z_ONE;
}
return *info;
}
/* Get machine constants. */
safmin = lapackf77_dlamch("Safe minimum");
eps = lapackf77_dlamch("Precision");
smlnum = safmin / eps;
bignum = 1. / smlnum;
rmin = magma_dsqrt(smlnum);
rmax = magma_dsqrt(bignum);
/* Scale matrix to allowable range, if necessary. */
anrm = lapackf77_zlanhe("M", uplo_, &n, a, &lda, rwork);
iscale = 0;
if (anrm > 0. && anrm < rmin) {
iscale = 1;
sigma = rmin / anrm;
} else if (anrm > rmax) {
iscale = 1;
sigma = rmax / anrm;
}
if (iscale == 1) {
lapackf77_zlascl(uplo_, &c__0, &c__0, &c_b18, &sigma, &n, &n, a,
&lda, info);
}
/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */
inde = 0;
indtau = 0;
indwrk = indtau + n;
indrwk = inde + n;
indwk2 = indwrk + n * n;
llwork = lwork - indwrk;
llwrk2 = lwork - indwk2;
llrwk = lrwork - indrwk;
#define ENABLE_TIMER
#ifdef ENABLE_TIMER
magma_timestr_t start, end;
start = get_current_time();
#endif
//magma_zhetrd(uplo_[0], n, a, lda, w, &rwork[inde],
// &work[indtau], &work[indwrk], llwork, &iinfo);
magma_zhetrd_mgpu(nrgpu, 4, uplo_[0], n, a, lda, w, &rwork[inde],
&work[indtau], &work[indwrk], llwork, &iinfo);
#ifdef ENABLE_TIMER
printf("time zhetrd = %6.2f\n", GetTimerValue(start,end)/1000.);
#endif
/* For eigenvalues only, call DSTERF. For eigenvectors, first call
ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
tridiagonal matrix, then call ZUNMTR to multiply it to the Householder
transformations represented as Householder vectors in A. */
if (! wantz) {
lapackf77_dsterf(&n, w, &rwork[inde], info);
} else {
//lapackf77_zstedc("I", &n, w, &rwork[inde], &work[indwrk], &n, &work[indwk2],
// &llwrk2, &rwork[indrwk], &llrwk, iwork, &liwork, info);
#ifdef ENABLE_TIMER
start = get_current_time();
#endif
#ifdef USE_SINGLE_GPU
if (MAGMA_SUCCESS != magma_dmalloc( &dwork, 3*n*(n/2 + 1) )) {
return *info;
}
/* double vl = 0;
double vu = 0;
magma_int_t il = 0;
magma_int_t iu = 0;
char range_[2] = {'A', 0};
magma_zstedx_(range_, &n, &vl, &vu, &il, &iu, w, &rwork[inde],
&work[indwrk], &n, &rwork[indrwk],
&llrwk, iwork, &liwork, dwork, info);
*/
magma_zstedx('A', n, 0, 0, 0, 0, w, &rwork[inde],
&work[indwrk], n, &rwork[indrwk],
llrwk, iwork, liwork, dwork, info);
magma_free( dwork );
#else
magma_zstedx_m(nrgpu, 'A', n, 0, 0, 0, 0, w, &rwork[inde],
&work[indwrk], n, &rwork[indrwk],
llrwk, iwork, liwork, info);
#endif
#ifdef ENABLE_TIMER
printf("time zstedc = %6.2f\n", GetTimerValue(start,end)/1000.);
start = get_current_time();
#endif
//magma_zunmtr(MagmaLeft, uplo, MagmaNoTrans, n, n, a, lda, &work[indtau],
magma_zunmtr_m(nrgpu, MagmaLeft, uplo, MagmaNoTrans, n, n, a, lda, &work[indtau],
&work[indwrk], n, &work[indwk2], llwrk2, &iinfo);
lapackf77_zlacpy("A", &n, &n, &work[indwrk], &n, a, &lda);
#ifdef ENABLE_TIMER
printf("time zunmtr + copy = %6.2f\n", GetTimerValue(start,end)/1000.);
#endif
}
/* If matrix was scaled, then rescale eigenvalues appropriately. */
if (iscale == 1) {
if (*info == 0) {
imax = n;
} else {
imax = *info - 1;
}
d__1 = 1. / sigma;
blasf77_dscal(&imax, &d__1, w, &c__1);
}
work[0] = MAGMA_Z_MAKE((double) lwmin, 0.);
rwork[0] = (double) lrwmin;
iwork[0] = liwmin;
return *info;
} /* magma_zheevd_m */

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zhetrd_mgpu ( int  num_gpus,
int  k,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
double *  d,
double *  e,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zstedx ( char  range,
magma_int_t  n,
double  vl,
double  vu,
magma_int_t  il,
magma_int_t  iu,
double *  D,
double *  E,
cuDoubleComplex *  Z,
magma_int_t  ldz,
double *  rwork,
magma_int_t  ldrwork,
magma_int_t iwork,
magma_int_t  liwork,
double *  dwork,
magma_int_t info 
)

Definition at line 23 of file zstedx.cpp.

References __func__, get_zstedx_smlsize(), lapackf77_lsame, lapackf77_zsteqr, magma_dstedx(), magma_xerbla(), MAGMA_Z_SET2REAL, max, and min.

{
/*
-- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
.. Scalar Arguments ..
CHARACTER RANGE
INTEGER IL, IU, INFO, LDZ, LIWORK, LRWORK, N
DOUBLE PRECISION VL, VU
..
.. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION D( * ), E( * ), RWORK( * ), DWORK ( * )
COMPLEX*16 Z( LDZ, * )
..
Purpose
=======
ZSTEDX computes some eigenvalues and eigenvectors of a
symmetric tridiagonal matrix using the divide and conquer method.
This code makes very mild assumptions about floating point
arithmetic. It will work on machines with a guard digit in
add/subtract, or on those binary machines without guard digits
which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
It could conceivably fail on hexadecimal or decimal machines
without guard digits, but we know of none. See DLAEX3 for details.
Arguments
=========
RANGE (input) CHARACTER*1
= 'A': all eigenvalues will be found.
= 'V': all eigenvalues in the half-open interval (VL,VU]
will be found.
= 'I': the IL-th through IU-th eigenvalues will be found.
N (input) INTEGER
The dimension of the symmetric tridiagonal matrix. N >= 0.
VL (input) DOUBLE PRECISION
VU (input) DOUBLE PRECISION
If RANGE='V', the lower and upper bounds of the interval to
be searched for eigenvalues. VL < VU.
Not referenced if RANGE = 'A' or 'I'.
IL (input) INTEGER
IU (input) INTEGER
If RANGE='I', the indices (in ascending order) of the
smallest and largest eigenvalues to be returned.
1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
Not referenced if RANGE = 'A' or 'V'.
D (input/output) DOUBLE PRECISION array, dimension (N)
On entry, the diagonal elements of the tridiagonal matrix.
On exit, if INFO = 0, the eigenvalues in ascending order.
E (input/output) DOUBLE PRECISION array, dimension (N-1)
On entry, the subdiagonal elements of the tridiagonal matrix.
On exit, E has been destroyed.
Z (output) COMPLEX*16 array, dimension (LDZ,N)
On exit, if INFO = 0, Z contains the orthonormal eigenvectors
of the symmetric tridiagonal matrix.
LDZ (input) INTEGER
The leading dimension of the array Z. LDZ >= max(1,N).
RWORK (workspace/output) DOUBLE PRECISION array,
dimension (LRWORK)
On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
LRWORK (input) INTEGER
The dimension of the array RWORK.
LRWORK must be at least 1 + 4*N + 2*N**2 .
Note that if N is less than or
equal to the minimum divide size, usually 25, then LRWORK
need only be max(1,2*(N-1)).
If LRWORK = -1, then a workspace query is assumed; the
routine only calculates the optimal sizes of the WORK, RWORK
and IWORK arrays, returns these values as the first entries
of the WORK, RWORK and IWORK arrays, and no error message
related to LWORK or LRWORK or LIWORK is issued by XERBLA.
IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
LIWORK (input) INTEGER
The dimension of the array IWORK.
LIWORK must be at least 3 + 5*N .
Note that if N is less than or
equal to the minimum divide size, usually 25, then LIWORK
need only be 1.
If LIWORK = -1, then a workspace query is assumed; the
routine only calculates the optimal sizes of the WORK, RWORK
and IWORK arrays, returns these values as the first entries
of the WORK, RWORK and IWORK arrays, and no error message
related to LWORK or LRWORK or LIWORK is issued by XERBLA.
DWORK (device workspace) DOUBLE PRECISION array, dimension (3*N*N/2+3*N)
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: The algorithm failed to compute an eigenvalue while
working on the submatrix lying in rows and columns
INFO/(N+1) through mod(INFO,N+1).
Further Details
===============
Based on contributions by
Jeff Rutter, Computer Science Division, University of California
at Berkeley, USA
=====================================================================
*/
char range_[2] = {range, 0};
magma_int_t alleig, indeig, valeig, lquery;
magma_int_t i, j, smlsiz;
magma_int_t liwmin, lrwmin;
alleig = lapackf77_lsame(range_, "A");
valeig = lapackf77_lsame(range_, "V");
indeig = lapackf77_lsame(range_, "I");
lquery = lrwork == -1 || liwork == -1;
*info = 0;
if (! (alleig || valeig || indeig)) {
*info = -1;
} else if (n < 0) {
*info = -2;
} else if (ldz < max(1,n)) {
*info = -10;
} else {
if (valeig) {
if (n > 0 && vu <= vl) {
*info = -4;
}
} else if (indeig) {
if (il < 1 || il > max(1,n)) {
*info = -5;
} else if (iu < min(n,il) || iu > n) {
*info = -6;
}
}
}
if (*info == 0) {
// Compute the workspace requirements
smlsiz = get_zstedx_smlsize();
if( n <= 1 ){
lrwmin = 1;
liwmin = 1;
} else {
lrwmin = 1 + 4*n + 2*n*n;
liwmin = 3 + 5*n;
}
rwork[0] = lrwmin;
iwork[0] = liwmin;
if (lrwork < lrwmin && ! lquery) {
*info = -12;
} else if (liwork < liwmin && ! lquery) {
*info = -14;
}
}
if (*info != 0) {
magma_xerbla( __func__, -(*info));
return *info;
} else if (lquery) {
return *info;
}
// Quick return if possible
if(n==0)
return *info;
if(n==1){
return *info;
}
// If N is smaller than the minimum divide size (SMLSIZ+1), then
// solve the problem with another solver.
if (n < smlsiz){
char char_I[]= {'I', 0};
lapackf77_zsteqr(char_I, &n, d, e, z, &ldz, rwork, info);
} else {
// We simply call DSTEDX instead.
magma_dstedx(range, n, vl, vu, il, iu, d, e, rwork, n,
rwork+n*n, lrwork-n*n, iwork, liwork, dwork, info);
for(j=0; j<n; ++j)
for(i=0; i<n; ++i){
MAGMA_Z_SET2REAL(*(z+i+ldz*j), *(rwork+i+n*j));
}
}
rwork[0] = lrwmin;
iwork[0] = liwmin;
return *info;
} /* zstedx */

Here is the call graph for this function:

void magma_zstedx_ ( char *  range,
magma_int_t n,
double *  vl,
double *  vu,
magma_int_t il,
magma_int_t iu,
double *  D,
double *  E,
cuDoubleComplex *  Z,
magma_int_t ldz,
double *  rwork,
magma_int_t ldrwork,
magma_int_t iwork,
magma_int_t liwork,
double *  dwork,
magma_int_t info 
)
magma_int_t magma_zstedx_m ( magma_int_t  nrgpu,
char  range,
magma_int_t  n,
double  vl,
double  vu,
magma_int_t  il,
magma_int_t  iu,
double *  D,
double *  E,
cuDoubleComplex *  Z,
magma_int_t  ldz,
double *  rwork,
magma_int_t  ldrwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Definition at line 26 of file zstedx_m.cpp.

References __func__, get_zstedx_smlsize(), lapackf77_lsame, lapackf77_zsteqr, magma_dstedx_m(), magma_xerbla(), MAGMA_Z_SET2REAL, max, and min.

{
/*
-- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
.. Scalar Arguments ..
CHARACTER RANGE
INTEGER IL, IU, INFO, LDZ, LIWORK, LRWORK, N
DOUBLE PRECISION VL, VU
..
.. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION D( * ), E( * ), RWORK( * ), DWORK ( * )
COMPLEX*16 Z( LDZ, * )
..
Purpose
=======
ZSTEDX computes some eigenvalues and eigenvectors of a
symmetric tridiagonal matrix using the divide and conquer method.
This code makes very mild assumptions about floating point
arithmetic. It will work on machines with a guard digit in
add/subtract, or on those binary machines without guard digits
which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
It could conceivably fail on hexadecimal or decimal machines
without guard digits, but we know of none. See DLAEX3 for details.
Arguments
=========
RANGE (input) CHARACTER*1
= 'A': all eigenvalues will be found.
= 'V': all eigenvalues in the half-open interval (VL,VU]
will be found.
= 'I': the IL-th through IU-th eigenvalues will be found.
N (input) INTEGER
The dimension of the symmetric tridiagonal matrix. N >= 0.
VL (input) DOUBLE PRECISION
VU (input) DOUBLE PRECISION
If RANGE='V', the lower and upper bounds of the interval to
be searched for eigenvalues. VL < VU.
Not referenced if RANGE = 'A' or 'I'.
IL (input) INTEGER
IU (input) INTEGER
If RANGE='I', the indices (in ascending order) of the
smallest and largest eigenvalues to be returned.
1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
Not referenced if RANGE = 'A' or 'V'.
D (input/output) DOUBLE PRECISION array, dimension (N)
On entry, the diagonal elements of the tridiagonal matrix.
On exit, if INFO = 0, the eigenvalues in ascending order.
E (input/output) DOUBLE PRECISION array, dimension (N-1)
On entry, the subdiagonal elements of the tridiagonal matrix.
On exit, E has been destroyed.
Z (output) COMPLEX*16 array, dimension (LDZ,N)
On exit, if INFO = 0, Z contains the orthonormal eigenvectors
of the symmetric tridiagonal matrix.
LDZ (input) INTEGER
The leading dimension of the array Z. LDZ >= max(1,N).
RWORK (workspace/output) DOUBLE PRECISION array,
dimension (LRWORK)
On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
LRWORK (input) INTEGER
The dimension of the array RWORK.
LRWORK must be at least 1 + 4*N + 2*N**2 .
Note that if N is less than or
equal to the minimum divide size, usually 25, then LRWORK
need only be max(1,2*(N-1)).
If LRWORK = -1, then a workspace query is assumed; the
routine only calculates the optimal sizes of the WORK, RWORK
and IWORK arrays, returns these values as the first entries
of the WORK, RWORK and IWORK arrays, and no error message
related to LWORK or LRWORK or LIWORK is issued by XERBLA.
IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
LIWORK (input) INTEGER
The dimension of the array IWORK.
LIWORK must be at least 3 + 5*N .
Note that if N is less than or
equal to the minimum divide size, usually 25, then LIWORK
need only be 1.
If LIWORK = -1, then a workspace query is assumed; the
routine only calculates the optimal sizes of the WORK, RWORK
and IWORK arrays, returns these values as the first entries
of the WORK, RWORK and IWORK arrays, and no error message
related to LWORK or LRWORK or LIWORK is issued by XERBLA.
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: The algorithm failed to compute an eigenvalue while
working on the submatrix lying in rows and columns
INFO/(N+1) through mod(INFO,N+1).
Further Details
===============
Based on contributions by
Jeff Rutter, Computer Science Division, University of California
at Berkeley, USA
=====================================================================
*/
char range_[2] = {range, 0};
magma_int_t alleig, indeig, valeig, lquery;
magma_int_t i, j, smlsiz;
magma_int_t liwmin, lrwmin;
alleig = lapackf77_lsame(range_, "A");
valeig = lapackf77_lsame(range_, "V");
indeig = lapackf77_lsame(range_, "I");
lquery = lrwork == -1 || liwork == -1;
*info = 0;
if (! (alleig || valeig || indeig)) {
*info = -1;
} else if (n < 0) {
*info = -2;
} else if (ldz < max(1,n)) {
*info = -10;
} else {
if (valeig) {
if (n > 0 && vu <= vl) {
*info = -4;
}
} else if (indeig) {
if (il < 1 || il > max(1,n)) {
*info = -5;
} else if (iu < min(n,il) || iu > n) {
*info = -6;
}
}
}
if (*info == 0) {
// Compute the workspace requirements
smlsiz = get_zstedx_smlsize();
if( n <= 1 ){
lrwmin = 1;
liwmin = 1;
} else {
lrwmin = 1 + 4*n + 2*n*n;
liwmin = 3 + 5*n;
}
rwork[0] = lrwmin;
iwork[0] = liwmin;
if (lrwork < lrwmin && ! lquery) {
*info = -12;
} else if (liwork < liwmin && ! lquery) {
*info = -14;
}
}
if (*info != 0) {
magma_xerbla( __func__, -(*info));
return *info;
} else if (lquery) {
return *info;
}
// Quick return if possible
if(n==0)
return *info;
if(n==1){
return *info;
}
// If N is smaller than the minimum divide size (SMLSIZ+1), then
// solve the problem with another solver.
if (n < smlsiz){
char char_I[]= {'I', 0};
lapackf77_zsteqr(char_I, &n, d, e, z, &ldz, rwork, info);
} else {
// We simply call DSTEDX instead.
magma_dstedx_m(nrgpu, range, n, vl, vu, il, iu, d, e, rwork, n,
rwork+n*n, lrwork-n*n, iwork, liwork, info);
for(j=0; j<n; ++j)
for(i=0; i<n; ++i){
MAGMA_Z_SET2REAL(*(z+i+ldz*j), *(rwork+i+n*j));
}
}
rwork[0] = lrwmin;
iwork[0] = liwmin;
return *info;
} /* magma_zstedx_m */

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zunmtr_m ( magma_int_t  nrgpu,
char  side,
char  uplo,
char  trans,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  c,
magma_int_t  ldc,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 26 of file zunmtr_m.cpp.

References __func__, lapackf77_lsame, magma_xerbla(), MAGMA_Z_ONE, MAGMA_Z_SET2REAL, magma_zunmqr_m(), max, side, trans, and uplo.

{
/* -- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
Purpose
=======
ZUNMTR overwrites the general complex M-by-N matrix C with
SIDE = 'L' SIDE = 'R'
TRANS = 'N': Q * C C * Q
TRANS = 'T': Q**H * C C * Q**H
where Q is a complex orthogonal matrix of order nq, with nq = m if
SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
nq-1 elementary reflectors, as returned by SSYTRD:
if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
Arguments
=========
SIDE (input) CHARACTER*1
= 'L': apply Q or Q**H from the Left;
= 'R': apply Q or Q**H from the Right.
UPLO (input) CHARACTER*1
= 'U': Upper triangle of A contains elementary reflectors
from SSYTRD;
= 'L': Lower triangle of A contains elementary reflectors
from SSYTRD.
TRANS (input) CHARACTER*1
= 'N': No transpose, apply Q;
= 'T': Transpose, apply Q**H.
M (input) INTEGER
The number of rows of the matrix C. M >= 0.
N (input) INTEGER
The number of columns of the matrix C. N >= 0.
A (input) COMPLEX_16 array, dimension
(LDA,M) if SIDE = 'L'
(LDA,N) if SIDE = 'R'
The vectors which define the elementary reflectors, as
returned by SSYTRD.
LDA (input) INTEGER
The leading dimension of the array A.
LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
TAU (input) COMPLEX_16 array, dimension
(M-1) if SIDE = 'L'
(N-1) if SIDE = 'R'
TAU(i) must contain the scalar factor of the elementary
reflector H(i), as returned by SSYTRD.
C (input/output) COMPLEX_16 array, dimension (LDC,N)
On entry, the M-by-N matrix C.
On exit, C is overwritten by Q*C or Q\*\*H*C or C*Q\*\*H or C*Q.
LDC (input) INTEGER
The leading dimension of the array C. LDC >= max(1,M).
WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The dimension of the array WORK.
If SIDE = 'L', LWORK >= max(1,N);
if SIDE = 'R', LWORK >= max(1,M).
For optimum performance LWORK >= N*NB if SIDE = 'L', and
LWORK >= M*NB if SIDE = 'R', where NB is the optimal
blocksize.
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal size of the WORK array, returns
this value as the first entry of the WORK array, and no error
message related to LWORK is issued.
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
===================================================================== */
cuDoubleComplex c_one = MAGMA_Z_ONE;
char side_[2] = {side, 0};
char uplo_[2] = {uplo, 0};
char trans_[2] = {trans, 0};
static magma_int_t i1, i2, nb, mi, ni, nq, nw;
long int left, upper, lquery;
static magma_int_t iinfo;
static magma_int_t lwkopt;
*info = 0;
left = lapackf77_lsame(side_, "L");
upper = lapackf77_lsame(uplo_, "U");
lquery = lwork == -1;
/* NQ is the order of Q and NW is the minimum dimension of WORK */
if (left) {
nq = m;
nw = n;
} else {
nq = n;
nw = m;
}
if (! left && ! lapackf77_lsame(side_, "R")) {
*info = -1;
} else if (! upper && ! lapackf77_lsame(uplo_, "L")) {
*info = -2;
} else if (! lapackf77_lsame(trans_, "N") &&
! lapackf77_lsame(trans_, "C")) {
*info = -3;
} else if (m < 0) {
*info = -4;
} else if (n < 0) {
*info = -5;
} else if (lda < max(1,nq)) {
*info = -7;
} else if (ldc < max(1,m)) {
*info = -10;
} else if (lwork < max(1,nw) && ! lquery) {
*info = -12;
}
if (*info == 0)
{
nb = 32;
lwkopt = max(1,nw) * nb;
MAGMA_Z_SET2REAL( work[0], lwkopt );
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
else if (lquery) {
return *info;
}
/* Quick return if possible */
if (m == 0 || n == 0 || nq == 1) {
work[0] = c_one;
return *info;
}
if (left) {
mi = m - 1;
ni = n;
} else {
mi = m;
ni = n - 1;
}
if (upper)
{
/* Q was determined by a call to SSYTRD with UPLO = 'U' */
i__2 = nq - 1;
printf("zunmtr_m upper case not implemented");
exit(-1);
//lapackf77_zunmql(side_, trans_, &mi, &ni, &i__2, &a[lda], &lda,
// tau, c, &ldc, work, &lwork, &iinfo);
//magma_zunmql_m(nrgpu, side, trans, mi, ni, i__2, &a[lda], lda, tau,
// c, ldc, work, lwork, &iinfo);
}
else
{
/* Q was determined by a call to SSYTRD with UPLO = 'L' */
if (left) {
i1 = 1;
i2 = 0;
} else {
i1 = 0;
i2 = 1;
}
i__2 = nq - 1;
magma_zunmqr_m(nrgpu, side, trans, mi, ni, i__2, &a[1], lda, tau,
&c[i1 + i2 * ldc], ldc, work, lwork, &iinfo);
}
MAGMA_Z_SET2REAL( work[0], lwkopt );
return *info;
} /* magma_zunmtr */

Here is the call graph for this function:

Here is the caller graph for this function: