MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sstedx_m.cpp File Reference
#include "common_magma.h"
Include dependency graph for sstedx_m.cpp:

Go to the source code of this file.

Macros

#define Z(ix, iy)   (z + (ix) + ldz * (iy))
#define lapackf77_slanst   slanst

Functions

magma_int_t magma_slaex0_m (magma_int_t nrgpu, magma_int_t n, float *d, float *e, float *q, magma_int_t ldq, float *work, magma_int_t *iwork, char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
float lapackf77_slanst (char *norm, magma_int_t *n, float *d, float *e)
magma_int_t get_sstedx_smlsize ()
magma_int_t magma_sstedx_m (magma_int_t nrgpu, char range, magma_int_t n, float vl, float vu, magma_int_t il, magma_int_t iu, float *d, float *e, float *z, magma_int_t ldz, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)

Macro Definition Documentation

#define lapackf77_slanst   slanst

Definition at line 18 of file sstedx_m.cpp.

#define Z (   ix,
  iy 
)    (z + (ix) + ldz * (iy))

Definition at line 13 of file sstedx_m.cpp.


Function Documentation

magma_int_t get_sstedx_smlsize ( )

Definition at line 29 of file sstedx_m.cpp.

{
return 25;
}
float lapackf77_slanst ( char *  norm,
magma_int_t n,
float *  d,
float *  e 
)
magma_int_t magma_slaex0_m ( magma_int_t  nrgpu,
magma_int_t  n,
float *  d,
float *  e,
float *  q,
magma_int_t  ldq,
float *  work,
magma_int_t iwork,
char  range,
float  vl,
float  vu,
magma_int_t  il,
magma_int_t  iu,
magma_int_t info 
)

Definition at line 33 of file slaex0_m.cpp.

References __func__, blasf77_scopy(), get_current_time(), get_slaex0_smlsize(), GetTimerValue(), lapackf77_slacpy(), lapackf77_ssteqr(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_ILLEGAL_VALUE, magma_free(), magma_get_slaex3_m_nb(), magma_getdevice(), magma_queue_create(), magma_queue_destroy(), MAGMA_S_ABS, magma_setdevice(), magma_slaex1_m(), magma_smalloc(), MAGMA_SUCCESS, magma_xerbla(), max, N_MAX_GPU, and Q.

{
/*
-- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
.. Scalar Arguments ..
CHARACTER RANGE
INTEGER IL, IU, INFO, LDQ, N
DOUBLE PRECISION VL, VU
..
.. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ),
$ WORK( * )
..
Purpose
=======
DLAEX0 computes all eigenvalues and the choosen eigenvectors of a
symmetric tridiagonal matrix using the divide and conquer method.
Arguments
=========
N (input) INTEGER
The dimension of the symmetric tridiagonal matrix. N >= 0.
D (input/output) DOUBLE PRECISION array, dimension (N)
On entry, the main diagonal of the tridiagonal matrix.
On exit, its eigenvalues.
E (input) DOUBLE PRECISION array, dimension (N-1)
The off-diagonal elements of the tridiagonal matrix.
On exit, E has been destroyed.
Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
On entry, Q will be the identity matrix.
On exit, Q contains the eigenvectors of the
tridiagonal matrix.
LDQ (input) INTEGER
The leading dimension of the array Q. If eigenvectors are
desired, then LDQ >= max(1,N). In any case, LDQ >= 1.
WORK (workspace) DOUBLE PRECISION array,
the dimension of WORK must be at least 4*N + N**2.
IWORK (workspace) INTEGER array,
the dimension of IWORK must be at least 3 + 5*N.
RANGE (input) CHARACTER*1
= 'A': all eigenvalues will be found.
= 'V': all eigenvalues in the half-open interval (VL,VU]
will be found.
= 'I': the IL-th through IU-th eigenvalues will be found.
VL (input) DOUBLE PRECISION
VU (input) DOUBLE PRECISION
If RANGE='V', the lower and upper bounds of the interval to
be searched for eigenvalues. VL < VU.
Not referenced if RANGE = 'A' or 'I'.
IL (input) INTEGER
IU (input) INTEGER
If RANGE='I', the indices (in ascending order) of the
smallest and largest eigenvalues to be returned.
1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
Not referenced if RANGE = 'A' or 'V'.
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: The algorithm failed to compute an eigenvalue while
working on the submatrix lying in rows and columns
INFO/(N+1) through mod(INFO,N+1).
Further Details
===============
Based on contributions by
Jeff Rutter, Computer Science Division, University of California
at Berkeley, USA
=====================================================================
*/
magma_int_t ione = 1;
char range_;
magma_int_t curlvl, curprb, i, indxq;
magma_int_t igpu, j, k, matsiz, msd2, smlsiz;
magma_int_t submat, subpbs, tlvls;
float* dw[N_MAX_GPU];
cudaStream_t stream [N_MAX_GPU][2];
int gpu_b;
magma_getdevice(&gpu_b);
// Test the input parameters.
*info = 0;
if( n < 0 )
*info = -1;
else if( ldq < max(1, n) )
*info = -5;
if( *info != 0 ){
magma_xerbla( __func__, -*info );
}
// Quick return if possible
if(n == 0)
return MAGMA_SUCCESS;
//workspace dimension for nrgpu > 1
size_t tmp = (n-1)/2+1;
if (nrgpu > 1){
size_t tmp2 = (tmp-1) / (nrgpu/2) + 1;
tmp = tmp * tmp2 + 2 * magma_get_slaex3_m_nb()*(tmp + tmp2);
}
for (igpu = 0; igpu < nrgpu; ++igpu){
if(nrgpu==1){
if (MAGMA_SUCCESS != magma_smalloc( &dw[igpu], 3*n*(n/2 + 1) )) {
*info = -15;
}
}
else {
if (MAGMA_SUCCESS != magma_smalloc( &dw[igpu], tmp )) {
*info = -15;
}
}
magma_queue_create( &stream[igpu][0] );
magma_queue_create( &stream[igpu][1] );
}
smlsiz = get_slaex0_smlsize();
// Determine the size and placement of the submatrices, and save in
// the leading elements of IWORK.
iwork[0] = n;
subpbs= 1;
tlvls = 0;
while (iwork[subpbs - 1] > smlsiz) {
for (j = subpbs; j > 0; --j){
iwork[2*j - 1] = (iwork[j-1]+1)/2;
iwork[2*j - 2] = iwork[j-1]/2;
}
++tlvls;
subpbs *= 2;
}
for (j=1; j<subpbs; ++j)
iwork[j] += iwork[j-1];
// Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
// using rank-1 modifications (cuts).
for(i=0; i < subpbs-1; ++i){
submat = iwork[i];
d[submat-1] -= MAGMA_S_ABS(e[submat-1]);
d[submat] -= MAGMA_S_ABS(e[submat-1]);
}
indxq = 4*n + 3;
// Solve each submatrix eigenproblem at the bottom of the divide and
// conquer tree.
char char_I[] = {'I', 0};
//#define ENABLE_TIMER
#ifdef ENABLE_TIMER
magma_timestr_t start, end;
start = get_current_time();
#endif
for (i = 0; i < subpbs; ++i){
if(i == 0){
submat = 0;
matsiz = iwork[0];
} else {
submat = iwork[i-1];
matsiz = iwork[i] - iwork[i-1];
}
lapackf77_ssteqr(char_I , &matsiz, &d[submat], &e[submat],
Q(submat, submat), &ldq, work, info); // change to edc?
if(*info != 0){
printf("info: %d\n, submat: %d\n", *info, submat);
*info = (submat+1)*(n+1) + submat + matsiz;
printf("info: %d\n", *info);
return MAGMA_SUCCESS;
}
k = 1;
for(j = submat; j < iwork[i]; ++j){
iwork[indxq+j] = k;
++k;
}
}
#ifdef ENABLE_TIMER
printf("for: ssteqr = %6.2f\n", GetTimerValue(start,end)/1000.);
#endif
// Successively merge eigensystems of adjacent submatrices
// into eigensystem for the corresponding larger matrix.
curlvl = 1;
while (subpbs > 1){
#ifdef ENABLE_TIMER
magma_timestr_t start, end;
start = get_current_time();
#endif
for (i=0; i<subpbs-1; i+=2){
if(i == 0){
submat = 0;
matsiz = iwork[1];
msd2 = iwork[0];
} else {
submat = iwork[i-1];
matsiz = iwork[i+1] - iwork[i-1];
msd2 = matsiz / 2;
}
// Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
// into an eigensystem of size MATSIZ.
// DLAEX1 is used only for the full eigensystem of a tridiagonal
// matrix.
if (matsiz == n)
range_=range;
else
// We need all the eigenvectors if it is not last step
range_='A';
magma_slaex1_m(nrgpu, matsiz, &d[submat], Q(submat, submat), ldq,
&iwork[indxq+submat], e[submat+msd2-1], msd2,
work, &iwork[subpbs], dw, stream,
range_, vl, vu, il, iu, info);
if(*info != 0){
*info = (submat+1)*(n+1) + submat + matsiz;
return MAGMA_SUCCESS;
}
iwork[i/2]= iwork[i+1];
}
subpbs /= 2;
++curlvl;
#ifdef ENABLE_TIMER
printf("%d: time: %6.2f\n", curlvl, GetTimerValue(start,end)/1000.);
#endif
}
// Re-merge the eigenvalues/vectors which were deflated at the final
// merge step.
for(i = 0; i<n; ++i){
j = iwork[indxq+i] - 1;
work[i] = d[j];
blasf77_scopy(&n, Q(0, j), &ione, &work[ n*(i+1) ], &ione);
}
blasf77_scopy(&n, work, &ione, d, &ione);
char char_A[] = {'A',0};
lapackf77_slacpy ( char_A, &n, &n, &work[n], &n, q, &ldq );
for (igpu = 0; igpu < nrgpu; ++igpu){
magma_queue_destroy( stream[igpu][0] );
magma_queue_destroy( stream[igpu][1] );
/*if(nrgpu==1)*/
magma_free( dw[igpu] );
}
return MAGMA_SUCCESS;
} /* magma_slaex0 */

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sstedx_m ( magma_int_t  nrgpu,
char  range,
magma_int_t  n,
float  vl,
float  vu,
magma_int_t  il,
magma_int_t  iu,
float *  d,
float *  e,
float *  z,
magma_int_t  ldz,
float *  work,
magma_int_t  lwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Definition at line 36 of file sstedx_m.cpp.

References __func__, blasf77_sswap(), get_sstedx_smlsize(), lapackf77_lsame, lapackf77_slamch, lapackf77_slanst, lapackf77_slascl(), lapackf77_slaset(), lapackf77_ssteqr(), MAGMA_ERR_ILLEGAL_VALUE, MAGMA_S_ABS, magma_slaex0_m(), MAGMA_SUCCESS, magma_xerbla(), max, min, and Z.

{
/*
-- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
.. Scalar Arguments ..
CHARACTER RANGE
INTEGER IL, IU, INFO, LDZ, LIWORK, LWORK, N
DOUBLE PRECISION VL, VU
..
.. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ),
$ DWORK ( * )
..
Purpose
=======
DSTEDX computes some eigenvalues and, optionally, eigenvectors of a
symmetric tridiagonal matrix using the divide and conquer method.
This code makes very mild assumptions about floating point
arithmetic. It will work on machines with a guard digit in
add/subtract, or on those binary machines without guard digits
which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
It could conceivably fail on hexadecimal or decimal machines
without guard digits, but we know of none. See DLAEX3 for details.
Arguments
=========
RANGE (input) CHARACTER*1
= 'A': all eigenvalues will be found.
= 'V': all eigenvalues in the half-open interval (VL,VU]
will be found.
= 'I': the IL-th through IU-th eigenvalues will be found.
N (input) INTEGER
The dimension of the symmetric tridiagonal matrix. N >= 0.
VL (input) DOUBLE PRECISION
VU (input) DOUBLE PRECISION
If RANGE='V', the lower and upper bounds of the interval to
be searched for eigenvalues. VL < VU.
Not referenced if RANGE = 'A' or 'I'.
IL (input) INTEGER
IU (input) INTEGER
If RANGE='I', the indices (in ascending order) of the
smallest and largest eigenvalues to be returned.
1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
Not referenced if RANGE = 'A' or 'V'.
D (input/output) DOUBLE PRECISION array, dimension (N)
On entry, the diagonal elements of the tridiagonal matrix.
On exit, if INFO = 0, the eigenvalues in ascending order.
E (input/output) DOUBLE PRECISION array, dimension (N-1)
On entry, the subdiagonal elements of the tridiagonal matrix.
On exit, E has been destroyed.
Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
On exit, if INFO = 0, Z contains the orthonormal eigenvectors
of the symmetric tridiagonal matrix.
LDZ (input) INTEGER
The leading dimension of the array Z. LDZ >= max(1,N).
WORK (workspace/output) DOUBLE PRECISION array,
dimension (LWORK)
On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
LWORK (input) INTEGER
The dimension of the array WORK.
If N > 1 then LWORK must be at least ( 1 + 4*N + N**2 ).
Note that if N is less than or
equal to the minimum divide size, usually 25, then LWORK need
only be max(1,2*(N-1)).
If LWORK = -1, then a workspace query is assumed; the routine
only calculates the optimal size of the WORK array, returns
this value as the first entry of the WORK array, and no error
message related to LWORK is issued by XERBLA.
IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
LIWORK (input) INTEGER
The dimension of the array IWORK.
LIWORK must be at least ( 3 + 5*N ).
Note that if N is less than or
equal to the minimum divide size, usually 25, then LIWORK
need only be 1.
If LIWORK = -1, then a workspace query is assumed; the
routine only calculates the optimal size of the IWORK array,
returns this value as the first entry of the IWORK array, and
no error message related to LIWORK is issued by XERBLA.
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: The algorithm failed to compute an eigenvalue while
working on the submatrix lying in rows and columns
INFO/(N+1) through mod(INFO,N+1).
Further Details
===============
Based on contributions by
Jeff Rutter, Computer Science Division, University of California
at Berkeley, USA
Modified by Francoise Tisseur, University of Tennessee.
=====================================================================
*/
char range_[2] = {range, 0};
float d_zero = 0.;
float d_one = 1.;
magma_int_t izero = 0;
magma_int_t ione = 1;
magma_int_t alleig, indeig, valeig, lquery;
magma_int_t i, j, k, m;
magma_int_t liwmin, lwmin;
magma_int_t start, end, smlsiz;
float eps, orgnrm, p, tiny;
// Test the input parameters.
alleig = lapackf77_lsame(range_, "A");
valeig = lapackf77_lsame(range_, "V");
indeig = lapackf77_lsame(range_, "I");
lquery = lwork == -1 || liwork == -1;
*info = 0;
if (! (alleig || valeig || indeig)) {
*info = -1;
} else if (n < 0) {
*info = -2;
} else if (ldz < max(1,n)) {
*info = -10;
} else {
if (valeig) {
if (n > 0 && vu <= vl) {
*info = -4;
}
} else if (indeig) {
if (il < 1 || il > max(1,n)) {
*info = -5;
} else if (iu < min(n,il) || iu > n) {
*info = -6;
}
}
}
if (*info == 0) {
// Compute the workspace requirements
smlsiz = get_sstedx_smlsize();
if( n <= 1 ){
lwmin = 1;
liwmin = 1;
} else {
lwmin = 1 + 4*n + n*n;
liwmin = 3 + 5*n;
}
work[0] = lwmin;
iwork[0] = liwmin;
if (lwork < lwmin && ! lquery) {
*info = -12;
} else if (liwork < liwmin && ! lquery) {
*info = -14;
}
}
if (*info != 0) {
magma_xerbla( __func__, -(*info));
} else if (lquery) {
return MAGMA_SUCCESS;
}
// Quick return if possible
if(n==0)
return MAGMA_SUCCESS;
if(n==1){
*z = 1.;
return MAGMA_SUCCESS;
}
// If N is smaller than the minimum divide size (SMLSIZ+1), then
// solve the problem with another solver.
if (n < smlsiz){
char char_I[]= {'I', 0};
lapackf77_ssteqr(char_I, &n, d, e, z, &ldz, work, info);
} else {
char char_F[]= {'F', 0};
lapackf77_slaset(char_F, &n, &n, &d_zero, &d_one, z, &ldz);
//Scale.
char char_M[]= {'M', 0};
orgnrm = lapackf77_slanst(char_M, &n, d, e);
if (orgnrm == 0){
work[0] = lwmin;
iwork[0] = liwmin;
return MAGMA_SUCCESS;
}
eps = lapackf77_slamch( "Epsilon" );
if (alleig){
start = 0;
while ( start < n ){
// Let FINISH be the position of the next subdiagonal entry
// such that E( END ) <= TINY or FINISH = N if no such
// subdiagonal exists. The matrix identified by the elements
// between START and END constitutes an independent
// sub-problem.
for(end = start+1; end < n; ++end){
tiny = eps * sqrt( MAGMA_S_ABS(d[end-1]*d[end]));
if (MAGMA_S_ABS(e[end-1]) <= tiny)
break;
}
// (Sub) Problem determined. Compute its size and solve it.
m = end - start;
if (m==1){
start = end;
continue;
}
if (m > smlsiz){
// Scale
char char_G[] = {'G', 0};
orgnrm = lapackf77_slanst(char_M, &m, &d[start], &e[start]);
lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &m, &ione, &d[start], &m, info);
magma_int_t mm = m-1;
lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &mm, &ione, &e[start], &mm, info);
magma_slaex0_m( nrgpu, m, &d[start], &e[start], Z(start, start), ldz, work, iwork, 'A', vl, vu, il, iu, info);
if( *info != 0) {
return MAGMA_SUCCESS;
}
// Scale Back
lapackf77_slascl(char_G, &izero, &izero, &d_one, &orgnrm, &m, &ione, &d[start], &m, info);
} else {
char char_I[]= {'I', 0};
lapackf77_ssteqr( char_I, &m, &d[start], &e[start], Z(start, start), &ldz, work, info);
if (*info != 0){
*info = (start+1) *(n+1) + end;
}
}
start = end;
}
// If the problem split any number of times, then the eigenvalues
// will not be properly ordered. Here we permute the eigenvalues
// (and the associated eigenvectors) into ascending order.
if (m < n){
// Use Selection Sort to minimize swaps of eigenvectors
for (i = 1; i < n; ++i){
k = i-1;
p = d[i-1];
for (j = i; j < n; ++j){
if (d[j] < p){
k = j;
p = d[j];
}
}
if(k != i-1) {
d[k] = d[i-1];
d[i-1] = p;
blasf77_sswap(&n, Z(0,i-1), &ione, Z(0,k), &ione);
}
}
}
} else {
// Scale
char char_G[] = {'G', 0};
lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &n, &ione, d, &n, info);
magma_int_t nm = n-1;
lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &nm, &ione, e, &nm, info);
magma_slaex0_m(nrgpu, n, d, e, z, ldz, work, iwork, range, vl, vu, il, iu, info);
if( *info != 0) {
return MAGMA_SUCCESS;
}
// Scale Back
lapackf77_slascl(char_G, &izero, &izero, &d_one, &orgnrm, &n, &ione, d, &n, info);
}
}
work[0] = lwmin;
iwork[0] = liwmin;
return MAGMA_SUCCESS;
} /* magma_sstedx_m */

Here is the call graph for this function:

Here is the caller graph for this function: