MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
slaex0_m.cpp File Reference
#include "common_magma.h"
Include dependency graph for slaex0_m.cpp:

Go to the source code of this file.

Macros

#define N_MAX_GPU   8
#define Q(ix, iy)   (q + (ix) + ldq * (iy))

Functions

magma_int_t magma_slaex1_m (magma_int_t nrgpu, magma_int_t n, float *d, float *q, magma_int_t ldq, magma_int_t *indxq, float rho, magma_int_t cutpnt, float *work, magma_int_t *iwork, float **dwork, cudaStream_t stream[N_MAX_GPU][2], char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
int magma_get_slaex3_m_nb ()
magma_int_t get_slaex0_smlsize ()
magma_int_t magma_slaex0_m (magma_int_t nrgpu, magma_int_t n, float *d, float *e, float *q, magma_int_t ldq, float *work, magma_int_t *iwork, char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)

Macro Definition Documentation

#define N_MAX_GPU   8

Definition at line 11 of file slaex0_m.cpp.

#define Q (   ix,
  iy 
)    (q + (ix) + ldq * (iy))

Definition at line 14 of file slaex0_m.cpp.


Function Documentation

magma_int_t get_slaex0_smlsize ( )

Definition at line 26 of file slaex0_m.cpp.

{
return 25;
}
int magma_get_slaex3_m_nb ( )

Definition at line 24 of file slaex3_m.cpp.

{ return 1024;}

Here is the caller graph for this function:

magma_int_t magma_slaex0_m ( magma_int_t  nrgpu,
magma_int_t  n,
float *  d,
float *  e,
float *  q,
magma_int_t  ldq,
float *  work,
magma_int_t iwork,
char  range,
float  vl,
float  vu,
magma_int_t  il,
magma_int_t  iu,
magma_int_t info 
)

Definition at line 33 of file slaex0_m.cpp.

References __func__, blasf77_scopy(), get_current_time(), get_slaex0_smlsize(), GetTimerValue(), lapackf77_slacpy(), lapackf77_ssteqr(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_ILLEGAL_VALUE, magma_free(), magma_get_slaex3_m_nb(), magma_getdevice(), magma_queue_create(), magma_queue_destroy(), MAGMA_S_ABS, magma_setdevice(), magma_slaex1_m(), magma_smalloc(), MAGMA_SUCCESS, magma_xerbla(), max, N_MAX_GPU, and Q.

{
/*
-- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
.. Scalar Arguments ..
CHARACTER RANGE
INTEGER IL, IU, INFO, LDQ, N
DOUBLE PRECISION VL, VU
..
.. Array Arguments ..
INTEGER IWORK( * )
DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ),
$ WORK( * )
..
Purpose
=======
DLAEX0 computes all eigenvalues and the choosen eigenvectors of a
symmetric tridiagonal matrix using the divide and conquer method.
Arguments
=========
N (input) INTEGER
The dimension of the symmetric tridiagonal matrix. N >= 0.
D (input/output) DOUBLE PRECISION array, dimension (N)
On entry, the main diagonal of the tridiagonal matrix.
On exit, its eigenvalues.
E (input) DOUBLE PRECISION array, dimension (N-1)
The off-diagonal elements of the tridiagonal matrix.
On exit, E has been destroyed.
Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
On entry, Q will be the identity matrix.
On exit, Q contains the eigenvectors of the
tridiagonal matrix.
LDQ (input) INTEGER
The leading dimension of the array Q. If eigenvectors are
desired, then LDQ >= max(1,N). In any case, LDQ >= 1.
WORK (workspace) DOUBLE PRECISION array,
the dimension of WORK must be at least 4*N + N**2.
IWORK (workspace) INTEGER array,
the dimension of IWORK must be at least 3 + 5*N.
RANGE (input) CHARACTER*1
= 'A': all eigenvalues will be found.
= 'V': all eigenvalues in the half-open interval (VL,VU]
will be found.
= 'I': the IL-th through IU-th eigenvalues will be found.
VL (input) DOUBLE PRECISION
VU (input) DOUBLE PRECISION
If RANGE='V', the lower and upper bounds of the interval to
be searched for eigenvalues. VL < VU.
Not referenced if RANGE = 'A' or 'I'.
IL (input) INTEGER
IU (input) INTEGER
If RANGE='I', the indices (in ascending order) of the
smallest and largest eigenvalues to be returned.
1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
Not referenced if RANGE = 'A' or 'V'.
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: The algorithm failed to compute an eigenvalue while
working on the submatrix lying in rows and columns
INFO/(N+1) through mod(INFO,N+1).
Further Details
===============
Based on contributions by
Jeff Rutter, Computer Science Division, University of California
at Berkeley, USA
=====================================================================
*/
magma_int_t ione = 1;
char range_;
magma_int_t curlvl, curprb, i, indxq;
magma_int_t igpu, j, k, matsiz, msd2, smlsiz;
magma_int_t submat, subpbs, tlvls;
float* dw[N_MAX_GPU];
cudaStream_t stream [N_MAX_GPU][2];
int gpu_b;
magma_getdevice(&gpu_b);
// Test the input parameters.
*info = 0;
if( n < 0 )
*info = -1;
else if( ldq < max(1, n) )
*info = -5;
if( *info != 0 ){
magma_xerbla( __func__, -*info );
}
// Quick return if possible
if(n == 0)
return MAGMA_SUCCESS;
//workspace dimension for nrgpu > 1
size_t tmp = (n-1)/2+1;
if (nrgpu > 1){
size_t tmp2 = (tmp-1) / (nrgpu/2) + 1;
tmp = tmp * tmp2 + 2 * magma_get_slaex3_m_nb()*(tmp + tmp2);
}
for (igpu = 0; igpu < nrgpu; ++igpu){
if(nrgpu==1){
if (MAGMA_SUCCESS != magma_smalloc( &dw[igpu], 3*n*(n/2 + 1) )) {
*info = -15;
}
}
else {
if (MAGMA_SUCCESS != magma_smalloc( &dw[igpu], tmp )) {
*info = -15;
}
}
magma_queue_create( &stream[igpu][0] );
magma_queue_create( &stream[igpu][1] );
}
smlsiz = get_slaex0_smlsize();
// Determine the size and placement of the submatrices, and save in
// the leading elements of IWORK.
iwork[0] = n;
subpbs= 1;
tlvls = 0;
while (iwork[subpbs - 1] > smlsiz) {
for (j = subpbs; j > 0; --j){
iwork[2*j - 1] = (iwork[j-1]+1)/2;
iwork[2*j - 2] = iwork[j-1]/2;
}
++tlvls;
subpbs *= 2;
}
for (j=1; j<subpbs; ++j)
iwork[j] += iwork[j-1];
// Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
// using rank-1 modifications (cuts).
for(i=0; i < subpbs-1; ++i){
submat = iwork[i];
d[submat-1] -= MAGMA_S_ABS(e[submat-1]);
d[submat] -= MAGMA_S_ABS(e[submat-1]);
}
indxq = 4*n + 3;
// Solve each submatrix eigenproblem at the bottom of the divide and
// conquer tree.
char char_I[] = {'I', 0};
//#define ENABLE_TIMER
#ifdef ENABLE_TIMER
magma_timestr_t start, end;
start = get_current_time();
#endif
for (i = 0; i < subpbs; ++i){
if(i == 0){
submat = 0;
matsiz = iwork[0];
} else {
submat = iwork[i-1];
matsiz = iwork[i] - iwork[i-1];
}
lapackf77_ssteqr(char_I , &matsiz, &d[submat], &e[submat],
Q(submat, submat), &ldq, work, info); // change to edc?
if(*info != 0){
printf("info: %d\n, submat: %d\n", *info, submat);
*info = (submat+1)*(n+1) + submat + matsiz;
printf("info: %d\n", *info);
return MAGMA_SUCCESS;
}
k = 1;
for(j = submat; j < iwork[i]; ++j){
iwork[indxq+j] = k;
++k;
}
}
#ifdef ENABLE_TIMER
printf("for: ssteqr = %6.2f\n", GetTimerValue(start,end)/1000.);
#endif
// Successively merge eigensystems of adjacent submatrices
// into eigensystem for the corresponding larger matrix.
curlvl = 1;
while (subpbs > 1){
#ifdef ENABLE_TIMER
magma_timestr_t start, end;
start = get_current_time();
#endif
for (i=0; i<subpbs-1; i+=2){
if(i == 0){
submat = 0;
matsiz = iwork[1];
msd2 = iwork[0];
} else {
submat = iwork[i-1];
matsiz = iwork[i+1] - iwork[i-1];
msd2 = matsiz / 2;
}
// Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
// into an eigensystem of size MATSIZ.
// DLAEX1 is used only for the full eigensystem of a tridiagonal
// matrix.
if (matsiz == n)
range_=range;
else
// We need all the eigenvectors if it is not last step
range_='A';
magma_slaex1_m(nrgpu, matsiz, &d[submat], Q(submat, submat), ldq,
&iwork[indxq+submat], e[submat+msd2-1], msd2,
work, &iwork[subpbs], dw, stream,
range_, vl, vu, il, iu, info);
if(*info != 0){
*info = (submat+1)*(n+1) + submat + matsiz;
return MAGMA_SUCCESS;
}
iwork[i/2]= iwork[i+1];
}
subpbs /= 2;
++curlvl;
#ifdef ENABLE_TIMER
printf("%d: time: %6.2f\n", curlvl, GetTimerValue(start,end)/1000.);
#endif
}
// Re-merge the eigenvalues/vectors which were deflated at the final
// merge step.
for(i = 0; i<n; ++i){
j = iwork[indxq+i] - 1;
work[i] = d[j];
blasf77_scopy(&n, Q(0, j), &ione, &work[ n*(i+1) ], &ione);
}
blasf77_scopy(&n, work, &ione, d, &ione);
char char_A[] = {'A',0};
lapackf77_slacpy ( char_A, &n, &n, &work[n], &n, q, &ldq );
for (igpu = 0; igpu < nrgpu; ++igpu){
magma_queue_destroy( stream[igpu][0] );
magma_queue_destroy( stream[igpu][1] );
/*if(nrgpu==1)*/
magma_free( dw[igpu] );
}
return MAGMA_SUCCESS;
} /* magma_slaex0 */

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_slaex1_m ( magma_int_t  nrgpu,
magma_int_t  n,
float *  d,
float *  q,
magma_int_t  ldq,
magma_int_t indxq,
float  rho,
magma_int_t  cutpnt,
float *  work,
magma_int_t iwork,
float **  dwork,
cudaStream_t  stream[N_MAX_GPU][2],
char  range,
float  vl,
float  vu,
magma_int_t  il,
magma_int_t  iu,
magma_int_t info 
)

Definition at line 39 of file slaex1_m.cpp.

References __func__, blasf77_scopy(), MAGMA_ERR_ILLEGAL_VALUE, magma_slaed2_(), magma_slaex3_m(), MAGMA_SUCCESS, magma_xerbla(), max, min, and Q.

{
/*
-- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
.. Scalar Arguments ..
CHARACTER RANGE
INTEGER IL, IU, CUTPNT, INFO, LDQ, N
DOUBLE PRECISION RHO, VL, VU
..
.. Array Arguments ..
INTEGER INDXQ( * ), iwork[* )
DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * ), DWORK( * )
..
Purpose
=======
DLAEX1 computes the updated eigensystem of a diagonal
matrix after modification by a rank-one symmetric matrix.
T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
where Z = Q'u, u is a vector of length N with ones in the
CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
The eigenvectors of the original matrix are stored in Q, and the
eigenvalues are in D. The algorithm consists of three stages:
The first stage consists of deflating the size of the problem
when there are multiple eigenvalues or if there is a zero in
the Z vector. For each such occurence the dimension of the
secular equation problem is reduced by one. This stage is
performed by the routine DLAED2.
The second stage consists of calculating the updated
eigenvalues. This is done by finding the roots of the secular
equation via the routine DLAED4 (as called by DLAED3).
This routine also calculates the eigenvectors of the current
problem.
The final stage consists of computing the updated eigenvectors
directly using the updated eigenvalues. The eigenvectors for
the current problem are multiplied with the eigenvectors from
the overall problem.
Arguments
=========
N (input) INTEGER
The dimension of the symmetric tridiagonal matrix. N >= 0.
D (input/output) DOUBLE PRECISION array, dimension (N)
On entry, the eigenvalues of the rank-1-perturbed matrix.
On exit, the eigenvalues of the repaired matrix.
Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
On entry, the eigenvectors of the rank-1-perturbed matrix.
On exit, the eigenvectors of the repaired tridiagonal matrix.
LDQ (input) INTEGER
The leading dimension of the array Q. LDQ >= max(1,N).
INDXQ (input/output) INTEGER array, dimension (N)
On entry, the permutation which separately sorts the two
subproblems in D into ascending order.
On exit, the permutation which will reintegrate the
subproblems back into sorted order,
i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
RHO (input) DOUBLE PRECISION
The subdiagonal entry used to create the rank-1 modification.
CUTPNT (input) INTEGER
The location of the last eigenvalue in the leading sub-matrix.
min(1,N) <= CUTPNT <= N/2.
WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2)
IWORK (workspace) INTEGER array, dimension (4*N)
DWORK (devices workspaces) DOUBLE PRECISION array of arrays,
dimension NRGPU.
if NRGPU = 1 the dimension of the first workspace
should be (3*N*N/2+3*N)
otherwise the NRGPU workspaces should have the size
ceil((N-N1) * (N-N1) / floor(nrgpu/2)) +
NB * ((N-N1) + (N-N1) / floor(nrgpu/2))
STREAM (device stream) cudaStream_t array,
dimension (N_MAX_GPU,2)
RANGE (input) CHARACTER*1
= 'A': all eigenvalues will be found.
= 'V': all eigenvalues in the half-open interval (VL,VU]
will be found.
= 'I': the IL-th through IU-th eigenvalues will be found.
VL (input) DOUBLE PRECISION
VU (input) DOUBLE PRECISION
if RANGE='V', the lower and upper bounds of the interval to
be searched for eigenvalues. VL < VU.
Not referenced if RANGE = 'A' or 'I'.
IL (input) INTEGER
IU (input) INTEGER
if RANGE='I', the indices (in ascending order) of the
smallest and largest eigenvalues to be returned.
1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
Not referenced if RANGE = 'A' or 'V'.
INFO (output) INTEGER
= 0: successful exit.
< 0: if INFO = -i, the i-th argument had an illegal value.
> 0: if INFO = 1, an eigenvalue did not converge
Further Details
===============
Based on contributions by
Jeff Rutter, Computer Science Division, University of California
at Berkeley, USA
Modified by Francoise Tisseur, University of Tennessee.
=====================================================================
*/
magma_int_t coltyp, i, idlmda;
magma_int_t indx, indxc, indxp;
magma_int_t iq2, is, iw, iz, k, tmp;
magma_int_t ione = 1;
// Test the input parameters.
*info = 0;
if( n < 0 )
*info = -1;
else if( ldq < max(1, n) )
*info = -4;
else if( min( 1, n/2 ) > cutpnt || n/2 < cutpnt )
*info = -7;
if( *info != 0 ){
magma_xerbla( __func__, -*info );
}
// Quick return if possible
if( n == 0 )
return MAGMA_SUCCESS;
// The following values are integer pointers which indicate
// the portion of the workspace
// used by a particular array in DLAED2 and DLAED3.
iz = 0;
idlmda = iz + n;
iw = idlmda + n;
iq2 = iw + n;
indx = 0;
indxc = indx + n;
coltyp = indxc + n;
indxp = coltyp + n;
// Form the z-vector which consists of the last row of Q_1 and the
// first row of Q_2.
blasf77_scopy( &cutpnt, Q(cutpnt-1, 0), &ldq, &work[iz], &ione);
tmp = n-cutpnt;
blasf77_scopy( &tmp, Q(cutpnt, cutpnt), &ldq, &work[iz+cutpnt], &ione);
// Deflate eigenvalues.
magma_slaed2_(&k, &n, &cutpnt, d, q, &ldq, indxq, &rho, &work[iz],
&work[idlmda], &work[iw], &work[iq2],
&iwork[indx], &iwork[indxc], &iwork[indxp],
&iwork[coltyp], info);
if( *info != 0 )
return MAGMA_SUCCESS;
// Solve Secular Equation.
if( k != 0 ){
is = (iwork[coltyp]+iwork[coltyp+1])*cutpnt + (iwork[coltyp+1]+iwork[coltyp+2])*(n-cutpnt) + iq2;
magma_slaex3_m(nrgpu, k, n, cutpnt, d, q, ldq, rho,
&work[idlmda], &work[iq2], &iwork[indxc],
&iwork[coltyp], &work[iw], &work[is],
indxq, dwork, stream, range, vl, vu, il, iu, info );
if( *info != 0 )
return MAGMA_SUCCESS;
}
else {
for (i = 0; i<n; ++i)
indxq[i] = i+1;
}
return MAGMA_SUCCESS;
} /* magma_slaex1_m */

Here is the call graph for this function:

Here is the caller graph for this function: