MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sstedx_m.cpp File Reference
#include "common_magma.h"
Include dependency graph for sstedx_m.cpp:

Go to the source code of this file.

Macros

#define Z(ix, iy)   (z + (ix) + ldz * (iy))
 

Functions

magma_int_t magma_slaex0_m (magma_int_t nrgpu, magma_int_t n, float *d, float *e, float *q, magma_int_t ldq, float *work, magma_int_t *iwork, char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
 
magma_int_t magma_sstedx_m (magma_int_t nrgpu, char range, magma_int_t n, float vl, float vu, magma_int_t il, magma_int_t iu, float *d, float *e, float *z, magma_int_t ldz, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 

Macro Definition Documentation

#define Z (   ix,
  iy 
)    (z + (ix) + ldz * (iy))

Definition at line 14 of file sstedx_m.cpp.

Function Documentation

magma_int_t magma_slaex0_m ( magma_int_t  nrgpu,
magma_int_t  n,
float *  d,
float *  e,
float *  q,
magma_int_t  ldq,
float *  work,
magma_int_t iwork,
char  range,
float  vl,
float  vu,
magma_int_t  il,
magma_int_t  iu,
magma_int_t info 
)

Definition at line 29 of file slaex0_m.cpp.

References __func__, blasf77_scopy(), get_current_time(), GetTimerValue(), lapackf77_slacpy(), lapackf77_ssteqr(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_ILLEGAL_VALUE, magma_free, magma_get_slaex3_m_nb(), magma_get_smlsize_divideconquer(), magma_getdevice(), magma_queue_create, magma_queue_destroy, MAGMA_S_ABS, magma_setdevice(), magma_slaex1_m(), magma_smalloc(), MAGMA_SUCCESS, magma_xerbla(), MagmaMaxGPUs, max, and Q.

33 {
34 /* -- MAGMA (version 1.4.0) --
35  Univ. of Tennessee, Knoxville
36  Univ. of California, Berkeley
37  Univ. of Colorado, Denver
38  August 2013
39 
40  .. Scalar Arguments ..
41  CHARACTER RANGE
42  INTEGER IL, IU, INFO, LDQ, N
43  DOUBLE PRECISION VL, VU
44  ..
45  .. Array Arguments ..
46  INTEGER IWORK( * )
47  DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ),
48  $ WORK( * )
49  ..
50 
51  Purpose
52  =======
53  SLAEX0 computes all eigenvalues and the choosen eigenvectors of a
54  symmetric tridiagonal matrix using the divide and conquer method.
55 
56  Arguments
57  =========
58  N (input) INTEGER
59  The dimension of the symmetric tridiagonal matrix. N >= 0.
60 
61  D (input/output) DOUBLE PRECISION array, dimension (N)
62  On entry, the main diagonal of the tridiagonal matrix.
63  On exit, its eigenvalues.
64 
65  E (input) DOUBLE PRECISION array, dimension (N-1)
66  The off-diagonal elements of the tridiagonal matrix.
67  On exit, E has been destroyed.
68 
69  Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
70  On entry, Q will be the identity matrix.
71  On exit, Q contains the eigenvectors of the
72  tridiagonal matrix.
73 
74  LDQ (input) INTEGER
75  The leading dimension of the array Q. If eigenvectors are
76  desired, then LDQ >= max(1,N). In any case, LDQ >= 1.
77 
78  WORK (workspace) DOUBLE PRECISION array,
79  the dimension of WORK >= 4*N + N**2.
80 
81  IWORK (workspace) INTEGER array,
82  the dimension of IWORK >= 3 + 5*N.
83 
84  RANGE (input) CHARACTER*1
85  = 'A': all eigenvalues will be found.
86  = 'V': all eigenvalues in the half-open interval (VL,VU]
87  will be found.
88  = 'I': the IL-th through IU-th eigenvalues will be found.
89 
90  VL (input) DOUBLE PRECISION
91  VU (input) DOUBLE PRECISION
92  If RANGE='V', the lower and upper bounds of the interval to
93  be searched for eigenvalues. VL < VU.
94  Not referenced if RANGE = 'A' or 'I'.
95 
96  IL (input) INTEGER
97  IU (input) INTEGER
98  If RANGE='I', the indices (in ascending order) of the
99  smallest and largest eigenvalues to be returned.
100  1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
101  Not referenced if RANGE = 'A' or 'V'.
102 
103  INFO (output) INTEGER
104  = 0: successful exit.
105  < 0: if INFO = -i, the i-th argument had an illegal value.
106  > 0: The algorithm failed to compute an eigenvalue while
107  working on the submatrix lying in rows and columns
108  INFO/(N+1) through mod(INFO,N+1).
109 
110  Further Details
111  ===============
112  Based on contributions by
113  Jeff Rutter, Computer Science Division, University of California
114  at Berkeley, USA
115 
116  =====================================================================
117 */
118 
119  magma_int_t ione = 1;
120  char range_;
121  magma_int_t curlvl, i, indxq;
122  magma_int_t igpu, j, k, matsiz, msd2, smlsiz;
123  magma_int_t submat, subpbs, tlvls;
124  float* dw[MagmaMaxGPUs];
125  magma_queue_t stream [MagmaMaxGPUs][2];
126  int gpu_b;
127  magma_getdevice(&gpu_b);
128 
129  // Test the input parameters.
130 
131  *info = 0;
132 
133  if( n < 0 )
134  *info = -1;
135  else if( ldq < max(1, n) )
136  *info = -5;
137  if( *info != 0 ){
138  magma_xerbla( __func__, -*info );
140  }
141 
142  // Quick return if possible
143  if(n == 0)
144  return MAGMA_SUCCESS;
145 
146  //workspace dimension for nrgpu > 1
147  size_t tmp = (n-1)/2+1;
148  if (nrgpu > 1){
149  size_t tmp2 = (tmp-1) / (nrgpu/2) + 1;
150  tmp = tmp * tmp2 + 2 * magma_get_slaex3_m_nb()*(tmp + tmp2);
151  }
152 
153  for (igpu = 0; igpu < nrgpu; ++igpu){
154  magma_setdevice(igpu);
155  if(nrgpu==1){
156  if (MAGMA_SUCCESS != magma_smalloc( &dw[igpu], 3*n*(n/2 + 1) )) {
157  *info = -15;
158  return MAGMA_ERR_DEVICE_ALLOC;
159  }
160  }
161  else {
162  if (MAGMA_SUCCESS != magma_smalloc( &dw[igpu], tmp )) {
163  *info = -15;
164  return MAGMA_ERR_DEVICE_ALLOC;
165  }
166  }
167  magma_queue_create( &stream[igpu][0] );
168  magma_queue_create( &stream[igpu][1] );
169  }
170 
172 
173  // Determine the size and placement of the submatrices, and save in
174  // the leading elements of IWORK.
175 
176  iwork[0] = n;
177  subpbs= 1;
178  tlvls = 0;
179  while (iwork[subpbs - 1] > smlsiz) {
180  for (j = subpbs; j > 0; --j){
181  iwork[2*j - 1] = (iwork[j-1]+1)/2;
182  iwork[2*j - 2] = iwork[j-1]/2;
183  }
184  ++tlvls;
185  subpbs *= 2;
186  }
187  for (j=1; j<subpbs; ++j)
188  iwork[j] += iwork[j-1];
189 
190  // Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
191  // using rank-1 modifications (cuts).
192 
193  for(i=0; i < subpbs-1; ++i){
194  submat = iwork[i];
195  d[submat-1] -= MAGMA_S_ABS(e[submat-1]);
196  d[submat] -= MAGMA_S_ABS(e[submat-1]);
197  }
198 
199  indxq = 4*n + 3;
200 
201  // Solve each submatrix eigenproblem at the bottom of the divide and
202  // conquer tree.
203 
204  char char_I[] = {'I', 0};
205 
206 #ifdef ENABLE_TIMER_DIVIDE_AND_CONQUER
207  magma_timestr_t start, end;
208  start = get_current_time();
209 #endif
210 
211  for (i = 0; i < subpbs; ++i){
212  if(i == 0){
213  submat = 0;
214  matsiz = iwork[0];
215  } else {
216  submat = iwork[i-1];
217  matsiz = iwork[i] - iwork[i-1];
218  }
219  lapackf77_ssteqr(char_I , &matsiz, &d[submat], &e[submat],
220  Q(submat, submat), &ldq, work, info); // change to edc?
221  if(*info != 0){
222  printf("info: %d\n, submat: %d\n", (int) *info, (int) submat);
223  *info = (submat+1)*(n+1) + submat + matsiz;
224  printf("info: %d\n", (int) *info);
225  return MAGMA_SUCCESS;
226  }
227  k = 1;
228  for(j = submat; j < iwork[i]; ++j){
229  iwork[indxq+j] = k;
230  ++k;
231  }
232  }
233 
234 #ifdef ENABLE_TIMER_DIVIDE_AND_CONQUER
235  end = get_current_time();
236  printf(" for: ssteqr = %6.2f\n", GetTimerValue(start,end)/1000.);
237 #endif
238  // Successively merge eigensystems of adjacent submatrices
239  // into eigensystem for the corresponding larger matrix.
240 
241  curlvl = 1;
242  while (subpbs > 1){
243 #ifdef ENABLE_TIMER_DIVIDE_AND_CONQUER
244  magma_timestr_t start, end;
245  start = get_current_time();
246 #endif
247  for (i=0; i<subpbs-1; i+=2){
248  if(i == 0){
249  submat = 0;
250  matsiz = iwork[1];
251  msd2 = iwork[0];
252  } else {
253  submat = iwork[i-1];
254  matsiz = iwork[i+1] - iwork[i-1];
255  msd2 = matsiz / 2;
256  }
257 
258  // Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
259  // into an eigensystem of size MATSIZ.
260  // SLAEX1 is used only for the full eigensystem of a tridiagonal
261  // matrix.
262 
263  if (matsiz == n)
264  range_=range;
265  else
266  // We need all the eigenvectors if it is not last step
267  range_='A';
268 
269  magma_slaex1_m(nrgpu, matsiz, &d[submat], Q(submat, submat), ldq,
270  &iwork[indxq+submat], e[submat+msd2-1], msd2,
271  work, &iwork[subpbs], dw, stream,
272  range_, vl, vu, il, iu, info);
273 
274  if(*info != 0){
275  *info = (submat+1)*(n+1) + submat + matsiz;
276  return MAGMA_SUCCESS;
277  }
278  iwork[i/2]= iwork[i+1];
279  }
280  subpbs /= 2;
281  ++curlvl;
282 #ifdef ENABLE_TIMER_DIVIDE_AND_CONQUER
283  end = get_current_time();
284  //printf("%d: time: %6.2f\n", curlvl, GetTimerValue(start,end)/1000.);
285 #endif
286 
287  }
288 
289  // Re-merge the eigenvalues/vectors which were deflated at the final
290  // merge step.
291 
292  for(i = 0; i<n; ++i){
293  j = iwork[indxq+i] - 1;
294  work[i] = d[j];
295  blasf77_scopy(&n, Q(0, j), &ione, &work[ n*(i+1) ], &ione);
296  }
297  blasf77_scopy(&n, work, &ione, d, &ione);
298  char char_A[] = {'A',0};
299  lapackf77_slacpy ( char_A, &n, &n, &work[n], &n, q, &ldq );
300 
301  for (igpu = 0; igpu < nrgpu; ++igpu){
302  magma_setdevice(igpu);
303  magma_queue_destroy( stream[igpu][0] );
304  magma_queue_destroy( stream[igpu][1] );
305  magma_free( dw[igpu] );
306  }
307 
308  magma_setdevice(gpu_b);
309 
310  return MAGMA_SUCCESS;
311 
312 } /* magma_slaex0 */
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
void lapackf77_slacpy(const char *uplo, const magma_int_t *m, const magma_int_t *n, const float *A, const magma_int_t *lda, float *B, const magma_int_t *ldb)
int magma_int_t
Definition: magmablas.h:12
#define Q(ix, iy)
Definition: slaex0_m.cpp:14
#define magma_queue_destroy(queue)
Definition: magma.h:116
#define vl(i, j)
void magma_setdevice(magma_device_t dev)
void magma_getdevice(magma_device_t *dev)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaMaxGPUs
Definition: magma_types.h:255
void lapackf77_ssteqr(const char *compz, const magma_int_t *n, float *d, float *e, float *Z, const magma_int_t *ldz, float *work, magma_int_t *info)
#define MAGMA_SUCCESS
Definition: magma.h:106
static magma_err_t magma_smalloc(magmaFloat_ptr *ptrPtr, size_t n)
Definition: magma.h:77
#define MAGMA_S_ABS(a)
Definition: magma.h:196
magma_int_t magma_slaex1_m(magma_int_t nrgpu, magma_int_t n, float *d, float *q, magma_int_t ldq, magma_int_t *indxq, float rho, magma_int_t cutpnt, float *work, magma_int_t *iwork, float **dwork, magma_queue_t stream[MagmaMaxGPUs][2], char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
Definition: slaex1_m.cpp:28
magma_int_t magma_get_smlsize_divideconquer()
Definition: get_nb.cpp:768
double GetTimerValue(magma_timestr_t start, magma_timestr_t end)
Definition: timer.cpp:94
#define max(a, b)
Definition: common_magma.h:82
magma_timestr_t get_current_time(void)
Definition: timer.cpp:76
void blasf77_scopy(const magma_int_t *n, const float *x, const magma_int_t *incx, float *y, const magma_int_t *incy)
magma_int_t magma_get_slaex3_m_nb()
Definition: slaex3_m.cpp:27

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sstedx_m ( magma_int_t  nrgpu,
char  range,
magma_int_t  n,
float  vl,
float  vu,
magma_int_t  il,
magma_int_t  iu,
float *  d,
float *  e,
float *  z,
magma_int_t  ldz,
float *  work,
magma_int_t  lwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Definition at line 24 of file sstedx_m.cpp.

References __func__, blasf77_sswap(), lapackf77_lsame, lapackf77_slamch, lapackf77_slanst(), lapackf77_slascl(), lapackf77_slaset(), lapackf77_ssteqr(), MAGMA_ERR_ILLEGAL_VALUE, magma_get_numthreads(), magma_get_smlsize_divideconquer(), MAGMA_S_ABS, magma_setlapack_numthreads(), magma_slaex0_m(), MAGMA_SUCCESS, magma_xerbla(), max, min, and Z.

28 {
29 /* -- MAGMA (version 1.4.0) --
30  Univ. of Tennessee, Knoxville
31  Univ. of California, Berkeley
32  Univ. of Colorado, Denver
33  August 2013
34 
35  .. Scalar Arguments ..
36  CHARACTER RANGE
37  INTEGER IL, IU, INFO, LDZ, LIWORK, LWORK, N
38  DOUBLE PRECISION VL, VU
39  ..
40  .. Array Arguments ..
41  INTEGER IWORK( * )
42  DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ),
43  $ DWORK ( * )
44  ..
45 
46  Purpose
47  =======
48  SSTEDX computes some eigenvalues and, optionally, eigenvectors of a
49  symmetric tridiagonal matrix using the divide and conquer method.
50 
51  This code makes very mild assumptions about floating point
52  arithmetic. It will work on machines with a guard digit in
53  add/subtract, or on those binary machines without guard digits
54  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
55  It could conceivably fail on hexadecimal or decimal machines
56  without guard digits, but we know of none. See SLAEX3 for details.
57 
58  Arguments
59  =========
60  RANGE (input) CHARACTER*1
61  = 'A': all eigenvalues will be found.
62  = 'V': all eigenvalues in the half-open interval (VL,VU]
63  will be found.
64  = 'I': the IL-th through IU-th eigenvalues will be found.
65 
66  N (input) INTEGER
67  The dimension of the symmetric tridiagonal matrix. N >= 0.
68 
69  VL (input) DOUBLE PRECISION
70  VU (input) DOUBLE PRECISION
71  If RANGE='V', the lower and upper bounds of the interval to
72  be searched for eigenvalues. VL < VU.
73  Not referenced if RANGE = 'A' or 'I'.
74 
75  IL (input) INTEGER
76  IU (input) INTEGER
77  If RANGE='I', the indices (in ascending order) of the
78  smallest and largest eigenvalues to be returned.
79  1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
80  Not referenced if RANGE = 'A' or 'V'.
81 
82  D (input/output) DOUBLE PRECISION array, dimension (N)
83  On entry, the diagonal elements of the tridiagonal matrix.
84  On exit, if INFO = 0, the eigenvalues in ascending order.
85 
86  E (input/output) DOUBLE PRECISION array, dimension (N-1)
87  On entry, the subdiagonal elements of the tridiagonal matrix.
88  On exit, E has been destroyed.
89 
90  Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
91  On exit, if INFO = 0, Z contains the orthonormal eigenvectors
92  of the symmetric tridiagonal matrix.
93 
94  LDZ (input) INTEGER
95  The leading dimension of the array Z. LDZ >= max(1,N).
96 
97  WORK (workspace/output) DOUBLE PRECISION array,
98  dimension (LWORK)
99  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
100 
101  LWORK (input) INTEGER
102  The dimension of the array WORK.
103  If N > 1 then LWORK >= ( 1 + 4*N + N**2 ).
104  Note that if N is less than or
105  equal to the minimum divide size, usually 25, then LWORK need
106  only be max(1,2*(N-1)).
107 
108  If LWORK = -1, then a workspace query is assumed; the routine
109  only calculates the optimal size of the WORK array, returns
110  this value as the first entry of the WORK array, and no error
111  message related to LWORK is issued by XERBLA.
112 
113  IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK))
114  On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
115 
116  LIWORK (input) INTEGER
117  The dimension of the array IWORK.
118  LIWORK >= ( 3 + 5*N ).
119  Note that if N is less than or
120  equal to the minimum divide size, usually 25, then LIWORK
121  need only be 1.
122 
123  If LIWORK = -1, then a workspace query is assumed; the
124  routine only calculates the optimal size of the IWORK array,
125  returns this value as the first entry of the IWORK array, and
126  no error message related to LIWORK is issued by XERBLA.
127 
128  INFO (output) INTEGER
129  = 0: successful exit.
130  < 0: if INFO = -i, the i-th argument had an illegal value.
131  > 0: The algorithm failed to compute an eigenvalue while
132  working on the submatrix lying in rows and columns
133  INFO/(N+1) through mod(INFO,N+1).
134 
135  Further Details
136  ===============
137  Based on contributions by
138  Jeff Rutter, Computer Science Division, University of California
139  at Berkeley, USA
140  Modified by Francoise Tisseur, University of Tennessee.
141 
142  =====================================================================
143 */
144  char range_[2] = {range, 0};
145 
146  float d_zero = 0.;
147  float d_one = 1.;
148  magma_int_t izero = 0;
149  magma_int_t ione = 1;
150 
151 
152  magma_int_t alleig, indeig, valeig, lquery;
153  magma_int_t i, j, k, m;
154  magma_int_t liwmin, lwmin;
155  magma_int_t start, end, smlsiz;
156  float eps, orgnrm, p, tiny;
157 
158  // Test the input parameters.
159 
160  alleig = lapackf77_lsame(range_, "A");
161  valeig = lapackf77_lsame(range_, "V");
162  indeig = lapackf77_lsame(range_, "I");
163  lquery = lwork == -1 || liwork == -1;
164 
165  *info = 0;
166 
167  if (! (alleig || valeig || indeig)) {
168  *info = -1;
169  } else if (n < 0) {
170  *info = -2;
171  } else if (ldz < max(1,n)) {
172  *info = -10;
173  } else {
174  if (valeig) {
175  if (n > 0 && vu <= vl) {
176  *info = -4;
177  }
178  } else if (indeig) {
179  if (il < 1 || il > max(1,n)) {
180  *info = -5;
181  } else if (iu < min(n,il) || iu > n) {
182  *info = -6;
183  }
184  }
185  }
186 
187  if (*info == 0) {
188  // Compute the workspace requirements
189 
191  if( n <= 1 ){
192  lwmin = 1;
193  liwmin = 1;
194  } else {
195  lwmin = 1 + 4*n + n*n;
196  liwmin = 3 + 5*n;
197  }
198 
199  work[0] = lwmin;
200  iwork[0] = liwmin;
201 
202  if (lwork < lwmin && ! lquery) {
203  *info = -12;
204  } else if (liwork < liwmin && ! lquery) {
205  *info = -14;
206  }
207  }
208 
209  if (*info != 0) {
210  magma_xerbla( __func__, -(*info));
212  } else if (lquery) {
213  return MAGMA_SUCCESS;
214  }
215 
216  // Quick return if possible
217 
218  if(n==0)
219  return MAGMA_SUCCESS;
220  if(n==1){
221  *z = 1.;
222  return MAGMA_SUCCESS;
223  }
224 
225  /* determine the number of threads */
226  magma_int_t threads = magma_get_numthreads();
228 
229 #ifdef ENABLE_DEBUG
230  printf(" D&C_m is using %d threads\n", threads);
231 #endif
232 
233  // If N is smaller than the minimum divide size (SMLSIZ+1), then
234  // solve the problem with another solver.
235 
236  if (n < smlsiz){
237  char char_I[]= {'I', 0};
238  lapackf77_ssteqr(char_I, &n, d, e, z, &ldz, work, info);
239  } else {
240  char char_F[]= {'F', 0};
241  lapackf77_slaset(char_F, &n, &n, &d_zero, &d_one, z, &ldz);
242 
243  //Scale.
244  char char_M[]= {'M', 0};
245 
246  orgnrm = lapackf77_slanst(char_M, &n, d, e);
247 
248  if (orgnrm == 0){
249  work[0] = lwmin;
250  iwork[0] = liwmin;
251  return MAGMA_SUCCESS;
252  }
253 
254  eps = lapackf77_slamch( "Epsilon" );
255 
256  if (alleig){
257  start = 0;
258  while ( start < n ){
259 
260  // Let FINISH be the position of the next subdiagonal entry
261  // such that E( END ) <= TINY or FINISH = N if no such
262  // subdiagonal exists. The matrix identified by the elements
263  // between START and END constitutes an independent
264  // sub-problem.
265 
266  for(end = start+1; end < n; ++end){
267  tiny = eps * sqrt( MAGMA_S_ABS(d[end-1]*d[end]));
268  if (MAGMA_S_ABS(e[end-1]) <= tiny)
269  break;
270  }
271 
272  // (Sub) Problem determined. Compute its size and solve it.
273 
274  m = end - start;
275  if (m==1){
276  start = end;
277  continue;
278  }
279  if (m > smlsiz){
280 
281  // Scale
282  char char_G[] = {'G', 0};
283  orgnrm = lapackf77_slanst(char_M, &m, &d[start], &e[start]);
284  lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &m, &ione, &d[start], &m, info);
285  magma_int_t mm = m-1;
286  lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &mm, &ione, &e[start], &mm, info);
287 
288  magma_slaex0_m( nrgpu, m, &d[start], &e[start], Z(start, start), ldz, work, iwork, 'A', vl, vu, il, iu, info);
289 
290  if( *info != 0) {
291  return MAGMA_SUCCESS;
292  }
293 
294  // Scale Back
295  lapackf77_slascl(char_G, &izero, &izero, &d_one, &orgnrm, &m, &ione, &d[start], &m, info);
296 
297  } else {
298 
299  char char_I[]= {'I', 0};
300  lapackf77_ssteqr( char_I, &m, &d[start], &e[start], Z(start, start), &ldz, work, info);
301  if (*info != 0){
302  *info = (start+1) *(n+1) + end;
303  }
304  }
305 
306  start = end;
307  }
308 
309 
310  // If the problem split any number of times, then the eigenvalues
311  // will not be properly ordered. Here we permute the eigenvalues
312  // (and the associated eigenvectors) into ascending order.
313 
314  if (m < n){
315 
316  // Use Selection Sort to minimize swaps of eigenvectors
317  for (i = 1; i < n; ++i){
318  k = i-1;
319  p = d[i-1];
320  for (j = i; j < n; ++j){
321  if (d[j] < p){
322  k = j;
323  p = d[j];
324  }
325  }
326  if(k != i-1) {
327  d[k] = d[i-1];
328  d[i-1] = p;
329  blasf77_sswap(&n, Z(0,i-1), &ione, Z(0,k), &ione);
330  }
331  }
332  }
333 
334  } else {
335 
336  // Scale
337  char char_G[] = {'G', 0};
338  lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &n, &ione, d, &n, info);
339  magma_int_t nm = n-1;
340  lapackf77_slascl(char_G, &izero, &izero, &orgnrm, &d_one, &nm, &ione, e, &nm, info);
341 
342  magma_slaex0_m(nrgpu, n, d, e, z, ldz, work, iwork, range, vl, vu, il, iu, info);
343 
344  if( *info != 0) {
345  return MAGMA_SUCCESS;
346  }
347 
348  // Scale Back
349  lapackf77_slascl(char_G, &izero, &izero, &d_one, &orgnrm, &n, &ione, d, &n, info);
350 
351  }
352  }
353 
354  work[0] = lwmin;
355  iwork[0] = liwmin;
356 
357  return MAGMA_SUCCESS;
358 
359 } /* magma_sstedx_m */
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
#define min(a, b)
Definition: common_magma.h:86
#define Z(ix, iy)
Definition: sstedx_m.cpp:14
#define __func__
Definition: common_magma.h:65
magma_int_t magma_slaex0_m(magma_int_t nrgpu, magma_int_t n, float *d, float *e, float *q, magma_int_t ldq, float *work, magma_int_t *iwork, char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
Definition: slaex0_m.cpp:29
int magma_int_t
Definition: magmablas.h:12
void blasf77_sswap(const magma_int_t *n, float *x, const magma_int_t *incx, float *y, const magma_int_t *incy)
#define vl(i, j)
void magma_setlapack_numthreads(magma_int_t num_threads)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
void lapackf77_ssteqr(const char *compz, const magma_int_t *n, float *d, float *e, float *Z, const magma_int_t *ldz, float *work, magma_int_t *info)
void lapackf77_slaset(const char *uplo, const magma_int_t *m, const magma_int_t *n, const float *alpha, const float *beta, float *A, const magma_int_t *lda)
#define MAGMA_SUCCESS
Definition: magma.h:106
float lapackf77_slanst(const char *norm, const magma_int_t *n, const float *d, const float *e)
#define lapackf77_slamch
Definition: magma_lapack.h:26
#define MAGMA_S_ABS(a)
Definition: magma.h:196
magma_int_t magma_get_smlsize_divideconquer()
Definition: get_nb.cpp:768
magma_int_t magma_get_numthreads()
#define max(a, b)
Definition: common_magma.h:82
void lapackf77_slascl(const char *type, const magma_int_t *kl, const magma_int_t *ku, float *cfrom, float *cto, const magma_int_t *m, const magma_int_t *n, float *A, const magma_int_t *lda, magma_int_t *info)

Here is the call graph for this function:

Here is the caller graph for this function: