MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
ssytrd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Stan Tomov
9  @author Raffaele Solca
10 
11  @generated s Thu May 10 22:26:55 2012
12 
13 */
14 #include "common_magma.h"
15 
16 // === Define what BLAS to use ============================================
17 
18 //#define FAST_HEMV
19 
20 // === End defining what BLAS to use ======================================
21 #define PRECISION_s
22 
23 #if (defined(PRECISION_s))
24 // #define magma_ssyr2k magmablas_ssyr2k
25 #endif
26 // === End defining what BLAS to use ======================================
27 
28 #define A(i, j) ( a+(j)*lda + (i))
29 #define dA(i, j) (da+(j)*ldda + (i))
30 
31 extern "C" magma_int_t
33  float *a, magma_int_t lda,
34  float *d, float *e, float *tau,
35  float *work, magma_int_t lwork,
36  magma_int_t *info)
37 {
38 /* -- MAGMA (version 1.2.0) --
39  Univ. of Tennessee, Knoxville
40  Univ. of California, Berkeley
41  Univ. of Colorado, Denver
42  May 2012
43 
44  Purpose
45  =======
46  SSYTRD reduces a real symmetric matrix A to real symmetric
47  tridiagonal form T by an orthogonal similarity transformation:
48  Q**T * A * Q = T.
49 
50  Arguments
51  =========
52  UPLO (input) CHARACTER*1
53  = 'U': Upper triangle of A is stored;
54  = 'L': Lower triangle of A is stored.
55 
56  N (input) INTEGER
57  The order of the matrix A. N >= 0.
58 
59  A (input/output) REAL array, dimension (LDA,N)
60  On entry, the symmetric matrix A. If UPLO = 'U', the leading
61  N-by-N upper triangular part of A contains the upper
62  triangular part of the matrix A, and the strictly lower
63  triangular part of A is not referenced. If UPLO = 'L', the
64  leading N-by-N lower triangular part of A contains the lower
65  triangular part of the matrix A, and the strictly upper
66  triangular part of A is not referenced.
67  On exit, if UPLO = 'U', the diagonal and first superdiagonal
68  of A are overwritten by the corresponding elements of the
69  tridiagonal matrix T, and the elements above the first
70  superdiagonal, with the array TAU, represent the orthogonal
71  matrix Q as a product of elementary reflectors; if UPLO
72  = 'L', the diagonal and first subdiagonal of A are over-
73  written by the corresponding elements of the tridiagonal
74  matrix T, and the elements below the first subdiagonal, with
75  the array TAU, represent the orthogonal matrix Q as a product
76  of elementary reflectors. See Further Details.
77 
78  LDA (input) INTEGER
79  The leading dimension of the array A. LDA >= max(1,N).
80 
81  D (output) REAL array, dimension (N)
82  The diagonal elements of the tridiagonal matrix T:
83  D(i) = A(i,i).
84 
85  E (output) REAL array, dimension (N-1)
86  The off-diagonal elements of the tridiagonal matrix T:
87  E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
88 
89  TAU (output) REAL array, dimension (N-1)
90  The scalar factors of the elementary reflectors (see Further
91  Details).
92 
93  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
94  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
95 
96  LWORK (input) INTEGER
97  The dimension of the array WORK. LWORK >= 1.
98  For optimum performance LWORK >= N*NB, where NB is the
99  optimal blocksize.
100 
101  If LWORK = -1, then a workspace query is assumed; the routine
102  only calculates the optimal size of the WORK array, returns
103  this value as the first entry of the WORK array, and no error
104  message related to LWORK is issued by XERBLA.
105 
106  INFO (output) INTEGER
107  = 0: successful exit
108  < 0: if INFO = -i, the i-th argument had an illegal value
109 
110  Further Details
111  ===============
112  If UPLO = 'U', the matrix Q is represented as a product of elementary
113  reflectors
114 
115  Q = H(n-1) . . . H(2) H(1).
116 
117  Each H(i) has the form
118 
119  H(i) = I - tau * v * v'
120 
121  where tau is a real scalar, and v is a real vector with
122  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
123  A(1:i-1,i+1), and tau in TAU(i).
124 
125  If UPLO = 'L', the matrix Q is represented as a product of elementary
126  reflectors
127 
128  Q = H(1) H(2) . . . H(n-1).
129 
130  Each H(i) has the form
131 
132  H(i) = I - tau * v * v'
133 
134  where tau is a real scalar, and v is a real vector with
135  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
136  and tau in TAU(i).
137 
138  The contents of A on exit are illustrated by the following examples
139  with n = 5:
140 
141  if UPLO = 'U': if UPLO = 'L':
142 
143  ( d e v2 v3 v4 ) ( d )
144  ( d e v3 v4 ) ( e d )
145  ( d e v4 ) ( v1 e d )
146  ( d e ) ( v1 v2 e d )
147  ( d ) ( v1 v2 v3 e d )
148 
149  where d and e denote diagonal and off-diagonal elements of T, and vi
150  denotes an element of the vector defining H(i).
151  ===================================================================== */
152 
153  char uplo_[2] = {uplo, 0};
154 
155  magma_int_t ldda = lda;
157 
158  float c_neg_one = MAGMA_S_NEG_ONE;
159  float c_one = MAGMA_S_ONE;
160  float d_one = MAGMA_D_ONE;
161 
162  static magma_int_t kk, nx;
163  static magma_int_t i, j, i_n;
164  static magma_int_t iinfo;
165  static magma_int_t ldwork, lddwork, lwkopt;
166  static magma_int_t lquery;
167 
168  *info = 0;
169  long int upper = lapackf77_lsame(uplo_, "U");
170  lquery = lwork == -1;
171  if (! upper && ! lapackf77_lsame(uplo_, "L")) {
172  *info = -1;
173  } else if (n < 0) {
174  *info = -2;
175  } else if (lda < max(1,n)) {
176  *info = -4;
177  } else if (lwork < nb*n && ! lquery) {
178  *info = -9;
179  }
180 
181  if (*info == 0) {
182  /* Determine the block size. */
183  ldwork = lddwork = n;
184  lwkopt = n * nb;
185  MAGMA_S_SET2REAL( work[0], lwkopt );
186  }
187 
188  if (*info != 0) {
189  magma_xerbla( __func__, -(*info) );
190  return *info;
191  }
192  else if (lquery)
193  return *info;
194 
195  /* Quick return if possible */
196  if (n == 0) {
197  work[0] = c_one;
198  return *info;
199  }
200 
201  float *da;
202  if (MAGMA_SUCCESS != magma_smalloc( &da, n*ldda + 2*n*nb )) {
203  *info = MAGMA_ERR_DEVICE_ALLOC;
204  return *info;
205  }
206 
207  float *dwork = da + (n)*ldda;
208 
209  if (n < 2048)
210  nx = n;
211  else
212  nx = 512;
213 
214  if (upper) {
215 
216  /* Copy the matrix to the GPU */
217  magma_ssetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
218 
219  /* Reduce the upper triangle of A.
220  Columns 1:kk are handled by the unblocked method. */
221  kk = n - (n - nx + nb - 1) / nb * nb;
222 
223  for (i = n - nb; i >= kk; i -= nb)
224  {
225  /* Reduce columns i:i+nb-1 to tridiagonal form and form the
226  matrix W which is needed to update the unreduced part of
227  the matrix */
228 
229  /* Get the current panel (no need for the 1st iteration) */
230  if (i!=n-nb)
231  magma_sgetmatrix( i+nb, nb, dA(0, i), ldda, A(0, i), lda );
232 
233  magma_slatrd(uplo, i+nb, nb, A(0, 0), lda, e, tau,
234  work, ldwork, dA(0, 0), ldda, dwork, lddwork);
235 
236  /* Update the unreduced submatrix A(0:i-2,0:i-2), using an
237  update of the form: A := A - V*W' - W*V' */
238  magma_ssetmatrix( i + nb, nb, work, ldwork, dwork, lddwork );
239 
240  magma_ssyr2k(uplo, MagmaNoTrans, i, nb, c_neg_one,
241  dA(0, i), ldda, dwork,
242  lddwork, d_one, dA(0, 0), ldda);
243 
244  /* Copy superdiagonal elements back into A, and diagonal
245  elements into D */
246  for (j = i; j < i+nb; ++j) {
247  MAGMA_S_SET2REAL( *A(j-1, j), e[j - 1] );
248  d[j] = MAGMA_S_REAL( *A(j, j) );
249  }
250 
251  }
252 
253  magma_sgetmatrix( kk, kk, dA(0, 0), ldda, A(0, 0), lda );
254 
255  /* Use unblocked code to reduce the last or only block */
256  lapackf77_ssytd2(uplo_, &kk, A(0, 0), &lda, d, e, tau, &iinfo);
257  }
258  else
259  {
260  /* Copy the matrix to the GPU */
261  if (1<=n-nx)
262  magma_ssetmatrix( n, n, A(0,0), lda, dA(0,0), ldda );
263 
264  #ifdef FAST_HEMV
265  // TODO this leaks memory from da, above
266  float *dwork2;
267  if (MAGMA_SUCCESS != magma_smalloc( &dwork2, n*n )) {
268  *info = MAGMA_ERR_DEVICE_ALLOC;
269  return *info;
270  }
271  #endif
272  /* Reduce the lower triangle of A */
273  for (i = 0; i < n-nx; i += nb)
274  {
275  /* Reduce columns i:i+nb-1 to tridiagonal form and form the
276  matrix W which is needed to update the unreduced part of
277  the matrix */
278 
279  /* Get the current panel (no need for the 1st iteration) */
280  if (i!=0)
281  magma_sgetmatrix( n-i, nb, dA(i, i), ldda, A(i, i), lda );
282  #ifdef FAST_HEMV
283  magma_slatrd2(uplo, n-i, nb, A(i, i), lda, &e[i],
284  &tau[i], work, ldwork,
285  dA(i, i), ldda,
286  dwork, lddwork, dwork2, n*n);
287  #else
288  magma_slatrd(uplo, n-i, nb, A(i, i), lda, &e[i],
289  &tau[i], work, ldwork,
290  dA(i, i), ldda,
291  dwork, lddwork);
292  #endif
293  /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
294  an update of the form: A := A - V*W' - W*V' */
295  magma_ssetmatrix( n-i, nb, work, ldwork, dwork, lddwork );
296 
297  magma_ssyr2k(MagmaLower, MagmaNoTrans, n-i-nb, nb, c_neg_one,
298  dA(i+nb, i), ldda,
299  &dwork[nb], lddwork, d_one,
300  dA(i+nb, i+nb), ldda);
301 
302  /* Copy subdiagonal elements back into A, and diagonal
303  elements into D */
304  for (j = i; j < i+nb; ++j) {
305  MAGMA_S_SET2REAL( *A(j+1, j), e[j] );
306  d[j] = MAGMA_S_REAL( *A(j, j) );
307  }
308  }
309 
310  #ifdef FAST_HEMV
311  magma_free( dwork2 );
312  #endif
313 
314  /* Use unblocked code to reduce the last or only block */
315  if (1<=n-nx)
316  magma_sgetmatrix( n-i, n-i, dA(i, i), ldda, A(i, i), lda );
317  i_n = n-i;
318  lapackf77_ssytrd(uplo_, &i_n, A(i, i), &lda, &d[i], &e[i],
319  &tau[i], work, &lwork, &iinfo);
320 
321  }
322 
323  magma_free( da );
324  MAGMA_S_SET2REAL( work[0], lwkopt );
325 
326  return *info;
327 } /* magma_ssytrd */