MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
ssytrd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @author Stan Tomov
9  @author Raffaele Solca
10
11  @generated s Wed Aug 14 12:16:14 2013
12
13 */
14 #include "common_magma.h"
15
16 #define A(i, j) ( a+(j)*lda + (i))
17 #define dA(i, j) (da+(j)*ldda + (i))
18
19 extern "C" magma_int_t
21  float *a, magma_int_t lda,
22  float *d, float *e, float *tau,
23  float *work, magma_int_t lwork,
24  magma_int_t *info)
25 {
26 /* -- MAGMA (version 1.4.0) --
27  Univ. of Tennessee, Knoxville
28  Univ. of California, Berkeley
30  August 2013
31
32  Purpose
33  =======
34  SSYTRD reduces a real symmetric matrix A to real symmetric
35  tridiagonal form T by an orthogonal similarity transformation:
36  Q**T * A * Q = T.
37
38  Arguments
39  =========
40  UPLO (input) CHARACTER*1
41  = 'U': Upper triangle of A is stored;
42  = 'L': Lower triangle of A is stored.
43
44  N (input) INTEGER
45  The order of the matrix A. N >= 0.
46
47  A (input/output) REAL array, dimension (LDA,N)
48  On entry, the symmetric matrix A. If UPLO = 'U', the leading
49  N-by-N upper triangular part of A contains the upper
50  triangular part of the matrix A, and the strictly lower
51  triangular part of A is not referenced. If UPLO = 'L', the
52  leading N-by-N lower triangular part of A contains the lower
53  triangular part of the matrix A, and the strictly upper
54  triangular part of A is not referenced.
55  On exit, if UPLO = 'U', the diagonal and first superdiagonal
56  of A are overwritten by the corresponding elements of the
57  tridiagonal matrix T, and the elements above the first
58  superdiagonal, with the array TAU, represent the orthogonal
59  matrix Q as a product of elementary reflectors; if UPLO
60  = 'L', the diagonal and first subdiagonal of A are over-
61  written by the corresponding elements of the tridiagonal
62  matrix T, and the elements below the first subdiagonal, with
63  the array TAU, represent the orthogonal matrix Q as a product
64  of elementary reflectors. See Further Details.
65
66  LDA (input) INTEGER
67  The leading dimension of the array A. LDA >= max(1,N).
68
69  D (output) REAL array, dimension (N)
70  The diagonal elements of the tridiagonal matrix T:
71  D(i) = A(i,i).
72
73  E (output) REAL array, dimension (N-1)
74  The off-diagonal elements of the tridiagonal matrix T:
75  E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
76
77  TAU (output) REAL array, dimension (N-1)
78  The scalar factors of the elementary reflectors (see Further
79  Details).
80
81  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
82  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
83
84  LWORK (input) INTEGER
85  The dimension of the array WORK. LWORK >= N*NB, where NB is the
86  optimal blocksize given by magma_get_ssytrd_nb().
87
88  If LWORK = -1, then a workspace query is assumed; the routine
89  only calculates the optimal size of the WORK array, returns
90  this value as the first entry of the WORK array, and no error
91  message related to LWORK is issued by XERBLA.
92
93  INFO (output) INTEGER
94  = 0: successful exit
95  < 0: if INFO = -i, the i-th argument had an illegal value
96
97  Further Details
98  ===============
99  If UPLO = 'U', the matrix Q is represented as a product of elementary
100  reflectors
101
102  Q = H(n-1) . . . H(2) H(1).
103
104  Each H(i) has the form
105
106  H(i) = I - tau * v * v'
107
108  where tau is a real scalar, and v is a real vector with
109  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
110  A(1:i-1,i+1), and tau in TAU(i).
111
112  If UPLO = 'L', the matrix Q is represented as a product of elementary
113  reflectors
114
115  Q = H(1) H(2) . . . H(n-1).
116
117  Each H(i) has the form
118
119  H(i) = I - tau * v * v'
120
121  where tau is a real scalar, and v is a real vector with
122  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
123  and tau in TAU(i).
124
125  The contents of A on exit are illustrated by the following examples
126  with n = 5:
127
128  if UPLO = 'U': if UPLO = 'L':
129
130  ( d e v2 v3 v4 ) ( d )
131  ( d e v3 v4 ) ( e d )
132  ( d e v4 ) ( v1 e d )
133  ( d e ) ( v1 v2 e d )
134  ( d ) ( v1 v2 v3 e d )
135
136  where d and e denote diagonal and off-diagonal elements of T, and vi
137  denotes an element of the vector defining H(i).
138  ===================================================================== */
139
140  char uplo_[2] = {uplo, 0};
141
142  magma_int_t ldda = lda;
144
145  float c_neg_one = MAGMA_S_NEG_ONE;
146  float c_one = MAGMA_S_ONE;
147  float d_one = MAGMA_D_ONE;
148
149  magma_int_t kk, nx;
150  magma_int_t i, j, i_n;
151  magma_int_t iinfo;
152  magma_int_t ldwork, lddwork, lwkopt;
153  magma_int_t lquery;
154
155  *info = 0;
156  int upper = lapackf77_lsame(uplo_, "U");
157  lquery = lwork == -1;
158  if (! upper && ! lapackf77_lsame(uplo_, "L")) {
159  *info = -1;
160  } else if (n < 0) {
161  *info = -2;
162  } else if (lda < max(1,n)) {
163  *info = -4;
164  } else if (lwork < nb*n && ! lquery) {
165  *info = -9;
166  }
167
168  /* Determine the block size. */
169  ldwork = lddwork = n;
170  lwkopt = n * nb;
171  if (*info == 0) {
172  MAGMA_S_SET2REAL( work[0], lwkopt );
173  }
174
175  if (*info != 0) {
176  magma_xerbla( __func__, -(*info) );
177  return *info;
178  }
179  else if (lquery)
180  return *info;
181
182  /* Quick return if possible */
183  if (n == 0) {
184  work[0] = c_one;
185  return *info;
186  }
187
188  float *da;
189  if (MAGMA_SUCCESS != magma_smalloc( &da, n*ldda + 2*n*nb )) {
190  *info = MAGMA_ERR_DEVICE_ALLOC;
191  return *info;
192  }
193
194  float *dwork = da + (n)*ldda;
195
196  if (n < 2048)
197  nx = n;
198  else
199  nx = 512;
200
201  if (upper) {
202  /* Copy the matrix to the GPU */
203  magma_ssetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
204
205  /* Reduce the upper triangle of A.
206  Columns 1:kk are handled by the unblocked method. */
207  kk = n - (n - nx + nb - 1) / nb * nb;
208
209  for (i = n - nb; i >= kk; i -= nb) {
210  /* Reduce columns i:i+nb-1 to tridiagonal form and form the
211  matrix W which is needed to update the unreduced part of
212  the matrix */
213
214  /* Get the current panel (no need for the 1st iteration) */
215  if (i!=n-nb)
216  magma_sgetmatrix( i+nb, nb, dA(0, i), ldda, A(0, i), lda );
217
218  magma_slatrd(uplo, i+nb, nb, A(0, 0), lda, e, tau,
219  work, ldwork, dA(0, 0), ldda, dwork, lddwork);
220
221  /* Update the unreduced submatrix A(0:i-2,0:i-2), using an
222  update of the form: A := A - V*W' - W*V' */
223  magma_ssetmatrix( i + nb, nb, work, ldwork, dwork, lddwork );
224
225  magma_ssyr2k(uplo, MagmaNoTrans, i, nb, c_neg_one,
226  dA(0, i), ldda, dwork,
227  lddwork, d_one, dA(0, 0), ldda);
228
229  /* Copy superdiagonal elements back into A, and diagonal
230  elements into D */
231  for (j = i; j < i+nb; ++j) {
232  MAGMA_S_SET2REAL( *A(j-1, j), e[j - 1] );
233  d[j] = MAGMA_S_REAL( *A(j, j) );
234  }
235  }
236
237  magma_sgetmatrix( kk, kk, dA(0, 0), ldda, A(0, 0), lda );
238
239  /* Use unblocked code to reduce the last or only block */
240  lapackf77_ssytd2(uplo_, &kk, A(0, 0), &lda, d, e, tau, &iinfo);
241  }
242  else {
243  /* Copy the matrix to the GPU */
244  if (1<=n-nx)
245  magma_ssetmatrix( n, n, A(0,0), lda, dA(0,0), ldda );
246
247  #ifdef FAST_HEMV
248  // TODO this leaks memory from da, above
249  float *dwork2;
250  if (MAGMA_SUCCESS != magma_smalloc( &dwork2, n*n )) {
251  *info = MAGMA_ERR_DEVICE_ALLOC;
252  return *info;
253  }
254  #endif
255  /* Reduce the lower triangle of A */
256  for (i = 0; i < n-nx; i += nb) {
257  /* Reduce columns i:i+nb-1 to tridiagonal form and form the
258  matrix W which is needed to update the unreduced part of
259  the matrix */
260
261  /* Get the current panel (no need for the 1st iteration) */
262  if (i!=0)
263  magma_sgetmatrix( n-i, nb, dA(i, i), ldda, A(i, i), lda );
264  #ifdef FAST_HEMV
265  magma_slatrd2(uplo, n-i, nb, A(i, i), lda, &e[i],
266  &tau[i], work, ldwork,
267  dA(i, i), ldda,
268  dwork, lddwork, dwork2, n*n);
269  #else
270  magma_slatrd(uplo, n-i, nb, A(i, i), lda, &e[i],
271  &tau[i], work, ldwork,
272  dA(i, i), ldda,
273  dwork, lddwork);
274  #endif
275  /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
276  an update of the form: A := A - V*W' - W*V' */
277  magma_ssetmatrix( n-i, nb, work, ldwork, dwork, lddwork );
278
279  magma_ssyr2k(MagmaLower, MagmaNoTrans, n-i-nb, nb, c_neg_one,
280  dA(i+nb, i), ldda,
281  &dwork[nb], lddwork, d_one,
282  dA(i+nb, i+nb), ldda);
283
284  /* Copy subdiagonal elements back into A, and diagonal
285  elements into D */
286  for (j = i; j < i+nb; ++j) {
287  MAGMA_S_SET2REAL( *A(j+1, j), e[j] );
288  d[j] = MAGMA_S_REAL( *A(j, j) );
289  }
290  }
291
292  #ifdef FAST_HEMV
293  magma_free( dwork2 );
294  #endif
295
296  /* Use unblocked code to reduce the last or only block */
297  if (1<=n-nx)
298  magma_sgetmatrix( n-i, n-i, dA(i, i), ldda, A(i, i), lda );
299  i_n = n-i;
300  lapackf77_ssytrd(uplo_, &i_n, A(i, i), &lda, &d[i], &e[i],
301  &tau[i], work, &lwork, &iinfo);
302
303  }
304
305  magma_free( da );
306  MAGMA_S_SET2REAL( work[0], lwkopt );
307
308  return *info;
309 } /* magma_ssytrd */
void lapackf77_ssytd2(const char *uplo, const magma_int_t *n, float *A, const magma_int_t *lda, float *d, float *e, float *tau, magma_int_t *info)
void magma_ssyr2k(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc)
#define MAGMA_S_REAL(a)
Definition: magma.h:190
#define dA(i, j)
Definition: ssytrd.cpp:17
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
magma_int_t magma_slatrd2(char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *e, float *tau, float *w, magma_int_t ldw, float *da, magma_int_t ldda, float *dw, magma_int_t lddw, float *dwork, magma_int_t ldwork)
Definition: slatrd2.cpp:29
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
#define MAGMA_S_NEG_ONE
Definition: magma.h:200
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_ssytrd(char uplo, magma_int_t n, float *A, magma_int_t lda, float *d, float *e, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
Definition: ssytrd.cpp:20
#define magma_ssetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_s.h:702
magma_int_t ldda
#define dwork(dev, i, j)
#define MagmaLower
Definition: magma.h:62
magma_int_t magma_slatrd(char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *e, float *tau, float *w, magma_int_t ldw, float *da, magma_int_t ldda, float *dw, magma_int_t lddw)
Definition: slatrd.cpp:28
#define MAGMA_S_ONE
Definition: magma.h:198
void lapackf77_ssytrd(const char *uplo, const magma_int_t *n, float *A, const magma_int_t *lda, float *d, float *e, float *tau, float *work, const magma_int_t *lwork, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MAGMA_S_SET2REAL(v, t)
Definition: magma.h:181
magma_int_t magma_get_ssytrd_nb(magma_int_t m)
Definition: get_nb.cpp:376
#define MAGMA_SUCCESS
Definition: magma.h:106
static magma_err_t magma_smalloc(magmaFloat_ptr *ptrPtr, size_t n)
Definition: magma.h:77
#define magma_sgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_s.h:705
#define A(i, j)
Definition: ssytrd.cpp:16
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82