MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
slatrd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Stan Tomov
9  @author Raffaele Solca
10 
11  @generated s Wed Aug 14 12:16:14 2013
12 
13 */
14 #include "common_magma.h"
15 
16 #include <cblas.h>
17 #include <assert.h>
18 
19 #define PRECISION_s
20 
21 #define A(i, j) (a+(j)*lda + (i))
22 #define W(i, j) (w+(j)*ldw + (i))
23 
24 #define dA(i, j) (da+(j)*ldda + (i))
25 #define dW(i, j) (dw+(j)*lddw + (i))
26 
27 extern "C" magma_int_t
29  float *a, magma_int_t lda,
30  float *e, float *tau,
31  float *w, magma_int_t ldw,
32  float *da, magma_int_t ldda,
33  float *dw, magma_int_t lddw)
34 {
35 /* -- MAGMA (version 1.4.0) --
36  Univ. of Tennessee, Knoxville
37  Univ. of California, Berkeley
38  Univ. of Colorado, Denver
39  August 2013
40 
41  Purpose
42  =======
43  SLATRD reduces NB rows and columns of a real symmetric matrix A to
44  symmetric tridiagonal form by an orthogonal similarity
45  transformation Q' * A * Q, and returns the matrices V and W which are
46  needed to apply the transformation to the unreduced part of A.
47 
48  If UPLO = 'U', SLATRD reduces the last NB rows and columns of a
49  matrix, of which the upper triangle is supplied;
50  if UPLO = 'L', SLATRD reduces the first NB rows and columns of a
51  matrix, of which the lower triangle is supplied.
52 
53  This is an auxiliary routine called by SSYTRD.
54 
55  Arguments
56  =========
57  UPLO (input) CHARACTER*1
58  Specifies whether the upper or lower triangular part of the
59  symmetric matrix A is stored:
60  = 'U': Upper triangular
61  = 'L': Lower triangular
62 
63  N (input) INTEGER
64  The order of the matrix A.
65 
66  NB (input) INTEGER
67  The number of rows and columns to be reduced.
68 
69  A (input/output) REAL array, dimension (LDA,N)
70  On entry, the symmetric matrix A. If UPLO = 'U', the leading
71  n-by-n upper triangular part of A contains the upper
72  triangular part of the matrix A, and the strictly lower
73  triangular part of A is not referenced. If UPLO = 'L', the
74  leading n-by-n lower triangular part of A contains the lower
75  triangular part of the matrix A, and the strictly upper
76  triangular part of A is not referenced.
77  On exit:
78  if UPLO = 'U', the last NB columns have been reduced to
79  tridiagonal form, with the diagonal elements overwriting
80  the diagonal elements of A; the elements above the diagonal
81  with the array TAU, represent the orthogonal matrix Q as a
82  product of elementary reflectors;
83  if UPLO = 'L', the first NB columns have been reduced to
84  tridiagonal form, with the diagonal elements overwriting
85  the diagonal elements of A; the elements below the diagonal
86  with the array TAU, represent the orthogonal matrix Q as a
87  product of elementary reflectors.
88  See Further Details.
89 
90  LDA (input) INTEGER
91  The leading dimension of the array A. LDA >= (1,N).
92 
93  E (output) REAL array, dimension (N-1)
94  If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
95  elements of the last NB columns of the reduced matrix;
96  if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
97  the first NB columns of the reduced matrix.
98 
99  TAU (output) REAL array, dimension (N-1)
100  The scalar factors of the elementary reflectors, stored in
101  TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
102  See Further Details.
103 
104  W (output) REAL array, dimension (LDW,NB)
105  The n-by-nb matrix W required to update the unreduced part
106  of A.
107 
108  LDW (input) INTEGER
109  The leading dimension of the array W. LDW >= max(1,N).
110 
111  Further Details
112  ===============
113  If UPLO = 'U', the matrix Q is represented as a product of elementary
114  reflectors
115 
116  Q = H(n) H(n-1) . . . H(n-nb+1).
117 
118  Each H(i) has the form
119 
120  H(i) = I - tau * v * v'
121 
122  where tau is a real scalar, and v is a real vector with
123  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
124  and tau in TAU(i-1).
125 
126  If UPLO = 'L', the matrix Q is represented as a product of elementary
127  reflectors
128 
129  Q = H(1) H(2) . . . H(nb).
130 
131  Each H(i) has the form
132 
133  H(i) = I - tau * v * v'
134 
135  where tau is a real scalar, and v is a real vector with
136  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
137  and tau in TAU(i).
138 
139  The elements of the vectors v together form the n-by-nb matrix V
140  which is needed, with W, to apply the transformation to the unreduced
141  part of the matrix, using a symmetric rank-2k update of the form:
142  A := A - V*W' - W*V'.
143 
144  The contents of A on exit are illustrated by the following examples
145  with n = 5 and nb = 2:
146 
147  if UPLO = 'U': if UPLO = 'L':
148 
149  ( a a a v4 v5 ) ( d )
150  ( a a v4 v5 ) ( 1 d )
151  ( a 1 v5 ) ( v1 1 a )
152  ( d 1 ) ( v1 v2 a a )
153  ( d ) ( v1 v2 a a a )
154 
155  where d denotes a diagonal element of the reduced matrix, a denotes
156  an element of the original matrix that is unchanged, and vi denotes
157  an element of the vector defining H(i).
158  ===================================================================== */
159 
160  char uplo_[2] = {uplo, 0};
161 
162  magma_int_t i;
163 
164  float c_neg_one = MAGMA_S_NEG_ONE;
165  float c_one = MAGMA_S_ONE;
166  float c_zero = MAGMA_S_ZERO;
167 
168  float value = MAGMA_S_ZERO;
169 
170  magma_int_t ione = 1;
171 
172  magma_int_t i_n, i_1, iw;
173 
174  float alpha;
175  float *f;
176 
177  if (n <= 0) {
178  return 0;
179  }
180 
181  magma_queue_t stream;
182  magma_queue_create( &stream );
183  magma_smalloc_cpu( &f, n );
184  assert( f != NULL ); // TODO return error, or allocate outside slatrd
185 
186  if (lapackf77_lsame(uplo_, "U")) {
187 
188  /* Reduce last NB columns of upper triangle */
189  for (i = n-1; i >= n - nb ; --i) {
190  i_1 = i + 1;
191  i_n = n - i - 1;
192 
193  iw = i - n + nb;
194  if (i < n-1) {
195  /* Update A(1:i,i) */
196  #if defined(PRECISION_z) || defined(PRECISION_c)
197  lapackf77_slacgv(&i_n, W(i, iw+1), &ldw);
198  #endif
199  blasf77_sgemv("No transpose", &i_1, &i_n, &c_neg_one, A(0, i+1), &lda,
200  W(i, iw+1), &ldw, &c_one, A(0, i), &ione);
201  #if defined(PRECISION_z) || defined(PRECISION_c)
202  lapackf77_slacgv(&i_n, W(i, iw+1), &ldw);
203  lapackf77_slacgv(&i_n, A(i, i+1), &lda);
204  #endif
205  blasf77_sgemv("No transpose", &i_1, &i_n, &c_neg_one, W(0, iw+1), &ldw,
206  A(i, i+1), &lda, &c_one, A(0, i), &ione);
207  #if defined(PRECISION_z) || defined(PRECISION_c)
208  lapackf77_slacgv(&i_n, A(i, i+1), &lda);
209  #endif
210  }
211  if (i > 0) {
212  /* Generate elementary reflector H(i) to annihilate A(1:i-2,i) */
213 
214  alpha = *A(i-1, i);
215 
216  lapackf77_slarfg(&i, &alpha, A(0, i), &ione, &tau[i - 1]);
217 
218  e[i-1] = MAGMA_S_REAL( alpha );
219  MAGMA_S_SET2REAL(*A(i-1, i), 1.);
220 
221  /* Compute W(1:i-1,i) */
222  // 1. Send the block reflector A(0:n-i-1,i) to the GPU
223  magma_ssetvector( i, A(0, i), 1, dA(0, i), 1 );
224 
225  magma_ssymv(MagmaUpper, i, c_one, dA(0, 0), ldda,
226  dA(0, i), ione, c_zero, dW(0, iw), ione);
227 
228  // 2. Start putting the result back (asynchronously)
230  dW(0, iw), lddw,
231  W(0, iw) /*test*/, ldw, stream );
232 
233  if (i < n-1) {
234  blasf77_sgemv(MagmaTransStr, &i, &i_n, &c_one, W(0, iw+1), &ldw,
235  A(0, i), &ione, &c_zero, W(i+1, iw), &ione);
236  }
237 
238  // 3. Here is where we need it // TODO find the right place
239  magma_queue_sync( stream );
240 
241  if (i < n-1) {
242  blasf77_sgemv("No transpose", &i, &i_n, &c_neg_one, A(0, i+1), &lda,
243  W(i+1, iw), &ione, &c_one, W(0, iw), &ione);
244 
245  blasf77_sgemv(MagmaTransStr, &i, &i_n, &c_one, A(0, i+1), &lda,
246  A(0, i), &ione, &c_zero, W(i+1, iw), &ione);
247 
248  blasf77_sgemv("No transpose", &i, &i_n, &c_neg_one, W(0, iw+1), &ldw,
249  W(i+1, iw), &ione, &c_one, W(0, iw), &ione);
250  }
251 
252  blasf77_sscal(&i, &tau[i - 1], W(0, iw), &ione);
253 
254  #if defined(PRECISION_z) || defined(PRECISION_c)
255  cblas_sdot_sub( i, W(0,iw), ione, A(0,i), ione, &value );
256  #else
257  value = cblas_sdot( i, W(0,iw), ione, A(0,i), ione );
258  #endif
259  alpha = tau[i - 1] * -0.5f * value;
260  blasf77_saxpy(&i, &alpha, A(0, i), &ione,
261  W(0, iw), &ione);
262  }
263  }
264 
265  } else {
266 
267  /* Reduce first NB columns of lower triangle */
268  for (i = 0; i < nb; ++i) {
269 
270  /* Update A(i:n,i) */
271  i_n = n - i;
272  #if defined(PRECISION_z) || defined(PRECISION_c)
273  lapackf77_slacgv(&i, W(i, 0), &ldw);
274  #endif
275  blasf77_sgemv("No transpose", &i_n, &i, &c_neg_one, A(i, 0), &lda,
276  W(i, 0), &ldw, &c_one, A(i, i), &ione);
277  #if defined(PRECISION_z) || defined(PRECISION_c)
278  lapackf77_slacgv(&i, W(i, 0), &ldw);
279  lapackf77_slacgv(&i, A(i ,0), &lda);
280  #endif
281  blasf77_sgemv("No transpose", &i_n, &i, &c_neg_one, W(i, 0), &ldw,
282  A(i, 0), &lda, &c_one, A(i, i), &ione);
283  #if defined(PRECISION_z) || defined(PRECISION_c)
284  lapackf77_slacgv(&i, A(i, 0), &lda);
285  #endif
286 
287  if (i < n-1) {
288  /* Generate elementary reflector H(i) to annihilate A(i+2:n,i) */
289  i_n = n - i - 1;
290  alpha = *A(i+1, i);
291  lapackf77_slarfg(&i_n, &alpha, A(min(i+2,n-1), i), &ione, &tau[i]);
292  e[i] = MAGMA_S_REAL( alpha );
293  MAGMA_S_SET2REAL(*A(i+1, i), 1.);
294 
295  /* Compute W(i+1:n,i) */
296  // 1. Send the block reflector A(i+1:n,i) to the GPU
297  magma_ssetvector( i_n, A(i+1, i), 1, dA(i+1, i), 1 );
298 
299  magma_ssymv(MagmaLower, i_n, c_one, dA(i+1, i+1), ldda, dA(i+1, i), ione, c_zero,
300  dW(i+1, i), ione);
301 
302  // 2. Start putting the result back (asynchronously)
303  magma_sgetmatrix_async( i_n, 1,
304  dW(i+1, i), lddw,
305  W(i+1, i), ldw, stream );
306 
307  blasf77_sgemv(MagmaTransStr, &i_n, &i, &c_one, W(i+1, 0), &ldw,
308  A(i+1, i), &ione, &c_zero, W(0, i), &ione);
309 
310  blasf77_sgemv("No transpose", &i_n, &i, &c_neg_one, A(i+1, 0), &lda,
311  W(0, i), &ione, &c_zero, f, &ione);
312 
313  blasf77_sgemv(MagmaTransStr, &i_n, &i, &c_one, A(i+1, 0), &lda,
314  A(i+1, i), &ione, &c_zero, W(0, i), &ione);
315 
316  // 3. Here is where we need it
317  magma_queue_sync( stream );
318 
319  if (i!=0)
320  blasf77_saxpy(&i_n, &c_one, f, &ione, W(i+1, i), &ione);
321 
322  blasf77_sgemv("No transpose", &i_n, &i, &c_neg_one, W(i+1, 0), &ldw,
323  W(0, i), &ione, &c_one, W(i+1, i), &ione);
324  blasf77_sscal(&i_n, &tau[i], W(i+1,i), &ione);
325 
326  #if defined(PRECISION_z) || defined(PRECISION_c)
327  cblas_sdot_sub( i_n, W(i+1,i), ione, A(i+1,i), ione, &value );
328  #else
329  value = cblas_sdot( i_n, W(i+1,i), ione, A(i+1,i), ione );
330  #endif
331  alpha = tau[i] * -0.5f * value;
332  blasf77_saxpy(&i_n, &alpha, A(i+1, i), &ione, W(i+1,i), &ione);
333  }
334  }
335  }
336 
337  magma_free_cpu(f);
338  magma_queue_destroy( stream );
339 
340  return 0;
341 } /* slatrd */
342 
#define MagmaTransStr
Definition: magma.h:81
#define MAGMA_S_REAL(a)
Definition: magma.h:190
#define min(a, b)
Definition: common_magma.h:86
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define MagmaUpper
Definition: magma.h:61
void blasf77_sscal(const magma_int_t *n, const float *alpha, float *x, const magma_int_t *incx)
#define dA(i, j)
Definition: slatrd.cpp:24
void blasf77_saxpy(const magma_int_t *n, const float *alpha, const float *x, const magma_int_t *incx, float *y, const magma_int_t *incy)
#define MAGMA_S_NEG_ONE
Definition: magma.h:200
int magma_int_t
Definition: magmablas.h:12
#define magma_sgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_s.h:714
void lapackf77_slarfg(const magma_int_t *n, float *alpha, float *x, const magma_int_t *incx, float *tau)
#define A(i, j)
Definition: slatrd.cpp:21
#define magma_queue_destroy(queue)
Definition: magma.h:116
void magma_ssymv(magma_uplo_t uplo, magma_int_t n, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dx, magma_int_t incx, float beta, magmaFloat_ptr dy, magma_int_t incy)
static magma_err_t magma_smalloc_cpu(float **ptrPtr, size_t n)
Definition: magma.h:83
magma_int_t ldda
#define W(i, j)
Definition: slatrd.cpp:22
#define MagmaLower
Definition: magma.h:62
magma_int_t magma_slatrd(char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *e, float *tau, float *w, magma_int_t ldw, float *da, magma_int_t ldda, float *dw, magma_int_t lddw)
Definition: slatrd.cpp:28
#define magma_ssetvector(n, hx_src, incx, dy_dst, incy)
Definition: magmablas_s.h:634
#define MAGMA_S_ONE
Definition: magma.h:198
void blasf77_sgemv(const char *transa, const magma_int_t *m, const magma_int_t *n, const float *alpha, const float *A, const magma_int_t *lda, const float *x, const magma_int_t *incx, const float *beta, float *y, const magma_int_t *incy)
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MAGMA_S_ZERO
Definition: magma.h:197
#define MAGMA_S_SET2REAL(v, t)
Definition: magma.h:181
void lapackf77_slacgv(const magma_int_t *n, float *x, const magma_int_t *incx)
float cblas_sdot(const int N, const float *X, const int incX, const float *Y, const int incY)
#define dW(i, j)
Definition: slatrd.cpp:25
magma_err_t magma_free_cpu(void *ptr)
#define magma_queue_sync(queue)
Definition: magma.h:119