MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
cgebrd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated c Thu May 10 22:26:54 2012
9 
10 */
11 #include "common_magma.h"
12 
13 #define A(i, j) (a + (j)*lda + (i))
14 #define dA(i, j) (da+ (j)*ldda + (i))
15 
16 // === Define what BLAS to use ============================================
17 #define PRECISION_c
18 #if (defined(PRECISION_s) || defined(PRECISION_d))
19  #define magma_cgemm magmablas_cgemm
20 #endif
21 // === End defining what BLAS to use ======================================
22 
23 extern "C" magma_int_t
25  cuFloatComplex *a, magma_int_t lda, float *d, float *e,
26  cuFloatComplex *tauq, cuFloatComplex *taup,
27  cuFloatComplex *work, magma_int_t lwork,
28  magma_int_t *info)
29 {
30 /* -- MAGMA (version 1.2.0) --
31  Univ. of Tennessee, Knoxville
32  Univ. of California, Berkeley
33  Univ. of Colorado, Denver
34  May 2012
35 
36  Purpose
37  =======
38  CGEBRD reduces a general complex M-by-N matrix A to upper or lower
39  bidiagonal form B by an orthogonal transformation: Q**H * A * P = B.
40 
41  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
42 
43  Arguments
44  =========
45  M (input) INTEGER
46  The number of rows in the matrix A. M >= 0.
47 
48  N (input) INTEGER
49  The number of columns in the matrix A. N >= 0.
50 
51  A (input/output) COMPLEX array, dimension (LDA,N)
52  On entry, the M-by-N general matrix to be reduced.
53  On exit,
54  if m >= n, the diagonal and the first superdiagonal are
55  overwritten with the upper bidiagonal matrix B; the
56  elements below the diagonal, with the array TAUQ, represent
57  the orthogonal matrix Q as a product of elementary
58  reflectors, and the elements above the first superdiagonal,
59  with the array TAUP, represent the orthogonal matrix P as
60  a product of elementary reflectors;
61  if m < n, the diagonal and the first subdiagonal are
62  overwritten with the lower bidiagonal matrix B; the
63  elements below the first subdiagonal, with the array TAUQ,
64  represent the orthogonal matrix Q as a product of
65  elementary reflectors, and the elements above the diagonal,
66  with the array TAUP, represent the orthogonal matrix P as
67  a product of elementary reflectors.
68  See Further Details.
69 
70  LDA (input) INTEGER
71  The leading dimension of the array A. LDA >= max(1,M).
72 
73  D (output) real array, dimension (min(M,N))
74  The diagonal elements of the bidiagonal matrix B:
75  D(i) = A(i,i).
76 
77  E (output) real array, dimension (min(M,N)-1)
78  The off-diagonal elements of the bidiagonal matrix B:
79  if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
80  if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
81 
82  TAUQ (output) COMPLEX array dimension (min(M,N))
83  The scalar factors of the elementary reflectors which
84  represent the orthogonal matrix Q. See Further Details.
85 
86  TAUP (output) COMPLEX array, dimension (min(M,N))
87  The scalar factors of the elementary reflectors which
88  represent the orthogonal matrix P. See Further Details.
89 
90  WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK))
91  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
92 
93  LWORK (input) INTEGER
94  The length of the array WORK. LWORK >= max(1,M,N).
95  For optimum performance LWORK >= (M+N)*NB, where NB
96  is the optimal blocksize.
97 
98  If LWORK = -1, then a workspace query is assumed; the routine
99  only calculates the optimal size of the WORK array, returns
100  this value as the first entry of the WORK array, and no error
101  message related to LWORK is issued by XERBLA.
102 
103  INFO (output) INTEGER
104  = 0: successful exit
105  < 0: if INFO = -i, the i-th argument had an illegal value.
106 
107  Further Details
108  ===============
109  The matrices Q and P are represented as products of elementary
110  reflectors:
111 
112  If m >= n,
113  Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
114  Each H(i) and G(i) has the form:
115  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
116  where tauq and taup are complex scalars, and v and u are complex vectors;
117  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
118  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
119  tauq is stored in TAUQ(i) and taup in TAUP(i).
120 
121  If m < n,
122  Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
123  Each H(i) and G(i) has the form:
124  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
125  where tauq and taup are complex scalars, and v and u are complex vectors;
126  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
127  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
128  tauq is stored in TAUQ(i) and taup in TAUP(i).
129 
130  The contents of A on exit are illustrated by the following examples:
131 
132  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
133 
134  ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
135  ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
136  ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
137  ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
138  ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
139  ( v1 v2 v3 v4 v5 )
140 
141  where d and e denote diagonal and off-diagonal elements of B, vi
142  denotes an element of the vector defining H(i), and ui an element of
143  the vector defining G(i).
144  ===================================================================== */
145 
146  cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
147  cuFloatComplex c_one = MAGMA_C_ONE;
148  cuFloatComplex *da, *dwork;
149 
150  magma_int_t ncol, nrow, jmax, nb, ldda;
151 
152  static magma_int_t i, j, nx;
153  static cuFloatComplex ws;
154  static magma_int_t iinfo;
155 
156  magma_int_t minmn;
157  magma_int_t ldwrkx, ldwrky, lwkopt;
158  magma_int_t lquery;
159 
160  nb = magma_get_cgebrd_nb(n);
161  ldda = m;
162 
163  lwkopt = (m + n) * nb;
164  work[0] = MAGMA_C_MAKE( lwkopt, 0. );
165  lquery = (lwork == -1);
166 
167  /* Check arguments */
168  *info = 0;
169  if (m < 0) {
170  *info = -1;
171  } else if (n < 0) {
172  *info = -2;
173  } else if (lda < max(1,m)) {
174  *info = -4;
175  } else if ( (lwork < max( max(1, m), n)) && (! lquery) ) {
176  *info = -10;
177  }
178  if (*info < 0) {
179  magma_xerbla( __func__, -(*info) );
180  return *info;
181  }
182  else if (lquery)
183  return *info;
184 
185  /* Quick return if possible */
186  minmn = min(m,n);
187  if (minmn == 0) {
188  work[0] = c_one;
189  return *info;
190  }
191 
192  if (MAGMA_SUCCESS != magma_cmalloc( &da, n*ldda + (m + n)*nb )) {
193  fprintf (stderr, "!!!! device memory allocation error in cgebrd\n" );
194  *info = MAGMA_ERR_DEVICE_ALLOC;
195  return *info;
196  }
197  dwork = da + (n)*ldda;
198 
199  MAGMA_C_SET2REAL( ws, max(m,n) );
200  ldwrkx = m;
201  ldwrky = n;
202 
203  /* Set the block/unblock crossover point NX. */
204  nx = 128;
205 
206  /* Copy the matrix to the GPU */
207  if (minmn-nx>=1)
208  magma_csetmatrix( m, n, a, lda, da, ldda );
209 
210  for (i=0; i< (minmn - nx); i += nb) {
211 
212  /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
213  the matrices X and Y which are needed to update the unreduced
214  part of the matrix */
215  nrow = m - i;
216  ncol = n - i;
217 
218  /* Get the current panel (no need for the 1st iteration) */
219  if ( i > 0 ) {
220  magma_cgetmatrix( nrow, nb, dA(i, i), ldda, A( i, i), lda );
221  magma_cgetmatrix( nb, ncol - nb,
222  dA(i, i+nb), ldda,
223  A( i, i+nb), lda );
224  }
225 
226  magma_clabrd_gpu(nrow, ncol, nb,
227  A(i, i), lda, dA(i, i), ldda,
228  d+i, e+i, tauq+i, taup+i,
229  work, ldwrkx, dwork, ldwrkx, // x, dx
230  work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky); // y, dy
231 
232  /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
233  of the form A := A - V*Y' - X*U' */
234  nrow = m - i - nb;
235  ncol = n - i - nb;
236 
237  // Send Y back to the GPU
238  magma_csetmatrix( nrow, nb, work + nb, ldwrkx, dwork + nb, ldwrkx );
239  magma_csetmatrix( ncol, nb,
240  work + (ldwrkx+1)*nb, ldwrky,
241  dwork + (ldwrkx+1)*nb, ldwrky );
242 
244  nrow, ncol, nb,
245  c_neg_one, dA(i+nb, i ), ldda,
246  dwork+(ldwrkx+1)*nb, ldwrky,
247  c_one, dA(i+nb, i+nb), ldda);
248 
250  nrow, ncol, nb,
251  c_neg_one, dwork+nb, ldwrkx,
252  dA( i, i+nb ), ldda,
253  c_one, dA( i+nb, i+nb ), ldda);
254 
255  /* Copy diagonal and off-diagonal elements of B back into A */
256  if (m >= n) {
257  jmax = i + nb;
258  for (j = i; j < jmax; ++j) {
259  *A(j, j ) = MAGMA_C_MAKE( d[j], 0. );
260  *A(j, j+1) = MAGMA_C_MAKE( e[j], 0. );
261  }
262  } else {
263  jmax = i + nb;
264  for (j = i; j < jmax; ++j) {
265  *A(j, j ) = MAGMA_C_MAKE( d[j], 0. );
266  *A(j+1, j ) = MAGMA_C_MAKE( e[j], 0. );
267  /* L20: */
268  }
269  }
270  }
271 
272  /* Use unblocked code to reduce the remainder of the matrix */
273  nrow = m - i;
274  ncol = n - i;
275 
276  if ( 0 < (minmn-nx) )
277  magma_cgetmatrix( nrow, ncol, dA(i, i), ldda, A( i, i), lda );
278 
279  lapackf77_cgebrd( &nrow, &ncol,
280  A(i, i), &lda, d+i, e+i,
281  tauq+i, taup+i, work, &lwork, &iinfo);
282  work[0] = ws;
283 
284  magma_free( da );
285  return *info;
286 } /* cgebrd_ */
287