MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
cgebrd.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7
8  @generated c Tue Aug 13 16:44:54 2013
9
10 */
11 #include "common_magma.h"
12
13 #define A(i, j) (a + (j)*lda + (i))
14 #define dA(i, j) (da+ (j)*ldda + (i))
15
16 extern "C" magma_int_t
18  magmaFloatComplex *a, magma_int_t lda, float *d, float *e,
19  magmaFloatComplex *tauq, magmaFloatComplex *taup,
20  magmaFloatComplex *work, magma_int_t lwork,
21  magma_int_t *info)
22 {
23 /* -- MAGMA (version 1.4.0) --
24  Univ. of Tennessee, Knoxville
25  Univ. of California, Berkeley
26  Univ. of Colorado, Denver
27  August 2013
28
29  Purpose
30  =======
31  CGEBRD reduces a general complex M-by-N matrix A to upper or lower
32  bidiagonal form B by an orthogonal transformation: Q**H * A * P = B.
33
34  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
35
36  Arguments
37  =========
38  M (input) INTEGER
39  The number of rows in the matrix A. M >= 0.
40
41  N (input) INTEGER
42  The number of columns in the matrix A. N >= 0.
43
44  A (input/output) COMPLEX array, dimension (LDA,N)
45  On entry, the M-by-N general matrix to be reduced.
46  On exit,
47  if m >= n, the diagonal and the first superdiagonal are
48  overwritten with the upper bidiagonal matrix B; the
49  elements below the diagonal, with the array TAUQ, represent
50  the orthogonal matrix Q as a product of elementary
51  reflectors, and the elements above the first superdiagonal,
52  with the array TAUP, represent the orthogonal matrix P as
53  a product of elementary reflectors;
54  if m < n, the diagonal and the first subdiagonal are
55  overwritten with the lower bidiagonal matrix B; the
56  elements below the first subdiagonal, with the array TAUQ,
57  represent the orthogonal matrix Q as a product of
58  elementary reflectors, and the elements above the diagonal,
59  with the array TAUP, represent the orthogonal matrix P as
60  a product of elementary reflectors.
61  See Further Details.
62
63  LDA (input) INTEGER
64  The leading dimension of the array A. LDA >= max(1,M).
65
66  D (output) real array, dimension (min(M,N))
67  The diagonal elements of the bidiagonal matrix B:
68  D(i) = A(i,i).
69
70  E (output) real array, dimension (min(M,N)-1)
71  The off-diagonal elements of the bidiagonal matrix B:
72  if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
73  if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
74
75  TAUQ (output) COMPLEX array dimension (min(M,N))
76  The scalar factors of the elementary reflectors which
77  represent the orthogonal matrix Q. See Further Details.
78
79  TAUP (output) COMPLEX array, dimension (min(M,N))
80  The scalar factors of the elementary reflectors which
81  represent the orthogonal matrix P. See Further Details.
82
83  WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK))
84  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
85
86  LWORK (input) INTEGER
87  The length of the array WORK. LWORK >= (M+N)*NB, where NB
88  is the optimal blocksize.
89
90  If LWORK = -1, then a workspace query is assumed; the routine
91  only calculates the optimal size of the WORK array, returns
92  this value as the first entry of the WORK array, and no error
93  message related to LWORK is issued by XERBLA.
94
95  INFO (output) INTEGER
96  = 0: successful exit
97  < 0: if INFO = -i, the i-th argument had an illegal value.
98
99  Further Details
100  ===============
101  The matrices Q and P are represented as products of elementary
102  reflectors:
103
104  If m >= n,
105  Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
106  Each H(i) and G(i) has the form:
107  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
108  where tauq and taup are complex scalars, and v and u are complex vectors;
109  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
110  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
111  tauq is stored in TAUQ(i) and taup in TAUP(i).
112
113  If m < n,
114  Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
115  Each H(i) and G(i) has the form:
116  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
117  where tauq and taup are complex scalars, and v and u are complex vectors;
118  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
119  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
120  tauq is stored in TAUQ(i) and taup in TAUP(i).
121
122  The contents of A on exit are illustrated by the following examples:
123
124  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
125
126  ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
127  ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
128  ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
129  ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
130  ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
131  ( v1 v2 v3 v4 v5 )
132
133  where d and e denote diagonal and off-diagonal elements of B, vi
134  denotes an element of the vector defining H(i), and ui an element of
135  the vector defining G(i).
136  ===================================================================== */
137
138  magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
139  magmaFloatComplex c_one = MAGMA_C_ONE;
140  magmaFloatComplex *da, *dwork;
141
142  magma_int_t ncol, nrow, jmax, nb, ldda;
143
144  magma_int_t i, j, nx;
145  magma_int_t iinfo;
146
147  magma_int_t minmn;
148  magma_int_t ldwrkx, ldwrky, lwkopt;
149  magma_int_t lquery;
150
151  nb = magma_get_cgebrd_nb(n);
152  ldda = m;
153
154  lwkopt = (m + n) * nb;
155  work[0] = MAGMA_C_MAKE( lwkopt, 0. );
156  lquery = (lwork == -1);
157
158  /* Check arguments */
159  *info = 0;
160  if (m < 0) {
161  *info = -1;
162  } else if (n < 0) {
163  *info = -2;
164  } else if (lda < max(1,m)) {
165  *info = -4;
166  } else if (lwork < lwkopt && (! lquery) ) {
167  *info = -10;
168  }
169  if (*info < 0) {
170  magma_xerbla( __func__, -(*info) );
171  return *info;
172  }
173  else if (lquery)
174  return *info;
175
176  /* Quick return if possible */
177  minmn = min(m,n);
178  if (minmn == 0) {
179  work[0] = c_one;
180  return *info;
181  }
182
183  if (MAGMA_SUCCESS != magma_cmalloc( &da, n*ldda + (m + n)*nb )) {
184  fprintf (stderr, "!!!! device memory allocation error in cgebrd\n" );
185  *info = MAGMA_ERR_DEVICE_ALLOC;
186  return *info;
187  }
188  dwork = da + (n)*ldda;
189
190  ldwrkx = m;
191  ldwrky = n;
192
193  /* Set the block/unblock crossover point NX. */
194  nx = 128;
195
196  /* Copy the matrix to the GPU */
197  if (minmn - nx >= 1) {
198  magma_csetmatrix( m, n, a, lda, da, ldda );
199  }
200
201  for (i=0; i< (minmn - nx); i += nb) {
202
203  /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
204  the matrices X and Y which are needed to update the unreduced
205  part of the matrix */
206  nrow = m - i;
207  ncol = n - i;
208
209  /* Get the current panel (no need for the 1st iteration) */
210  if ( i > 0 ) {
211  magma_cgetmatrix( nrow, nb, dA(i, i), ldda, A( i, i), lda );
212  magma_cgetmatrix( nb, ncol - nb,
213  dA(i, i+nb), ldda,
214  A( i, i+nb), lda );
215  }
216
217  magma_clabrd_gpu(nrow, ncol, nb,
218  A(i, i), lda, dA(i, i), ldda,
219  d+i, e+i, tauq+i, taup+i,
220  work, ldwrkx, dwork, ldwrkx, // x, dx
221  work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky); // y, dy
222
223  /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
224  of the form A := A - V*Y' - X*U' */
225  nrow = m - i - nb;
226  ncol = n - i - nb;
227
228  // Send Y back to the GPU
229  magma_csetmatrix( nrow, nb, work + nb, ldwrkx, dwork + nb, ldwrkx );
230  magma_csetmatrix( ncol, nb,
231  work + (ldwrkx+1)*nb, ldwrky,
232  dwork + (ldwrkx+1)*nb, ldwrky );
233
235  nrow, ncol, nb,
236  c_neg_one, dA(i+nb, i ), ldda,
237  dwork+(ldwrkx+1)*nb, ldwrky,
238  c_one, dA(i+nb, i+nb), ldda);
239
241  nrow, ncol, nb,
242  c_neg_one, dwork+nb, ldwrkx,
243  dA( i, i+nb ), ldda,
244  c_one, dA( i+nb, i+nb ), ldda);
245
246  /* Copy diagonal and off-diagonal elements of B back into A */
247  if (m >= n) {
248  jmax = i + nb;
249  for (j = i; j < jmax; ++j) {
250  *A(j, j ) = MAGMA_C_MAKE( d[j], 0. );
251  *A(j, j+1) = MAGMA_C_MAKE( e[j], 0. );
252  }
253  } else {
254  jmax = i + nb;
255  for (j = i; j < jmax; ++j) {
256  *A(j, j ) = MAGMA_C_MAKE( d[j], 0. );
257  *A(j+1, j ) = MAGMA_C_MAKE( e[j], 0. );
258  }
259  }
260  }
261
262  /* Use unblocked code to reduce the remainder of the matrix */
263  nrow = m - i;
264  ncol = n - i;
265
266  if ( 0 < minmn - nx ) {
267  magma_cgetmatrix( nrow, ncol, dA(i, i), ldda, A(i, i), lda );
268  }
269
270  lapackf77_cgebrd( &nrow, &ncol,
271  A(i, i), &lda, d+i, e+i,
272  tauq+i, taup+i, work, &lwork, &iinfo);
273  work[0] = MAGMA_C_MAKE( lwkopt, 0. );
274
275  magma_free( da );
276  return *info;
277 } /* cgebrd */
#define min(a, b)
Definition: common_magma.h:86
#define A(i, j)
Definition: cgebrd.cpp:13
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_cmalloc(magmaFloatComplex_ptr *ptrPtr, size_t n)
Definition: magma.h:79
#define MAGMA_C_NEG_ONE
Definition: magma.h:156
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
void magma_cgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
#define dwork(dev, i, j)
#define magma_cgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_c.h:705
magma_int_t magma_get_cgebrd_nb(magma_int_t m)
Definition: get_nb.cpp:473
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaConjTrans
Definition: magma.h:59
#define MAGMA_C_ONE
Definition: magma.h:154
void lapackf77_cgebrd(const magma_int_t *m, const magma_int_t *n, magmaFloatComplex *A, const magma_int_t *lda, float *d, float *e, magmaFloatComplex *tauq, magmaFloatComplex *taup, magmaFloatComplex *work, const magma_int_t *lwork, magma_int_t *info)
#define dA(i, j)
Definition: cgebrd.cpp:14
#define MAGMA_SUCCESS
Definition: magma.h:106
magma_int_t magma_clabrd_gpu(magma_int_t m, magma_int_t n, magma_int_t nb, magmaFloatComplex *a, magma_int_t lda, magmaFloatComplex *da, magma_int_t ldda, float *d, float *e, magmaFloatComplex *tauq, magmaFloatComplex *taup, magmaFloatComplex *x, magma_int_t ldx, magmaFloatComplex *dx, magma_int_t lddx, magmaFloatComplex *y, magma_int_t ldy, magmaFloatComplex *dy, magma_int_t lddy)
Definition: clabrd_gpu.cpp:18
magma_int_t magma_cgebrd(magma_int_t m, magma_int_t n, magmaFloatComplex *A, magma_int_t lda, float *d, float *e, magmaFloatComplex *tauq, magmaFloatComplex *taup, magmaFloatComplex *work, magma_int_t lwork, magma_int_t *info)
Definition: cgebrd.cpp:17
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
#define MAGMA_C_MAKE(r, i)
Definition: magma.h:145
#define magma_csetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_c.h:702