MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
dormqr.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @author Stan Tomov
9
10  @generated d Tue Aug 13 16:44:28 2013
11
12 */
13 #include "common_magma.h"
14
15 extern "C" magma_int_t
16 magma_dormqr(const char side, const char trans,
18  double *A, magma_int_t lda,
19  double *tau,
20  double *C, magma_int_t ldc,
21  double *work, magma_int_t lwork,
22  magma_int_t *info)
23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
28  August 2013
29
30  Purpose
31  =======
32  DORMQR overwrites the general real M-by-N matrix C with
33
34  SIDE = 'L' SIDE = 'R'
35  TRANS = 'N': Q * C C * Q
36  TRANS = 'T': Q**T * C C * Q**T
37
38  where Q is a real orthogonal matrix defined as the product of k
39  elementary reflectors
40
41  Q = H(1) H(2) . . . H(k)
42
43  as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
44  if SIDE = 'R'.
45
46  Arguments
47  =========
48  SIDE (input) CHARACTER*1
49  = 'L': apply Q or Q**T from the Left;
50  = 'R': apply Q or Q**T from the Right.
51
52  TRANS (input) CHARACTER*1
53  = 'N': No transpose, apply Q;
54  = 'T': Transpose, apply Q**T.
55
56  M (input) INTEGER
57  The number of rows of the matrix C. M >= 0.
58
59  N (input) INTEGER
60  The number of columns of the matrix C. N >= 0.
61
62  K (input) INTEGER
63  The number of elementary reflectors whose product defines
64  the matrix Q.
65  If SIDE = 'L', M >= K >= 0;
66  if SIDE = 'R', N >= K >= 0.
67
68  A (input) DOUBLE_PRECISION array, dimension (LDA,K)
69  The i-th column must contain the vector which defines the
70  elementary reflector H(i), for i = 1,2,...,k, as returned by
71  DGEQRF in the first k columns of its array argument A.
72  A is modified by the routine but restored on exit.
73
74  LDA (input) INTEGER
75  The leading dimension of the array A.
76  If SIDE = 'L', LDA >= max(1,M);
77  if SIDE = 'R', LDA >= max(1,N).
78
79  TAU (input) DOUBLE_PRECISION array, dimension (K)
80  TAU(i) must contain the scalar factor of the elementary
81  reflector H(i), as returned by DGEQRF.
82
83  C (input/output) DOUBLE_PRECISION array, dimension (LDC,N)
84  On entry, the M-by-N matrix C.
85  On exit, C is overwritten by Q*C or Q**T * C or C * Q**T or C*Q.
86
87  LDC (input) INTEGER
88  The leading dimension of the array C. LDC >= max(1,M).
89
90  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
91  On exit, if INFO = 0, WORK(0) returns the optimal LWORK.
92
93  LWORK (input) INTEGER
94  The dimension of the array WORK.
95  If SIDE = 'L', LWORK >= max(1,N);
96  if SIDE = 'R', LWORK >= max(1,M).
97  For optimum performance
98  LWORK >= N*NB if SIDE = 'L', and
99  LWORK >= M*NB if SIDE = 'R',
100  where NB is the optimal blocksize.
101
102  If LWORK = -1, then a workspace query is assumed; the routine
103  only calculates the optimal size of the WORK array, returns
104  this value as the first entry of the WORK array, and no error
105  message related to LWORK is issued by XERBLA.
106
107  INFO (output) INTEGER
108  = 0: successful exit
109  < 0: if INFO = -i, the i-th argument had an illegal value
110  ===================================================================== */
111
112  #define A(a_1,a_2) ( A + (a_1) + (a_2)*lda)
113  #define dC(a_1,a_2) (dC + (a_1) + (a_2)*lddc)
114
115  magma_int_t nb = magma_get_dgeqrf_nb( min( m, n ));
116
117  double c_one = MAGMA_D_ONE;
118
119  char side_[2] = {side, 0};
120  char trans_[2] = {trans, 0};
121
122  magma_int_t nq_i, lddwork;
123  magma_int_t i;
124  double *T;
125  magma_int_t i1, i2, step, ib, ic, jc, mi, ni, nq, nw;
126  int left, notran, lquery;
127  magma_int_t iinfo, lwkopt;
128
129  *info = 0;
130  left = lapackf77_lsame(side_, "L");
131  notran = lapackf77_lsame(trans_, "N");
132  lquery = (lwork == -1);
133
134  /* NQ is the order of Q and NW is the minimum dimension of WORK */
135  if (left) {
136  nq = m;
137  nw = n;
138  } else {
139  nq = n;
140  nw = m;
141  }
142  lwkopt = max(1,nw) * nb;
143  work[0] = MAGMA_D_MAKE( lwkopt, 0 );
144
145  if (! left && ! lapackf77_lsame(side_, "R")) {
146  *info = -1;
147  } else if (! notran && ! lapackf77_lsame(trans_, MagmaTransStr)) {
148  *info = -2;
149  } else if (m < 0) {
150  *info = -3;
151  } else if (n < 0) {
152  *info = -4;
153  } else if (k < 0 || k > nq) {
154  *info = -5;
155  } else if (lda < max(1,nq)) {
156  *info = -7;
157  } else if (ldc < max(1,m)) {
158  *info = -10;
159  } else if (lwork < max(1,nw) && ! lquery) {
160  *info = -12;
161  }
162
163  if (*info != 0) {
164  magma_xerbla( __func__, -(*info) );
165  return *info;
166  }
167  else if (lquery) {
168  return *info;
169  }
170
171  /* Quick return if possible */
172  if (m == 0 || n == 0 || k == 0) {
173  work[0] = c_one;
174  return *info;
175  }
176
177  /* Allocate work space on the GPU */
178  magma_int_t lddc = m;
179  double *dwork, *dC;
180  magma_dmalloc( &dC, lddc*n );
181  magma_dmalloc( &dwork, (m + n + nb)*nb );
182  if ( dC == NULL || dwork == NULL ) {
183  magma_free( dC );
184  magma_free( dwork );
185  *info = MAGMA_ERR_DEVICE_ALLOC;
186  return *info;
187  }
188
189  /* work space on CPU */
190  T = (double*) malloc( 2*nb*nb * sizeof(double) );
191  if ( T == NULL ) {
192  magma_free( dC );
193  magma_free( dwork );
194  *info = MAGMA_ERR_HOST_ALLOC;
195  return *info;
196  }
197
198  /* Copy matrix C from the CPU to the GPU */
199  magma_dsetmatrix( m, n, C, ldc, dC, lddc );
200
201  if (nb >= k) {
202  /* Use CPU code */
203  lapackf77_dormqr(side_, trans_, &m, &n, &k, A, &lda, &tau[1],
204  C, &ldc, work, &lwork, &iinfo);
205  }
206  else {
207  /* Use hybrid CPU-GPU code */
208  if ( (left && (! notran)) || ((! left) && notran) ) {
209  i1 = 0;
210  i2 = k;
211  step = nb;
212  } else {
213  i1 = ((k - 1) / nb) * nb;
214  i2 = 0;
215  step = -nb;
216  }
217
218  // silence "uninitialized" warnings
219  mi = 0;
220  ni = 0;
221
222  if (left) {
223  ni = n;
224  jc = 0;
225  } else {
226  mi = m;
227  ic = 0;
228  }
229
230  for( i=i1; (step<0 ? i>=i2 : i<i2); i += step ) {
231  ib = min(nb, k - i);
232
233  /* Form the triangular factor of the block reflector
234  H = H(i) H(i+1) . . . H(i+ib-1) */
235  nq_i = nq - i;
236  lapackf77_dlarft("F", "C", &nq_i, &ib, A(i,i), &lda,
237  &tau[i], T, &ib);
238
239  /* 1) Put 0s in the upper triangular part of A;
240  2) copy the panel from A to the GPU, and
241  3) restore A */
242  dpanel_to_q('U', ib, A(i,i), lda, T+ib*ib);
243  magma_dsetmatrix( nq_i, ib, A(i,i), lda, dwork, nq_i );
244  dq_to_panel('U', ib, A(i,i), lda, T+ib*ib);
245
246  if (left) {
247  /* H or H' is applied to C(i:m,1:n) */
248  mi = m - i;
249  ic = i;
250  }
251  else {
252  /* H or H' is applied to C(1:m,i:n) */
253  ni = n - i;
254  jc = i;
255  }
256
257  if (left)
258  lddwork = ni;
259  else
260  lddwork = mi;
261
262  /* Apply H or H'; First copy T to the GPU */
263  magma_dsetmatrix( ib, ib, T, ib, dwork+nq_i*ib, ib );
265  mi, ni, ib,
266  dwork, nq_i, dwork+nq_i*ib, ib,
267  dC(ic,jc), lddc,
268  dwork+nq_i*ib + ib*ib, lddwork);
269  }
270  magma_dgetmatrix( m, n, dC, lddc, C, ldc );
271  }
272  work[0] = MAGMA_D_MAKE( lwkopt, 0 );
273
274  magma_free( dC );
275  magma_free( dwork );
276  free( T );
277
278  return *info;
279 } /* magma_dormqr */
#define MagmaTransStr
Definition: magma.h:81
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
magma_int_t magma_dormqr(char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
Definition: dormqr.cpp:16
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define A(a_1, a_2)
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define T(m)
Definition: zgeqrf_mc.cpp:14
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define C(i, j)
void lapackf77_dlarft(const char *direct, const char *storev, const magma_int_t *n, const magma_int_t *k, double *V, const magma_int_t *ldv, const double *tau, double *T, const magma_int_t *ldt)
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define dwork(dev, i, j)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void lapackf77_dormqr(const char *side, const char *trans, const magma_int_t *m, const magma_int_t *n, const magma_int_t *k, const double *A, const magma_int_t *lda, const double *tau, double *C, const magma_int_t *ldc, double *work, const magma_int_t *lwork, magma_int_t *info)
#define lapackf77_lsame
Definition: magma_lapack.h:23
magma_int_t magma_dlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const double *dv, magma_int_t ldv, const double *dt, magma_int_t ldt, double *dc, magma_int_t ldc, double *dwork, magma_int_t ldwork)
Definition: dlarfb_gpu.cpp:15
#define dC(a_1, a_2)
magma_int_t magma_get_dgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:141
#define MagmaForward
Definition: magma.h:71
void dpanel_to_q(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:17
void dq_to_panel(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:57
#define max(a, b)
Definition: common_magma.h:82
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define MagmaColumnwise
Definition: magma.h:74
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702