MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dormql.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Raffaele Solca
9 
10  @generated d Tue Aug 13 16:44:28 2013
11 
12 */
13 #include "common_magma.h"
14 
15 extern "C" magma_int_t
16 magma_dormql(const char side, const char trans,
18  double *a, magma_int_t lda,
19  double *tau,
20  double *c, magma_int_t ldc,
21  double *work, magma_int_t lwork,
22  magma_int_t *info)
23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  DORMQL overwrites the general real M-by-N matrix C with
33 
34  SIDE = 'L' SIDE = 'R'
35  TRANS = 'N': Q * C C * Q
36  TRANS = 'C': Q**T * C C * Q**T
37 
38  where Q is a real unitary matrix defined as the product of k
39  elementary reflectors
40 
41  Q = H(k) . . . H(2) H(1)
42 
43  as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N
44  if SIDE = 'R'.
45 
46  Arguments
47  =========
48  SIDE (input) CHARACTER*1
49  = 'L': apply Q or Q**T from the Left;
50  = 'R': apply Q or Q**T from the Right.
51 
52  TRANS (input) CHARACTER*1
53  = 'N': No transpose, apply Q;
54  = 'C': Transpose, apply Q**T.
55 
56  M (input) INTEGER
57  The number of rows of the matrix C. M >= 0.
58 
59  N (input) INTEGER
60  The number of columns of the matrix C. N >= 0.
61 
62  K (input) INTEGER
63  The number of elementary reflectors whose product defines
64  the matrix Q.
65  If SIDE = 'L', M >= K >= 0;
66  if SIDE = 'R', N >= K >= 0.
67 
68  A (input) DOUBLE_PRECISION array, dimension (LDA,K)
69  The i-th column must contain the vector which defines the
70  elementary reflector H(i), for i = 1,2,...,k, as returned by
71  DGEQLF in the last k columns of its array argument A.
72  A is modified by the routine but restored on exit.
73 
74  LDA (input) INTEGER
75  The leading dimension of the array A.
76  If SIDE = 'L', LDA >= max(1,M);
77  if SIDE = 'R', LDA >= max(1,N).
78 
79  TAU (input) DOUBLE_PRECISION array, dimension (K)
80  TAU(i) must contain the scalar factor of the elementary
81  reflector H(i), as returned by DGEQLF.
82 
83  C (input/output) DOUBLE_PRECISION array, dimension (LDC,N)
84  On entry, the M-by-N matrix C.
85  On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
86 
87  LDC (input) INTEGER
88  The leading dimension of the array C. LDC >= max(1,M).
89 
90  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
91  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
92 
93  LWORK (input) INTEGER
94  The dimension of the array WORK.
95  If SIDE = 'L', LWORK >= max(1,N);
96  if SIDE = 'R', LWORK >= max(1,M).
97  For optimum performance LWORK >= N*NB if SIDE = 'L', and
98  LWORK >= M*NB if SIDE = 'R', where NB is the optimal
99  blocksize.
100 
101  If LWORK = -1, then a workspace query is assumed; the routine
102  only calculates the optimal size of the WORK array, returns
103  this value as the first entry of the WORK array, and no error
104  message related to LWORK is issued by XERBLA.
105 
106  INFO (output) INTEGER
107  = 0: successful exit
108  < 0: if INFO = -i, the i-th argument had an illegal value
109  ===================================================================== */
110 
111  char side_[2] = {side, 0};
112  char trans_[2] = {trans, 0};
113 
114  /* Allocate work space on the GPU */
115  double *dwork, *dc;
116  magma_dmalloc( &dc, (m)*(n) );
117  magma_dmalloc( &dwork, 2*(m + 64)*64 );
118 
119  /* Copy matrix C from the CPU to the GPU */
120  magma_dsetmatrix( m, n, c, ldc, dc, m );
121  dc -= (1 + m);
122 
123  magma_int_t a_offset, c_dim1, c_offset, i__4;
124 
125  magma_int_t i__;
126  double t[2*4160] /* was [65][64] */;
127  magma_int_t i1, i2, i3, ib, nb, mi, ni, nq, nw;
128  magma_int_t iinfo, ldwork, lwkopt=0;
129  int lquery, left, notran;
130 
131  a_offset = 1 + lda;
132  a -= a_offset;
133  --tau;
134  c_dim1 = ldc;
135  c_offset = 1 + c_dim1;
136  c -= c_offset;
137 
138  *info = 0;
139  left = lapackf77_lsame(side_, "L");
140  notran = lapackf77_lsame(trans_, "N");
141  lquery = (lwork == -1);
142 
143  /* NQ is the order of Q and NW is the minimum dimension of WORK */
144  if (left) {
145  nq = m;
146  nw = max(1,n);
147  } else {
148  nq = n;
149  nw = max(1,m);
150  }
151  if (! left && ! lapackf77_lsame(side_, "R")) {
152  *info = -1;
153  } else if (! notran && ! lapackf77_lsame(trans_, "C")) {
154  *info = -2;
155  } else if (m < 0) {
156  *info = -3;
157  } else if (n < 0) {
158  *info = -4;
159  } else if (k < 0 || k > nq) {
160  *info = -5;
161  } else if (lda < max(1,nq)) {
162  *info = -7;
163  } else if (ldc < max(1,m)) {
164  *info = -10;
165  }
166 
167  if (*info == 0) {
168  if (m == 0 || n == 0) {
169  lwkopt = 1;
170  } else {
171  /* Determine the block size. NB may be at most NBMAX, where
172  NBMAX is used to define the local array T. */
173  nb = 64;
174  lwkopt = nw * nb;
175  }
176  MAGMA_D_SET2REAL( work[0], lwkopt );
177 
178  if (lwork < nw && ! lquery) {
179  *info = -12;
180  }
181  }
182 
183  if (*info != 0) {
184  magma_xerbla( __func__, -(*info) );
185  return *info;
186  }
187  else if (lquery) {
188  return *info;
189  }
190 
191  /* Quick return if possible */
192  if (m == 0 || n == 0) {
193  return *info;
194  }
195 
196  ldwork = nw;
197 
198  if ( nb >= k ) {
199  /* Use CPU code */
200  lapackf77_dormql(side_, trans_, &m, &n, &k, &a[a_offset], &lda, &tau[1],
201  &c[c_offset], &ldc, work, &lwork, &iinfo);
202  }
203  else {
204  /* Use hybrid CPU-GPU code */
205  if ((left && notran) || (! left && ! notran)) {
206  i1 = 1;
207  i2 = k;
208  i3 = nb;
209  } else {
210  i1 = (k - 1) / nb * nb + 1;
211  i2 = 1;
212  i3 = -nb;
213  }
214 
215  // silence "uninitialized" warnings
216  mi = 0;
217  ni = 0;
218 
219  if (left) {
220  ni = n;
221  } else {
222  mi = m;
223  }
224 
225  for (i__ = i1; (i3 < 0 ? i__ >= i2 : i__ <= i2); i__ += i3) {
226  ib = min(nb, k - i__ + 1);
227 
228  /* Form the triangular factor of the block reflector
229  H = H(i+ib-1) . . . H(i+1) H(i) */
230  i__4 = nq - k + i__ + ib - 1;
231  lapackf77_dlarft("Backward", "Columnwise", &i__4, &ib,
232  &a[i__ * lda + 1], &lda, &tau[i__], t, &ib);
233 
234  /* 1) Put 0s in the lower triangular part of A;
235  2) copy the panel from A to the GPU, and
236  3) restore A */
237  dpanel_to_q('L', ib, &a[i__ + i__ * lda], lda, t+ib*ib);
238  magma_dsetmatrix( i__4, ib, &a[1 + i__ * lda], lda, dwork, i__4 );
239  dq_to_panel('L', ib, &a[i__ + i__ * lda], lda, t+ib*ib);
240 
241  if (left) {
242  /* H or H' is applied to C(1:m-k+i+ib-1,1:n) */
243  mi = m - k + i__ + ib - 1;
244  }
245  else {
246  /* H or H' is applied to C(1:m,1:n-k+i+ib-1) */
247  ni = n - k + i__ + ib - 1;
248  }
249 
250  /* Apply H or H'; First copy T to the GPU */
251  magma_dsetmatrix( ib, ib, t, ib, dwork+i__4*ib, ib );
253  mi, ni, ib,
254  dwork, i__4, dwork+i__4*ib, ib,
255  &dc[1+m], m,
256  dwork+i__4*ib + ib*ib, ldwork);
257  }
258 
259  magma_dgetmatrix( m, n, &dc[1+m], m, &c[c_offset], ldc );
260  }
261  MAGMA_D_SET2REAL( work[0], lwkopt );
262 
263  dc += (1 + m);
264  magma_free( dc );
265  magma_free( dwork );
266 
267  return *info;
268 } /* magma_dormql */
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
void lapackf77_dlarft(const char *direct, const char *storev, const magma_int_t *n, const magma_int_t *k, double *V, const magma_int_t *ldv, const double *tau, double *T, const magma_int_t *ldt)
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define MagmaBackward
Definition: magma.h:72
#define dwork(dev, i, j)
magma_int_t magma_dormql(char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
Definition: dormql.cpp:16
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
magma_int_t magma_dlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const double *dv, magma_int_t ldv, const double *dt, magma_int_t ldt, double *dc, magma_int_t ldc, double *dwork, magma_int_t ldwork)
Definition: dlarfb_gpu.cpp:15
void lapackf77_dormql(const char *side, const char *trans, const magma_int_t *m, const magma_int_t *n, const magma_int_t *k, const double *A, const magma_int_t *lda, const double *tau, double *C, const magma_int_t *ldc, double *work, const magma_int_t *lwork, magma_int_t *info)
void dpanel_to_q(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:17
void dq_to_panel(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:57
#define MAGMA_D_SET2REAL(v, t)
Definition: magma.h:159
#define max(a, b)
Definition: common_magma.h:82
#define MagmaColumnwise
Definition: magma.h:74
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702