MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
sormql.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @author Raffaele Solca
9
10  @generated s Tue Aug 13 16:44:27 2013
11
12 */
13 #include "common_magma.h"
14
15 extern "C" magma_int_t
16 magma_sormql(const char side, const char trans,
18  float *a, magma_int_t lda,
19  float *tau,
20  float *c, magma_int_t ldc,
21  float *work, magma_int_t lwork,
22  magma_int_t *info)
23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
28  August 2013
29
30  Purpose
31  =======
32  SORMQL overwrites the general real M-by-N matrix C with
33
34  SIDE = 'L' SIDE = 'R'
35  TRANS = 'N': Q * C C * Q
36  TRANS = 'C': Q**T * C C * Q**T
37
38  where Q is a real unitary matrix defined as the product of k
39  elementary reflectors
40
41  Q = H(k) . . . H(2) H(1)
42
43  as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N
44  if SIDE = 'R'.
45
46  Arguments
47  =========
48  SIDE (input) CHARACTER*1
49  = 'L': apply Q or Q**T from the Left;
50  = 'R': apply Q or Q**T from the Right.
51
52  TRANS (input) CHARACTER*1
53  = 'N': No transpose, apply Q;
54  = 'C': Transpose, apply Q**T.
55
56  M (input) INTEGER
57  The number of rows of the matrix C. M >= 0.
58
59  N (input) INTEGER
60  The number of columns of the matrix C. N >= 0.
61
62  K (input) INTEGER
63  The number of elementary reflectors whose product defines
64  the matrix Q.
65  If SIDE = 'L', M >= K >= 0;
66  if SIDE = 'R', N >= K >= 0.
67
68  A (input) REAL array, dimension (LDA,K)
69  The i-th column must contain the vector which defines the
70  elementary reflector H(i), for i = 1,2,...,k, as returned by
71  SGEQLF in the last k columns of its array argument A.
72  A is modified by the routine but restored on exit.
73
74  LDA (input) INTEGER
75  The leading dimension of the array A.
76  If SIDE = 'L', LDA >= max(1,M);
77  if SIDE = 'R', LDA >= max(1,N).
78
79  TAU (input) REAL array, dimension (K)
80  TAU(i) must contain the scalar factor of the elementary
81  reflector H(i), as returned by SGEQLF.
82
83  C (input/output) REAL array, dimension (LDC,N)
84  On entry, the M-by-N matrix C.
85  On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
86
87  LDC (input) INTEGER
88  The leading dimension of the array C. LDC >= max(1,M).
89
90  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
91  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
92
93  LWORK (input) INTEGER
94  The dimension of the array WORK.
95  If SIDE = 'L', LWORK >= max(1,N);
96  if SIDE = 'R', LWORK >= max(1,M).
97  For optimum performance LWORK >= N*NB if SIDE = 'L', and
98  LWORK >= M*NB if SIDE = 'R', where NB is the optimal
99  blocksize.
100
101  If LWORK = -1, then a workspace query is assumed; the routine
102  only calculates the optimal size of the WORK array, returns
103  this value as the first entry of the WORK array, and no error
104  message related to LWORK is issued by XERBLA.
105
106  INFO (output) INTEGER
107  = 0: successful exit
108  < 0: if INFO = -i, the i-th argument had an illegal value
109  ===================================================================== */
110
111  char side_[2] = {side, 0};
112  char trans_[2] = {trans, 0};
113
114  /* Allocate work space on the GPU */
115  float *dwork, *dc;
116  magma_smalloc( &dc, (m)*(n) );
117  magma_smalloc( &dwork, 2*(m + 64)*64 );
118
119  /* Copy matrix C from the CPU to the GPU */
120  magma_ssetmatrix( m, n, c, ldc, dc, m );
121  dc -= (1 + m);
122
123  magma_int_t a_offset, c_dim1, c_offset, i__4;
124
125  magma_int_t i__;
126  float t[2*4160] /* was [65][64] */;
127  magma_int_t i1, i2, i3, ib, nb, mi, ni, nq, nw;
128  magma_int_t iinfo, ldwork, lwkopt=0;
129  int lquery, left, notran;
130
131  a_offset = 1 + lda;
132  a -= a_offset;
133  --tau;
134  c_dim1 = ldc;
135  c_offset = 1 + c_dim1;
136  c -= c_offset;
137
138  *info = 0;
139  left = lapackf77_lsame(side_, "L");
140  notran = lapackf77_lsame(trans_, "N");
141  lquery = (lwork == -1);
142
143  /* NQ is the order of Q and NW is the minimum dimension of WORK */
144  if (left) {
145  nq = m;
146  nw = max(1,n);
147  } else {
148  nq = n;
149  nw = max(1,m);
150  }
151  if (! left && ! lapackf77_lsame(side_, "R")) {
152  *info = -1;
153  } else if (! notran && ! lapackf77_lsame(trans_, "C")) {
154  *info = -2;
155  } else if (m < 0) {
156  *info = -3;
157  } else if (n < 0) {
158  *info = -4;
159  } else if (k < 0 || k > nq) {
160  *info = -5;
161  } else if (lda < max(1,nq)) {
162  *info = -7;
163  } else if (ldc < max(1,m)) {
164  *info = -10;
165  }
166
167  if (*info == 0) {
168  if (m == 0 || n == 0) {
169  lwkopt = 1;
170  } else {
171  /* Determine the block size. NB may be at most NBMAX, where
172  NBMAX is used to define the local array T. */
173  nb = 64;
174  lwkopt = nw * nb;
175  }
176  MAGMA_S_SET2REAL( work[0], lwkopt );
177
178  if (lwork < nw && ! lquery) {
179  *info = -12;
180  }
181  }
182
183  if (*info != 0) {
184  magma_xerbla( __func__, -(*info) );
185  return *info;
186  }
187  else if (lquery) {
188  return *info;
189  }
190
191  /* Quick return if possible */
192  if (m == 0 || n == 0) {
193  return *info;
194  }
195
196  ldwork = nw;
197
198  if ( nb >= k ) {
199  /* Use CPU code */
200  lapackf77_sormql(side_, trans_, &m, &n, &k, &a[a_offset], &lda, &tau[1],
201  &c[c_offset], &ldc, work, &lwork, &iinfo);
202  }
203  else {
204  /* Use hybrid CPU-GPU code */
205  if ((left && notran) || (! left && ! notran)) {
206  i1 = 1;
207  i2 = k;
208  i3 = nb;
209  } else {
210  i1 = (k - 1) / nb * nb + 1;
211  i2 = 1;
212  i3 = -nb;
213  }
214
215  // silence "uninitialized" warnings
216  mi = 0;
217  ni = 0;
218
219  if (left) {
220  ni = n;
221  } else {
222  mi = m;
223  }
224
225  for (i__ = i1; (i3 < 0 ? i__ >= i2 : i__ <= i2); i__ += i3) {
226  ib = min(nb, k - i__ + 1);
227
228  /* Form the triangular factor of the block reflector
229  H = H(i+ib-1) . . . H(i+1) H(i) */
230  i__4 = nq - k + i__ + ib - 1;
231  lapackf77_slarft("Backward", "Columnwise", &i__4, &ib,
232  &a[i__ * lda + 1], &lda, &tau[i__], t, &ib);
233
234  /* 1) Put 0s in the lower triangular part of A;
235  2) copy the panel from A to the GPU, and
236  3) restore A */
237  spanel_to_q('L', ib, &a[i__ + i__ * lda], lda, t+ib*ib);
238  magma_ssetmatrix( i__4, ib, &a[1 + i__ * lda], lda, dwork, i__4 );
239  sq_to_panel('L', ib, &a[i__ + i__ * lda], lda, t+ib*ib);
240
241  if (left) {
242  /* H or H' is applied to C(1:m-k+i+ib-1,1:n) */
243  mi = m - k + i__ + ib - 1;
244  }
245  else {
246  /* H or H' is applied to C(1:m,1:n-k+i+ib-1) */
247  ni = n - k + i__ + ib - 1;
248  }
249
250  /* Apply H or H'; First copy T to the GPU */
251  magma_ssetmatrix( ib, ib, t, ib, dwork+i__4*ib, ib );
253  mi, ni, ib,
254  dwork, i__4, dwork+i__4*ib, ib,
255  &dc[1+m], m,
256  dwork+i__4*ib + ib*ib, ldwork);
257  }
258
259  magma_sgetmatrix( m, n, &dc[1+m], m, &c[c_offset], ldc );
260  }
261  MAGMA_S_SET2REAL( work[0], lwkopt );
262
263  dc += (1 + m);
264  magma_free( dc );
265  magma_free( dwork );
266
267  return *info;
268 } /* magma_sormql */
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
#define magma_free(ptr)
Definition: magma.h:57
void sq_to_panel(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:57
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_slarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const float *dv, magma_int_t ldv, const float *dt, magma_int_t ldt, float *dc, magma_int_t ldc, float *dwork, magma_int_t ldwork)
Definition: slarfb_gpu.cpp:15
#define magma_ssetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_s.h:702
#define MagmaBackward
Definition: magma.h:72
#define dwork(dev, i, j)
void lapackf77_sormql(const char *side, const char *trans, const magma_int_t *m, const magma_int_t *n, const magma_int_t *k, const float *A, const magma_int_t *lda, const float *tau, float *C, const magma_int_t *ldc, float *work, const magma_int_t *lwork, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MAGMA_S_SET2REAL(v, t)
Definition: magma.h:181
void spanel_to_q(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:17
static magma_err_t magma_smalloc(magmaFloat_ptr *ptrPtr, size_t n)
Definition: magma.h:77
#define magma_sgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_s.h:705
magma_int_t magma_sormql(char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
Definition: sormql.cpp:16
#define max(a, b)
Definition: common_magma.h:82
void lapackf77_slarft(const char *direct, const char *storev, const magma_int_t *n, const magma_int_t *k, float *V, const magma_int_t *ldv, const float *tau, float *T, const magma_int_t *ldt)
#define MagmaColumnwise
Definition: magma.h:74