MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sormql.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Raffaele Solca
9 
10  @generated s Tue Aug 13 16:44:27 2013
11 
12 */
13 #include "common_magma.h"
14 
15 extern "C" magma_int_t
16 magma_sormql(const char side, const char trans,
18  float *a, magma_int_t lda,
19  float *tau,
20  float *c, magma_int_t ldc,
21  float *work, magma_int_t lwork,
22  magma_int_t *info)
23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  SORMQL overwrites the general real M-by-N matrix C with
33 
34  SIDE = 'L' SIDE = 'R'
35  TRANS = 'N': Q * C C * Q
36  TRANS = 'C': Q**T * C C * Q**T
37 
38  where Q is a real unitary matrix defined as the product of k
39  elementary reflectors
40 
41  Q = H(k) . . . H(2) H(1)
42 
43  as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N
44  if SIDE = 'R'.
45 
46  Arguments
47  =========
48  SIDE (input) CHARACTER*1
49  = 'L': apply Q or Q**T from the Left;
50  = 'R': apply Q or Q**T from the Right.
51 
52  TRANS (input) CHARACTER*1
53  = 'N': No transpose, apply Q;
54  = 'C': Transpose, apply Q**T.
55 
56  M (input) INTEGER
57  The number of rows of the matrix C. M >= 0.
58 
59  N (input) INTEGER
60  The number of columns of the matrix C. N >= 0.
61 
62  K (input) INTEGER
63  The number of elementary reflectors whose product defines
64  the matrix Q.
65  If SIDE = 'L', M >= K >= 0;
66  if SIDE = 'R', N >= K >= 0.
67 
68  A (input) REAL array, dimension (LDA,K)
69  The i-th column must contain the vector which defines the
70  elementary reflector H(i), for i = 1,2,...,k, as returned by
71  SGEQLF in the last k columns of its array argument A.
72  A is modified by the routine but restored on exit.
73 
74  LDA (input) INTEGER
75  The leading dimension of the array A.
76  If SIDE = 'L', LDA >= max(1,M);
77  if SIDE = 'R', LDA >= max(1,N).
78 
79  TAU (input) REAL array, dimension (K)
80  TAU(i) must contain the scalar factor of the elementary
81  reflector H(i), as returned by SGEQLF.
82 
83  C (input/output) REAL array, dimension (LDC,N)
84  On entry, the M-by-N matrix C.
85  On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
86 
87  LDC (input) INTEGER
88  The leading dimension of the array C. LDC >= max(1,M).
89 
90  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
91  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
92 
93  LWORK (input) INTEGER
94  The dimension of the array WORK.
95  If SIDE = 'L', LWORK >= max(1,N);
96  if SIDE = 'R', LWORK >= max(1,M).
97  For optimum performance LWORK >= N*NB if SIDE = 'L', and
98  LWORK >= M*NB if SIDE = 'R', where NB is the optimal
99  blocksize.
100 
101  If LWORK = -1, then a workspace query is assumed; the routine
102  only calculates the optimal size of the WORK array, returns
103  this value as the first entry of the WORK array, and no error
104  message related to LWORK is issued by XERBLA.
105 
106  INFO (output) INTEGER
107  = 0: successful exit
108  < 0: if INFO = -i, the i-th argument had an illegal value
109  ===================================================================== */
110 
111  char side_[2] = {side, 0};
112  char trans_[2] = {trans, 0};
113 
114  /* Allocate work space on the GPU */
115  float *dwork, *dc;
116  magma_smalloc( &dc, (m)*(n) );
117  magma_smalloc( &dwork, 2*(m + 64)*64 );
118 
119  /* Copy matrix C from the CPU to the GPU */
120  magma_ssetmatrix( m, n, c, ldc, dc, m );
121  dc -= (1 + m);
122 
123  magma_int_t a_offset, c_dim1, c_offset, i__4;
124 
125  magma_int_t i__;
126  float t[2*4160] /* was [65][64] */;
127  magma_int_t i1, i2, i3, ib, nb, mi, ni, nq, nw;
128  magma_int_t iinfo, ldwork, lwkopt=0;
129  int lquery, left, notran;
130 
131  a_offset = 1 + lda;
132  a -= a_offset;
133  --tau;
134  c_dim1 = ldc;
135  c_offset = 1 + c_dim1;
136  c -= c_offset;
137 
138  *info = 0;
139  left = lapackf77_lsame(side_, "L");
140  notran = lapackf77_lsame(trans_, "N");
141  lquery = (lwork == -1);
142 
143  /* NQ is the order of Q and NW is the minimum dimension of WORK */
144  if (left) {
145  nq = m;
146  nw = max(1,n);
147  } else {
148  nq = n;
149  nw = max(1,m);
150  }
151  if (! left && ! lapackf77_lsame(side_, "R")) {
152  *info = -1;
153  } else if (! notran && ! lapackf77_lsame(trans_, "C")) {
154  *info = -2;
155  } else if (m < 0) {
156  *info = -3;
157  } else if (n < 0) {
158  *info = -4;
159  } else if (k < 0 || k > nq) {
160  *info = -5;
161  } else if (lda < max(1,nq)) {
162  *info = -7;
163  } else if (ldc < max(1,m)) {
164  *info = -10;
165  }
166 
167  if (*info == 0) {
168  if (m == 0 || n == 0) {
169  lwkopt = 1;
170  } else {
171  /* Determine the block size. NB may be at most NBMAX, where
172  NBMAX is used to define the local array T. */
173  nb = 64;
174  lwkopt = nw * nb;
175  }
176  MAGMA_S_SET2REAL( work[0], lwkopt );
177 
178  if (lwork < nw && ! lquery) {
179  *info = -12;
180  }
181  }
182 
183  if (*info != 0) {
184  magma_xerbla( __func__, -(*info) );
185  return *info;
186  }
187  else if (lquery) {
188  return *info;
189  }
190 
191  /* Quick return if possible */
192  if (m == 0 || n == 0) {
193  return *info;
194  }
195 
196  ldwork = nw;
197 
198  if ( nb >= k ) {
199  /* Use CPU code */
200  lapackf77_sormql(side_, trans_, &m, &n, &k, &a[a_offset], &lda, &tau[1],
201  &c[c_offset], &ldc, work, &lwork, &iinfo);
202  }
203  else {
204  /* Use hybrid CPU-GPU code */
205  if ((left && notran) || (! left && ! notran)) {
206  i1 = 1;
207  i2 = k;
208  i3 = nb;
209  } else {
210  i1 = (k - 1) / nb * nb + 1;
211  i2 = 1;
212  i3 = -nb;
213  }
214 
215  // silence "uninitialized" warnings
216  mi = 0;
217  ni = 0;
218 
219  if (left) {
220  ni = n;
221  } else {
222  mi = m;
223  }
224 
225  for (i__ = i1; (i3 < 0 ? i__ >= i2 : i__ <= i2); i__ += i3) {
226  ib = min(nb, k - i__ + 1);
227 
228  /* Form the triangular factor of the block reflector
229  H = H(i+ib-1) . . . H(i+1) H(i) */
230  i__4 = nq - k + i__ + ib - 1;
231  lapackf77_slarft("Backward", "Columnwise", &i__4, &ib,
232  &a[i__ * lda + 1], &lda, &tau[i__], t, &ib);
233 
234  /* 1) Put 0s in the lower triangular part of A;
235  2) copy the panel from A to the GPU, and
236  3) restore A */
237  spanel_to_q('L', ib, &a[i__ + i__ * lda], lda, t+ib*ib);
238  magma_ssetmatrix( i__4, ib, &a[1 + i__ * lda], lda, dwork, i__4 );
239  sq_to_panel('L', ib, &a[i__ + i__ * lda], lda, t+ib*ib);
240 
241  if (left) {
242  /* H or H' is applied to C(1:m-k+i+ib-1,1:n) */
243  mi = m - k + i__ + ib - 1;
244  }
245  else {
246  /* H or H' is applied to C(1:m,1:n-k+i+ib-1) */
247  ni = n - k + i__ + ib - 1;
248  }
249 
250  /* Apply H or H'; First copy T to the GPU */
251  magma_ssetmatrix( ib, ib, t, ib, dwork+i__4*ib, ib );
253  mi, ni, ib,
254  dwork, i__4, dwork+i__4*ib, ib,
255  &dc[1+m], m,
256  dwork+i__4*ib + ib*ib, ldwork);
257  }
258 
259  magma_sgetmatrix( m, n, &dc[1+m], m, &c[c_offset], ldc );
260  }
261  MAGMA_S_SET2REAL( work[0], lwkopt );
262 
263  dc += (1 + m);
264  magma_free( dc );
265  magma_free( dwork );
266 
267  return *info;
268 } /* magma_sormql */
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
#define magma_free(ptr)
Definition: magma.h:57
void sq_to_panel(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:57
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_slarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const float *dv, magma_int_t ldv, const float *dt, magma_int_t ldt, float *dc, magma_int_t ldc, float *dwork, magma_int_t ldwork)
Definition: slarfb_gpu.cpp:15
#define magma_ssetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_s.h:702
#define MagmaBackward
Definition: magma.h:72
#define dwork(dev, i, j)
void lapackf77_sormql(const char *side, const char *trans, const magma_int_t *m, const magma_int_t *n, const magma_int_t *k, const float *A, const magma_int_t *lda, const float *tau, float *C, const magma_int_t *ldc, float *work, const magma_int_t *lwork, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MAGMA_S_SET2REAL(v, t)
Definition: magma.h:181
void spanel_to_q(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:17
static magma_err_t magma_smalloc(magmaFloat_ptr *ptrPtr, size_t n)
Definition: magma.h:77
#define magma_sgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_s.h:705
magma_int_t magma_sormql(char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
Definition: sormql.cpp:16
#define max(a, b)
Definition: common_magma.h:82
void lapackf77_slarft(const char *direct, const char *storev, const magma_int_t *n, const magma_int_t *k, float *V, const magma_int_t *ldv, const float *tau, float *T, const magma_int_t *ldt)
#define MagmaColumnwise
Definition: magma.h:74