MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sormqr.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Stan Tomov
9 
10  @generated s Wed Aug 14 12:16:12 2013
11 
12 */
13 #include "common_magma.h"
14 
15 extern "C" magma_int_t
16 magma_sormqr(const char side, const char trans,
18  float *A, magma_int_t lda,
19  float *tau,
20  float *C, magma_int_t ldc,
21  float *work, magma_int_t lwork,
22  magma_int_t *info)
23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  SORMQR overwrites the general real M-by-N matrix C with
33 
34  SIDE = 'L' SIDE = 'R'
35  TRANS = 'N': Q * C C * Q
36  TRANS = 'T': Q**T * C C * Q**T
37 
38  where Q is a real orthogonal matrix defined as the product of k
39  elementary reflectors
40 
41  Q = H(1) H(2) . . . H(k)
42 
43  as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N
44  if SIDE = 'R'.
45 
46  Arguments
47  =========
48  SIDE (input) CHARACTER*1
49  = 'L': apply Q or Q**T from the Left;
50  = 'R': apply Q or Q**T from the Right.
51 
52  TRANS (input) CHARACTER*1
53  = 'N': No transpose, apply Q;
54  = 'T': Transpose, apply Q**T.
55 
56  M (input) INTEGER
57  The number of rows of the matrix C. M >= 0.
58 
59  N (input) INTEGER
60  The number of columns of the matrix C. N >= 0.
61 
62  K (input) INTEGER
63  The number of elementary reflectors whose product defines
64  the matrix Q.
65  If SIDE = 'L', M >= K >= 0;
66  if SIDE = 'R', N >= K >= 0.
67 
68  A (input) REAL array, dimension (LDA,K)
69  The i-th column must contain the vector which defines the
70  elementary reflector H(i), for i = 1,2,...,k, as returned by
71  SGEQRF in the first k columns of its array argument A.
72  A is modified by the routine but restored on exit.
73 
74  LDA (input) INTEGER
75  The leading dimension of the array A.
76  If SIDE = 'L', LDA >= max(1,M);
77  if SIDE = 'R', LDA >= max(1,N).
78 
79  TAU (input) REAL array, dimension (K)
80  TAU(i) must contain the scalar factor of the elementary
81  reflector H(i), as returned by SGEQRF.
82 
83  C (input/output) REAL array, dimension (LDC,N)
84  On entry, the M-by-N matrix C.
85  On exit, C is overwritten by Q*C or Q**T * C or C * Q**T or C*Q.
86 
87  LDC (input) INTEGER
88  The leading dimension of the array C. LDC >= max(1,M).
89 
90  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
91  On exit, if INFO = 0, WORK(0) returns the optimal LWORK.
92 
93  LWORK (input) INTEGER
94  The dimension of the array WORK.
95  If SIDE = 'L', LWORK >= max(1,N);
96  if SIDE = 'R', LWORK >= max(1,M).
97  For optimum performance
98  LWORK >= N*NB if SIDE = 'L', and
99  LWORK >= M*NB if SIDE = 'R',
100  where NB is the optimal blocksize.
101 
102  If LWORK = -1, then a workspace query is assumed; the routine
103  only calculates the optimal size of the WORK array, returns
104  this value as the first entry of the WORK array, and no error
105  message related to LWORK is issued by XERBLA.
106 
107  INFO (output) INTEGER
108  = 0: successful exit
109  < 0: if INFO = -i, the i-th argument had an illegal value
110  ===================================================================== */
111 
112  #define A(a_1,a_2) ( A + (a_1) + (a_2)*lda)
113  #define dC(a_1,a_2) (dC + (a_1) + (a_2)*lddc)
114 
115  magma_int_t nb = magma_get_sgeqrf_nb( min( m, n ));
116 
117  float c_one = MAGMA_S_ONE;
118 
119  char side_[2] = {side, 0};
120  char trans_[2] = {trans, 0};
121 
122  magma_int_t nq_i, lddwork;
123  magma_int_t i;
124  float *T;
125  magma_int_t i1, i2, step, ib, ic, jc, mi, ni, nq, nw;
126  int left, notran, lquery;
127  magma_int_t iinfo, lwkopt;
128 
129  *info = 0;
130  left = lapackf77_lsame(side_, "L");
131  notran = lapackf77_lsame(trans_, "N");
132  lquery = (lwork == -1);
133 
134  /* NQ is the order of Q and NW is the minimum dimension of WORK */
135  if (left) {
136  nq = m;
137  nw = n;
138  } else {
139  nq = n;
140  nw = m;
141  }
142  lwkopt = max(1,nw) * nb;
143  work[0] = MAGMA_S_MAKE( lwkopt, 0 );
144 
145  if (! left && ! lapackf77_lsame(side_, "R")) {
146  *info = -1;
147  } else if (! notran && ! lapackf77_lsame(trans_, MagmaTransStr)) {
148  *info = -2;
149  } else if (m < 0) {
150  *info = -3;
151  } else if (n < 0) {
152  *info = -4;
153  } else if (k < 0 || k > nq) {
154  *info = -5;
155  } else if (lda < max(1,nq)) {
156  *info = -7;
157  } else if (ldc < max(1,m)) {
158  *info = -10;
159  } else if (lwork < max(1,nw) && ! lquery) {
160  *info = -12;
161  }
162 
163  if (*info != 0) {
164  magma_xerbla( __func__, -(*info) );
165  return *info;
166  }
167  else if (lquery) {
168  return *info;
169  }
170 
171  /* Quick return if possible */
172  if (m == 0 || n == 0 || k == 0) {
173  work[0] = c_one;
174  return *info;
175  }
176 
177  /* Allocate work space on the GPU */
178  magma_int_t lddc = m;
179  float *dwork, *dC;
180  magma_smalloc( &dC, lddc*n );
181  magma_smalloc( &dwork, (m + n + nb)*nb );
182  if ( dC == NULL || dwork == NULL ) {
183  magma_free( dC );
184  magma_free( dwork );
185  *info = MAGMA_ERR_DEVICE_ALLOC;
186  return *info;
187  }
188 
189  /* work space on CPU */
190  T = (float*) malloc( 2*nb*nb * sizeof(float) );
191  if ( T == NULL ) {
192  magma_free( dC );
193  magma_free( dwork );
194  *info = MAGMA_ERR_HOST_ALLOC;
195  return *info;
196  }
197 
198  /* Copy matrix C from the CPU to the GPU */
199  magma_ssetmatrix( m, n, C, ldc, dC, lddc );
200 
201  if (nb >= k) {
202  /* Use CPU code */
203  lapackf77_sormqr(side_, trans_, &m, &n, &k, A, &lda, &tau[1],
204  C, &ldc, work, &lwork, &iinfo);
205  }
206  else {
207  /* Use hybrid CPU-GPU code */
208  if ( (left && (! notran)) || ((! left) && notran) ) {
209  i1 = 0;
210  i2 = k;
211  step = nb;
212  } else {
213  i1 = ((k - 1) / nb) * nb;
214  i2 = 0;
215  step = -nb;
216  }
217 
218  // silence "uninitialized" warnings
219  mi = 0;
220  ni = 0;
221 
222  if (left) {
223  ni = n;
224  jc = 0;
225  } else {
226  mi = m;
227  ic = 0;
228  }
229 
230  for( i=i1; (step<0 ? i>=i2 : i<i2); i += step ) {
231  ib = min(nb, k - i);
232 
233  /* Form the triangular factor of the block reflector
234  H = H(i) H(i+1) . . . H(i+ib-1) */
235  nq_i = nq - i;
236  lapackf77_slarft("F", "C", &nq_i, &ib, A(i,i), &lda,
237  &tau[i], T, &ib);
238 
239  /* 1) Put 0s in the upper triangular part of A;
240  2) copy the panel from A to the GPU, and
241  3) restore A */
242  spanel_to_q('U', ib, A(i,i), lda, T+ib*ib);
243  magma_ssetmatrix( nq_i, ib, A(i,i), lda, dwork, nq_i );
244  sq_to_panel('U', ib, A(i,i), lda, T+ib*ib);
245 
246  if (left) {
247  /* H or H' is applied to C(i:m,1:n) */
248  mi = m - i;
249  ic = i;
250  }
251  else {
252  /* H or H' is applied to C(1:m,i:n) */
253  ni = n - i;
254  jc = i;
255  }
256 
257  if (left)
258  lddwork = ni;
259  else
260  lddwork = mi;
261 
262  /* Apply H or H'; First copy T to the GPU */
263  magma_ssetmatrix( ib, ib, T, ib, dwork+nq_i*ib, ib );
265  mi, ni, ib,
266  dwork, nq_i, dwork+nq_i*ib, ib,
267  dC(ic,jc), lddc,
268  dwork+nq_i*ib + ib*ib, lddwork);
269  }
270  magma_sgetmatrix( m, n, dC, lddc, C, ldc );
271  }
272  work[0] = MAGMA_S_MAKE( lwkopt, 0 );
273 
274  magma_free( dC );
275  magma_free( dwork );
276  free( T );
277 
278  return *info;
279 } /* magma_sormqr */
#define MagmaTransStr
Definition: magma.h:81
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
void lapackf77_sormqr(const char *side, const char *trans, const magma_int_t *m, const magma_int_t *n, const magma_int_t *k, const float *A, const magma_int_t *lda, const float *tau, float *C, const magma_int_t *ldc, float *work, const magma_int_t *lwork, magma_int_t *info)
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
magma_int_t magma_sormqr(char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
Definition: sormqr.cpp:16
#define A(a_1, a_2)
#define T(m)
Definition: zgeqrf_mc.cpp:14
#define magma_free(ptr)
Definition: magma.h:57
void sq_to_panel(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:57
int magma_int_t
Definition: magmablas.h:12
#define C(i, j)
magma_int_t magma_slarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const float *dv, magma_int_t ldv, const float *dt, magma_int_t ldt, float *dc, magma_int_t ldc, float *dwork, magma_int_t ldwork)
Definition: slarfb_gpu.cpp:15
#define magma_ssetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_s.h:702
magma_int_t magma_get_sgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:120
#define dwork(dev, i, j)
#define MAGMA_S_ONE
Definition: magma.h:198
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define dC(a_1, a_2)
#define MagmaForward
Definition: magma.h:71
void spanel_to_q(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:17
static magma_err_t magma_smalloc(magmaFloat_ptr *ptrPtr, size_t n)
Definition: magma.h:77
#define magma_sgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_s.h:705
#define MAGMA_S_MAKE(r, i)
Definition: magma.h:189
#define max(a, b)
Definition: common_magma.h:82
void lapackf77_slarft(const char *direct, const char *storev, const magma_int_t *n, const magma_int_t *k, float *V, const magma_int_t *ldv, const float *tau, float *T, const magma_int_t *ldt)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define MagmaColumnwise
Definition: magma.h:74