MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sgeqrs3_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated s Thu May 10 22:26:52 2012
9 
10 */
11 #include "common_magma.h"
12 
13 extern "C" magma_int_t
15  float *dA, magma_int_t ldda,
16  float *tau, float *dT,
17  float *dB, magma_int_t lddb,
18  float *hwork, magma_int_t lwork,
19  magma_int_t *info)
20 {
21 /* -- MAGMA (version 1.2.0) --
22  Univ. of Tennessee, Knoxville
23  Univ. of California, Berkeley
24  Univ. of Colorado, Denver
25  May 2012
26 
27  Purpose
28  =======
29  Solves the least squares problem
30  min || A*X - C ||
31  using the QR factorization A = Q*R computed by SGEQRF3_GPU.
32 
33  Arguments
34  =========
35  M (input) INTEGER
36  The number of rows of the matrix A. M >= 0.
37 
38  N (input) INTEGER
39  The number of columns of the matrix A. M >= N >= 0.
40 
41  NRHS (input) INTEGER
42  The number of columns of the matrix C. NRHS >= 0.
43 
44  A (input) REAL array on the GPU, dimension (LDDA,N)
45  The i-th column must contain the vector which defines the
46  elementary reflector H(i), for i = 1,2,...,n, as returned by
47  SGEQRF3_GPU in the first n columns of its array argument A.
48 
49  LDDA (input) INTEGER
50  The leading dimension of the array A, LDDA >= M.
51 
52  TAU (input) REAL array, dimension (N)
53  TAU(i) must contain the scalar factor of the elementary
54  reflector H(i), as returned by MAGMA_SGEQRF_GPU.
55 
56  DB (input/output) REAL array on the GPU, dimension (LDDB,NRHS)
57  On entry, the M-by-NRHS matrix C.
58  On exit, the N-by-NRHS solution matrix X.
59 
60  DT (input) REAL array that is the output (the 6th argument)
61  of magma_sgeqrf_gpu of size
62  2*MIN(M, N)*NB + ((N+31)/32*32 )* MAX(NB, NRHS).
63  The array starts with a block of size MIN(M,N)*NB that stores
64  the triangular T matrices used in the QR factorization,
65  followed by MIN(M,N)*NB block storing the diagonal block
66  matrices for the R matrix, followed by work space of size
67  ((N+31)/32*32 )* MAX(NB, NRHS).
68 
69  LDDB (input) INTEGER
70  The leading dimension of the array DB. LDDB >= M.
71 
72  HWORK (workspace/output) REAL array, dimension (LWORK)
73  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
74 
75  LWORK (input) INTEGER
76  The dimension of the array WORK, LWORK >= max(1,NRHS).
77  For optimum performance LWORK >= (M-N+NB)*(NRHS + 2*NB), where
78  NB is the blocksize given by magma_get_sgeqrf_nb( M ).
79 
80  If LWORK = -1, then a workspace query is assumed; the routine
81  only calculates the optimal size of the HWORK array, returns
82  this value as the first entry of the WORK array.
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value
87  ===================================================================== */
88 
89  #define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1))
90  #define d_ref(a_1) (dT+(lddwork+(a_1))*nb)
91 
92  float c_one = MAGMA_S_ONE;
93  magma_int_t k, lddwork;
94 
96  magma_int_t lwkopt = (m-n+nb)*(nrhs+2*nb);
97  long int lquery = (lwork == -1);
98 
99  hwork[0] = MAGMA_S_MAKE( (float)lwkopt, 0. );
100 
101  *info = 0;
102  if (m < 0)
103  *info = -1;
104  else if (n < 0 || m < n)
105  *info = -2;
106  else if (nrhs < 0)
107  *info = -3;
108  else if (ldda < max(1,m))
109  *info = -5;
110  else if (lddb < max(1,m))
111  *info = -8;
112  else if (lwork < lwkopt && ! lquery)
113  *info = -10;
114 
115  if (*info != 0) {
116  magma_xerbla( __func__, -(*info) );
117  return *info;
118  }
119  else if (lquery)
120  return *info;
121 
122  k = min(m,n);
123  if (k == 0) {
124  hwork[0] = c_one;
125  return *info;
126  }
127  lddwork= k;
128 
129  /* B := Q' * B */
131  m, nrhs, n,
132  a_ref(0,0), ldda, tau,
133  dB, lddb, hwork, lwork, dT, nb, info );
134  if ( *info != 0 ) {
135  return *info;
136  }
137 
138  /* Solve R*X = B(1:n,:)
139  1. Move the block diagonal submatrices from d_ref to R
140  2. Solve
141  3. Restore the data format moving data from R back to d_ref
142  */
143  magmablas_sswapdblk(k, nb, a_ref(0,0), ldda, 1, d_ref(0), nb, 0);
144  if ( nrhs == 1 ) {
146  n, a_ref(0,0), ldda, dB, 1);
147  } else {
149  n, nrhs, c_one, a_ref(0,0), ldda, dB, lddb);
150  }
151  magmablas_sswapdblk(k, nb, d_ref(0), nb, 0, a_ref(0,0), ldda, 1);
152 
153  return *info;
154 }
155 
156 #undef a_ref
157 #undef d_ref