MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
cungqr.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @generated c Thu May 10 22:26:50 2012
9 
10 */
11 #include "common_magma.h"
12 
13 extern "C" magma_int_t
15  cuFloatComplex *a, magma_int_t lda,
16  cuFloatComplex *tau, cuFloatComplex *dT,
17  magma_int_t nb, magma_int_t *info)
18 {
19 /* -- MAGMA (version 1.2.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  May 2012
24 
25  Purpose
26  =======
27  CUNGQR generates an M-by-N COMPLEX matrix Q with orthonormal columns,
28  which is defined as the first N columns of a product of K elementary
29  reflectors of order M
30 
31  Q = H(1) H(2) . . . H(k)
32 
33  as returned by CGEQRF.
34 
35  Arguments
36  =========
37  M (input) INTEGER
38  The number of rows of the matrix Q. M >= 0.
39 
40  N (input) INTEGER
41  The number of columns of the matrix Q. M >= N >= 0.
42 
43  K (input) INTEGER
44  The number of elementary reflectors whose product defines the
45  matrix Q. N >= K >= 0.
46 
47  A (input/output) COMPLEX array A, dimension (LDDA,N).
48  On entry, the i-th column must contain the vector
49  which defines the elementary reflector H(i), for
50  i = 1,2,...,k, as returned by CGEQRF_GPU in the
51  first k columns of its array argument A.
52  On exit, the M-by-N matrix Q.
53 
54  LDA (input) INTEGER
55  The first dimension of the array A. LDA >= max(1,M).
56 
57  TAU (input) COMPLEX array, dimension (K)
58  TAU(i) must contain the scalar factor of the elementary
59  reflector H(i), as returned by CGEQRF_GPU.
60 
61  DT (input) COMPLEX array on the GPU device.
62  DT contains the T matrices used in blocking the elementary
63  reflectors H(i), e.g., this can be the 6th argument of
64  magma_cgeqrf_gpu.
65 
66  NB (input) INTEGER
67  This is the block size used in CGEQRF_GPU, and correspondingly
68  the size of the T matrices, used in the factorization, and
69  stored in DT.
70 
71  INFO (output) INTEGER
72  = 0: successful exit
73  < 0: if INFO = -i, the i-th argument has an illegal value
74  ===================================================================== */
75 
76  #define a_ref(i,j) ( a + (j)*lda + (i))
77  #define da_ref(i,j) (da + (j)*ldda + (i))
78  #define t_ref(a_1) (dT+(a_1)*nb)
79 
80  magma_int_t i__1, i__2, i__3;
81  magma_int_t lwork, ldda;
82  static magma_int_t i, ib, ki, kk, iinfo;
83  magma_int_t lddwork = min(m, n);
84  cuFloatComplex *da, *work, *dwork;
85  static cudaStream_t stream;
86 
87  *info = 0;
88  if (m < 0) {
89  *info = -1;
90  } else if ((n < 0) || (n > m)) {
91  *info = -2;
92  } else if ((k < 0) || (k > n)) {
93  *info = -3;
94  } else if (lda < max(1,m)) {
95  *info = -5;
96  }
97  if (*info != 0) {
98  magma_xerbla( __func__, -(*info) );
99  return *info;
100  }
101 
102  if (n <= 0)
103  return *info;
104 
105  /* Allocate GPU work space */
106  ldda = ((m+31)/32)*32;
107  lddwork = ((lddwork+31)/32)*32;
108  if (MAGMA_SUCCESS != magma_cmalloc( &da, (n)*ldda + nb*lddwork )) {
109  *info = MAGMA_ERR_DEVICE_ALLOC;
110  return *info;
111  }
112  dwork = da + (n)*ldda;
113 
114  /* Allocate CPU work space */
115  lwork = n * nb;
116  work = (cuFloatComplex *)malloc(lwork*sizeof(cuFloatComplex));
117  if( work == NULL ) {
118  magma_free( da );
119  *info = MAGMA_ERR_HOST_ALLOC;
120  return *info;
121  }
122 
123  magma_queue_create( &stream );
124 
125  if ( (nb > 1) && (nb < k) )
126  {
127  /* Use blocked code after the last block.
128  The first kk columns are handled by the block method. */
129  ki = (k - nb - 1) / nb * nb;
130  kk = min(k, ki + nb);
131 
132  /* Set A(1:kk,kk+1:n) to zero. */
133  magmablas_claset(MagmaUpperLower, kk, n-kk, da_ref(0,kk), ldda);
134  }
135  else
136  kk = 0;
137 
138  /* Use unblocked code for the last or only block. */
139  if (kk < n)
140  {
141  i__1 = m - kk;
142  i__2 = n - kk;
143  i__3 = k - kk;
144  lapackf77_cungqr(&i__1, &i__2, &i__3,
145  a_ref(kk, kk), &lda,
146  &tau[kk], work, &lwork, &iinfo);
147 
148  magma_csetmatrix( i__1, i__2,
149  a_ref(kk, kk), lda,
150  da_ref(kk, kk), ldda );
151  }
152 
153  if (kk > 0)
154  {
155  /* Use blocked code */
156  for (i = ki; i >= 0; i-=nb)
157  {
158  ib = min(nb, k - i);
159 
160  /* Send the current panel to the GPU */
161  i__2 = m - i;
162  cpanel_to_q(MagmaUpper, ib, a_ref(i,i), lda, work);
163  magma_csetmatrix( i__2, ib, a_ref(i, i), lda, da_ref(i, i), ldda );
164 
165  if (i + ib < n)
166  {
167  /* Apply H to A(i:m,i+ib:n) from the left */
168  i__3 = n - i - ib;
170  i__2, i__3, ib,
171  da_ref(i, i ), ldda, t_ref(i), nb,
172  da_ref(i, i+ib), ldda, dwork, lddwork);
173  }
174 
175  /* Apply H to rows i:m of current block on the CPU */
176  lapackf77_cungqr(&i__2, &ib, &ib,
177  a_ref(i, i), &lda,
178  &tau[i], work, &lwork, &iinfo);
179  magma_csetmatrix_async( i__2, ib,
180  a_ref(i,i), lda,
181  da_ref(i,i), ldda, stream );
182 
183  /* Set rows 1:i-1 of current block to zero */
184  i__2 = i + ib;
185  magmablas_claset(MagmaUpperLower, i, i__2 - i, da_ref(0,i), ldda);
186  }
187  }
188 
189  magma_cgetmatrix( m, n, da_ref(0, 0), ldda, a_ref(0, 0), lda );
190 
191 
192  magma_queue_destroy( stream );
193  magma_free( da );
194  free(work);
195 
196  return *info;
197 } /* magma_cungqr */
198 
199 #undef da_ref
200 #undef a_ref
201 #undef t_ref