MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgeqrf-v2.cpp File Reference
#include "common_magma.h"
Include dependency graph for zgeqrf-v2.cpp:

Go to the source code of this file.

Data Structures

struct  magma_qr_params
 

Macros

#define a_ref(a_1, a_2)   ( a+(a_2)*(lda) + (a_1))
 
#define da_ref(a_1, a_2)   (da+(a_2)*ldda + (a_1))
 

Functions

magma_int_t magma_zgeqrf2 (magma_context *cntxt, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 

Macro Definition Documentation

#define a_ref (   a_1,
  a_2 
)    ( a+(a_2)*(lda) + (a_1))
#define da_ref (   a_1,
  a_2 
)    (da+(a_2)*ldda + (a_1))

Function Documentation

magma_int_t magma_zgeqrf2 ( magma_context cntxt,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 73 of file zgeqrf-v2.cpp.

References __func__, a_ref, da_ref, dwork, magma_qr_params::flag, magma_qr_params::ib, lapackf77_zgeqrf, lapackf77_zlarft, MAGMA_ERR_ILLEGAL_VALUE, MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgeqrf_mc(), magma_zlarfb_gpu(), MagmaColumnwise, MagmaColumnwiseStr, MagmaConjTrans, MagmaForward, MagmaForwardStr, MagmaLeft, MagmaUpper, max, min, magma_qr_params::nb, context::nb, magma_qr_params::p, context::params, magma_qr_params::t, magma_qr_params::w, zpanel_to_q(), and zq_to_panel().

77 {
78 /* -- MAGMA (version 1.4.0) --
79  Univ. of Tennessee, Knoxville
80  Univ. of California, Berkeley
81  Univ. of Colorado, Denver
82  August 2013
83 
84  Purpose
85  =======
86  ZGEQRF computes a QR factorization of a COMPLEX_16 M-by-N matrix A:
87  A = Q * R. This version does not require work space on the GPU
88  passed as input. GPU memory is allocated in the routine.
89 
90  Arguments
91  =========
92  CNTXT (input) MAGMA_CONTEXT
93  CNTXT specifies the MAGMA hardware context for this routine.
94 
95  M (input) INTEGER
96  The number of rows of the matrix A. M >= 0.
97 
98  N (input) INTEGER
99  The number of columns of the matrix A. N >= 0.
100 
101  A (input/output) COMPLEX_16 array, dimension (LDA,N)
102  On entry, the M-by-N matrix A.
103  On exit, the elements on and above the diagonal of the array
104  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
105  upper triangular if m >= n); the elements below the diagonal,
106  with the array TAU, represent the orthogonal matrix Q as a
107  product of min(m,n) elementary reflectors (see Further
108  Details).
109 
110  Higher performance is achieved if A is in pinned memory, e.g.
111  allocated using cudaMallocHost.
112 
113  LDA (input) INTEGER
114  The leading dimension of the array A. LDA >= max(1,M).
115 
116  TAU (output) COMPLEX_16 array, dimension (min(M,N))
117  The scalar factors of the elementary reflectors (see Further
118  Details).
119 
120  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
121  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
122 
123  Higher performance is achieved if WORK is in pinned memory, e.g.
124  allocated using cudaMallocHost.
125 
126  LWORK (input) INTEGER
127  The dimension of the array WORK. LWORK >= N*NB,
128  where NB can be obtained through magma_get_zgeqrf_nb(M).
129 
130  If LWORK = -1, then a workspace query is assumed; the routine
131  only calculates the optimal size of the WORK array, returns
132  this value as the first entry of the WORK array, and no error
133  message related to LWORK is issued.
134 
135  INFO (output) INTEGER
136  = 0: successful exit
137  < 0: if INFO = -i, the i-th argument had an illegal value
138  if INFO = -8, the GPU memory allocation failed
139 
140  Further Details
141  ===============
142  The matrix Q is represented as a product of elementary reflectors
143 
144  Q = H(1) H(2) . . . H(k), where k = min(m,n).
145 
146  Each H(i) has the form
147 
148  H(i) = I - tau * v * v'
149 
150  where tau is a complex scalar, and v is a complex vector with
151  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
152  and tau in TAU(i).
153  ===================================================================== */
154 
155  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
156  #define da_ref(a_1,a_2) (da+(a_2)*ldda + (a_1))
157 
158  int cnt=-1;
159  cuDoubleComplex c_one = MAGMA_Z_ONE;
160 
161  int i, k, lddwork, old_i, old_ib;
162  int nbmin, nx, ib, ldda;
163 
164  *info = 0;
165 
166  magma_qr_params *qr_params = (magma_qr_params *)cntxt->params;
167  int nb = qr_params->nb;
168 
169  int lwkopt = n * nb;
170  work[0] = MAGMA_Z_MAKE( (double)lwkopt, 0 );
171  long int lquery = (lwork == -1);
172  if (m < 0) {
173  *info = -1;
174  } else if (n < 0) {
175  *info = -2;
176  } else if (lda < max(1,m)) {
177  *info = -4;
178  } else if (lwork < max(1,n) && ! lquery) {
179  *info = -7;
180  }
181  if (*info != 0) {
182  magma_xerbla( __func__, -(*info) );
184  }
185  else if (lquery)
186  return MAGMA_SUCCESS;
187 
188  k = min(m,n);
189  if (k == 0) {
190  work[0] = c_one;
191  return MAGMA_SUCCESS;
192  }
193 
194  cublasStatus status;
195  static cudaStream_t stream[2];
196  cudaStreamCreate(&stream[0]);
197  cudaStreamCreate(&stream[1]);
198 
199  nbmin = 2;
200  nx = nb;
201 
202  lddwork = ((n+31)/32)*32;
203  ldda = ((m+31)/32)*32;
204 
205  cuDoubleComplex *da;
206  status = cublasAlloc((n)*ldda + nb*lddwork, sizeof(cuDoubleComplex), (void**)&da);
207  if (status != CUBLAS_STATUS_SUCCESS) {
208  *info = -8;
209  return 0;
210  }
211  cuDoubleComplex *dwork = da + ldda*(n);
212 
213  if (nb >= nbmin && nb < k && nx < k) {
214  /* Use blocked code initially */
215  cudaMemcpy2DAsync(da_ref(0,nb), ldda*sizeof(cuDoubleComplex),
216  a_ref(0,nb), lda *sizeof(cuDoubleComplex),
217  sizeof(cuDoubleComplex)*(m), (n-nb),
218  cudaMemcpyHostToDevice,stream[0]);
219 
220  old_i = 0; old_ib = nb;
221  for (i = 0; i < k-nx; i += nb) {
222  ib = min(k-i, nb);
223  if (i>0){
224  cudaMemcpy2DAsync( a_ref(i,i), lda *sizeof(cuDoubleComplex),
225  da_ref(i,i), ldda*sizeof(cuDoubleComplex),
226  sizeof(cuDoubleComplex)*(m-i), ib,
227  cudaMemcpyDeviceToHost,stream[1]);
228 
229  cudaMemcpy2DAsync( a_ref(0,i), lda *sizeof(cuDoubleComplex),
230  da_ref(0,i), ldda*sizeof(cuDoubleComplex),
231  sizeof(cuDoubleComplex)*i, ib,
232  cudaMemcpyDeviceToHost,stream[0]);
233 
234  /* Apply H' to A(i:m,i+2*ib:n) from the left */
236  m-old_i, n-old_i-2*old_ib, old_ib,
237  da_ref(old_i, old_i), ldda, dwork, lddwork,
238  da_ref(old_i, old_i+2*old_ib), ldda, dwork+old_ib, lddwork);
239  }
240 
241  cudaStreamSynchronize(stream[1]);
242  int rows = m-i;
243 
244  cnt++;
245  cntxt->nb = qr_params->ib;
246  magma_zgeqrf_mc(cntxt, &rows, &ib, a_ref(i,i), &lda,
247  tau+i, work, &lwork, info);
248  cntxt->nb = nb;
249 
250  /* Form the triangular factor of the block reflector
251  H = H(i) H(i+1) . . . H(i+ib-1) */
253  &rows, &ib, a_ref(i,i), &lda, tau+i, qr_params->t+cnt*nb*nb, &ib);
254  if (cnt < qr_params->np_gpu) {
255  qr_params->p[cnt]=a;
256  }
257  zpanel_to_q(MagmaUpper, ib, a_ref(i,i), lda, qr_params->w+cnt*qr_params->nb*qr_params->nb);
258  cublasSetMatrix(rows, ib, sizeof(cuDoubleComplex),
259  a_ref(i,i), lda, da_ref(i,i), ldda);
260  if (qr_params->flag == 1)
261  zq_to_panel(MagmaUpper, ib, a_ref(i,i), lda, qr_params->w+cnt*qr_params->nb*qr_params->nb);
262 
263  if (i + ib < n) {
264  cublasSetMatrix(ib, ib, sizeof(cuDoubleComplex), qr_params->t+cnt*nb*nb, ib, dwork, lddwork);
265 
266  if (i+ib < k-nx)
267  /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
269  rows, ib, ib,
270  da_ref(i, i ), ldda, dwork, lddwork,
271  da_ref(i, i+ib), ldda, dwork+ib, lddwork);
272  else
274  rows, n-i-ib, ib,
275  da_ref(i, i ), ldda, dwork, lddwork,
276  da_ref(i, i+ib), ldda, dwork+ib, lddwork);
277 
278  old_i = i;
279  old_ib = ib;
280  }
281  }
282  } else {
283  i = 0;
284  }
285 
286  /* Use unblocked code to factor the last or only block. */
287  if (i < k)
288  {
289  ib = n-i;
290  if (i!=0)
291  cublasGetMatrix(m, ib, sizeof(cuDoubleComplex),
292  da_ref(0,i), ldda, a_ref(0,i), lda);
293  int rows = m-i;
294 
295  cnt++;
296  lapackf77_zgeqrf(&rows, &ib, a_ref(i,i), &lda, tau+i, work, &lwork, info);
297 
298  if (cnt < qr_params->np_gpu)
299  {
300  int ib2=min(ib,nb);
301 
303  &rows, &ib2, a_ref(i,i), &lda, tau+i, qr_params->t+cnt*nb*nb, &ib2);
304 
305  qr_params->p[cnt]=a;
306  }
307  }
308 
309  cudaStreamDestroy( stream[0] );
310  cudaStreamDestroy( stream[1] );
311  cublasFree(da);
312  return MAGMA_SUCCESS;
313 } /* magma_zgeqrf */
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
void zq_to_panel(char uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
Definition: zpanel_to_q.cpp:57
magma_int_t nb
Definition: magma.h:40
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define MagmaForwardStr
Definition: magma.h:94
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
#define MagmaUpper
Definition: magma.h:61
#define lapackf77_zlarft
Definition: magma_zlapack.h:80
magma_int_t magma_zgeqrf_mc(magma_context *cntxt, magma_int_t *m, magma_int_t *n, cuDoubleComplex *A, magma_int_t *lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t *lwork, magma_int_t *info)
Definition: zgeqrf_mc.cpp:361
magma_int_t magma_zlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *dv, magma_int_t ldv, cuDoubleComplex *dt, magma_int_t ldt, cuDoubleComplex *dc, magma_int_t ldc, cuDoubleComplex *dowrk, magma_int_t ldwork)
Definition: zlarfb_gpu.cpp:21
#define dwork(dev, i, j)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaConjTrans
Definition: magma.h:59
#define a_ref(a_1, a_2)
#define MagmaColumnwiseStr
Definition: magma.h:97
volatile cuDoubleComplex ** p
Definition: zgeqrf-v2.cpp:55
void * params
Definition: magma.h:43
#define MagmaForward
Definition: magma.h:71
void zpanel_to_q(char uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
Definition: zpanel_to_q.cpp:17
#define MAGMA_SUCCESS
Definition: magma.h:106
#define lapackf77_zgeqrf
Definition: magma_zlapack.h:62
cuDoubleComplex * t
Definition: zgeqrf-v2.cpp:52
#define da_ref(a_1, a_2)
cuDoubleComplex * w
Definition: zgeqrf-v2.cpp:67
#define MAGMA_Z_ONE
Definition: magma.h:132
#define max(a, b)
Definition: common_magma.h:82
#define MagmaColumnwise
Definition: magma.h:74

Here is the call graph for this function:

Here is the caller graph for this function: