MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgeqrf3_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for zgeqrf3_gpu.cpp:

Go to the source code of this file.

Macros

#define a_ref(a_1, a_2)   (dA+(a_2)*(ldda) + (a_1))
 
#define t_ref(a_1)   (dT+(a_1)*nb)
 
#define d_ref(a_1)   (dT+(minmn+(a_1))*nb)
 
#define dd_ref(a_1)   (dT+(2*minmn+(a_1))*nb)
 
#define work_ref(a_1)   ( work + (a_1))
 
#define hwork   ( work + (nb)*(m))
 

Functions

void zsplit_diag_block3 (int ib, magmaDoubleComplex *a, int lda, magmaDoubleComplex *work)
 
magma_int_t magma_zgeqrf3_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magma_int_t *info)
 

Macro Definition Documentation

#define a_ref (   a_1,
  a_2 
)    (dA+(a_2)*(ldda) + (a_1))
#define d_ref (   a_1)    (dT+(minmn+(a_1))*nb)
#define dd_ref (   a_1)    (dT+(2*minmn+(a_1))*nb)
#define hwork   ( work + (nb)*(m))
#define t_ref (   a_1)    (dT+(a_1)*nb)
#define work_ref (   a_1)    ( work + (a_1))

Function Documentation

magma_int_t magma_zgeqrf3_gpu ( magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  dT,
magma_int_t info 
)

Definition at line 38 of file zgeqrf3_gpu.cpp.

References __func__, a_ref, d_ref, dd_ref, hwork, lapackf77_zgeqrf, lapackf77_zlarft, MAGMA_ERR_HOST_ALLOC, magma_free_pinned, magma_get_zgeqrf_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), magma_zgetmatrix, magma_zgetmatrix_async, magma_zlarfb_gpu(), magma_zmalloc_pinned(), magma_zsetmatrix, magma_zsetmatrix_async, MagmaColumnwise, MagmaColumnwiseStr, MagmaConjTrans, MagmaForward, MagmaForwardStr, MagmaLeft, max, min, t_ref, work_ref, and zsplit_diag_block3().

42 {
43 /* -- MAGMA (version 1.4.0) --
44  Univ. of Tennessee, Knoxville
45  Univ. of California, Berkeley
46  Univ. of Colorado, Denver
47  August 2013
48 
49  Purpose
50  =======
51  ZGEQRF3 computes a QR factorization of a complex M-by-N matrix A:
52  A = Q * R.
53 
54  This version stores the triangular dT matrices used in
55  the block QR factorization so that they can be applied directly (i.e.,
56  without being recomputed) later. As a result, the application
57  of Q is much faster. Also, the upper triangular matrices for V have 0s
58  in them and the corresponding parts of the upper triangular R are
59  stored separately in dT.
60 
61  Arguments
62  =========
63  M (input) INTEGER
64  The number of rows of the matrix A. M >= 0.
65 
66  N (input) INTEGER
67  The number of columns of the matrix A. N >= 0.
68 
69  dA (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
70  On entry, the M-by-N matrix A.
71  On exit, the elements on and above the diagonal of the array
72  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
73  upper triangular if m >= n); the elements below the diagonal,
74  with the array TAU, represent the orthogonal matrix Q as a
75  product of min(m,n) elementary reflectors (see Further
76  Details).
77 
78  LDDA (input) INTEGER
79  The leading dimension of the array dA. LDDA >= max(1,M).
80  To benefit from coalescent memory accesses LDDA must be
81  dividable by 16.
82 
83  TAU (output) COMPLEX_16 array, dimension (min(M,N))
84  The scalar factors of the elementary reflectors (see Further
85  Details).
86 
87  dT (workspace/output) COMPLEX_16 array on the GPU,
88  dimension (2*MIN(M, N) + (N+31)/32*32 )*NB,
89  where NB can be obtained through magma_get_zgeqrf_nb(M).
90  It starts with MIN(M,N)*NB block that store the triangular T
91  matrices, followed by the MIN(M,N)*NB block of the diagonal
92  matrices for the R matrix. The rest of the array is used as workspace.
93 
94  INFO (output) INTEGER
95  = 0: successful exit
96  < 0: if INFO = -i, the i-th argument had an illegal value
97  or another error occured, such as memory allocation failed.
98 
99  Further Details
100  ===============
101  The matrix Q is represented as a product of elementary reflectors
102 
103  Q = H(1) H(2) . . . H(k), where k = min(m,n).
104 
105  Each H(i) has the form
106 
107  H(i) = I - tau * v * v'
108 
109  where tau is a complex scalar, and v is a complex vector with
110  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
111  and tau in TAU(i).
112  ===================================================================== */
113 
114  #define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1))
115  #define t_ref(a_1) (dT+(a_1)*nb)
116  #define d_ref(a_1) (dT+(minmn+(a_1))*nb)
117  #define dd_ref(a_1) (dT+(2*minmn+(a_1))*nb)
118  #define work_ref(a_1) ( work + (a_1))
119  #define hwork ( work + (nb)*(m))
120 
121  magma_int_t i, k, minmn, old_i, old_ib, rows, cols;
122  magma_int_t ib, nb;
123  magma_int_t ldwork, lddwork, lwork, lhwork;
124  magmaDoubleComplex *work, *ut;
125 
126  /* check arguments */
127  *info = 0;
128  if (m < 0) {
129  *info = -1;
130  } else if (n < 0) {
131  *info = -2;
132  } else if (ldda < max(1,m)) {
133  *info = -4;
134  }
135  if (*info != 0) {
136  magma_xerbla( __func__, -(*info) );
137  return *info;
138  }
139 
140  k = minmn = min(m,n);
141  if (k == 0)
142  return *info;
143 
144  nb = magma_get_zgeqrf_nb(m);
145 
146  lwork = (m + n + nb)*nb;
147  lhwork = lwork - m*nb;
148 
149  if (MAGMA_SUCCESS != magma_zmalloc_pinned( &work, lwork )) {
150  *info = MAGMA_ERR_HOST_ALLOC;
151  return *info;
152  }
153 
154  ut = hwork+nb*(n);
155  memset( ut, 0, nb*nb*sizeof(magmaDoubleComplex));
156 
157  magma_queue_t stream[2];
158  magma_queue_create( &stream[0] );
159  magma_queue_create( &stream[1] );
160 
161  ldwork = m;
162  lddwork= n;
163 
164  if ( (nb > 1) && (nb < k) ) {
165  /* Use blocked code initially */
166  old_i = 0; old_ib = nb;
167  for (i = 0; i < k-nb; i += nb) {
168  ib = min(k-i, nb);
169  rows = m -i;
170  magma_zgetmatrix_async( rows, ib,
171  a_ref(i,i), ldda,
172  work_ref(i), ldwork, stream[1] );
173  if (i>0){
174  /* Apply H' to A(i:m,i+2*ib:n) from the left */
175  cols = n-old_i-2*old_ib;
177  m-old_i, cols, old_ib,
178  a_ref(old_i, old_i ), ldda, t_ref(old_i), nb,
179  a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0), lddwork);
180 
181  /* store the diagonal */
182  magma_zsetmatrix_async( old_ib, old_ib,
183  ut, old_ib,
184  d_ref(old_i), old_ib, stream[0] );
185  }
186 
187  magma_queue_sync( stream[1] );
188  lapackf77_zgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info);
189  /* Form the triangular factor of the block reflector
190  H = H(i) H(i+1) . . . H(i+ib-1) */
192  &rows, &ib,
193  work_ref(i), &ldwork, tau+i, hwork, &ib);
194 
195  /* Put 0s in the upper triangular part of a panel (and 1s on the
196  diagonal); copy the upper triangular in ut. */
197  magma_queue_sync( stream[0] );
198  zsplit_diag_block3(ib, work_ref(i), ldwork, ut);
199  magma_zsetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda );
200 
201  if (i + ib < n) {
202  /* Send the triangular factor T to the GPU */
203  magma_zsetmatrix( ib, ib, hwork, ib, t_ref(i), nb );
204 
205  if (i+nb < k-nb){
206  /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
208  rows, ib, ib,
209  a_ref(i, i ), ldda, t_ref(i), nb,
210  a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
211  }
212  else {
213  cols = n-i-ib;
215  rows, cols, ib,
216  a_ref(i, i ), ldda, t_ref(i), nb,
217  a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
218  /* Fix the diagonal block */
219  magma_zsetmatrix( ib, ib, ut, ib, d_ref(i), ib );
220  }
221  old_i = i;
222  old_ib = ib;
223  }
224  }
225  } else {
226  i = 0;
227  }
228 
229  /* Use unblocked code to factor the last or only block. */
230  if (i < k) {
231  ib = n-i;
232  rows = m-i;
233  magma_zgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows );
234  lhwork = lwork - rows*ib;
235  lapackf77_zgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info);
236 
237  magma_zsetmatrix( rows, ib, work, rows, a_ref(i, i), ldda );
238  }
239 
240  magma_queue_destroy( stream[0] );
241  magma_queue_destroy( stream[1] );
242  magma_free_pinned( work );
243  return *info;
244 
245 /* End of MAGMA_ZGEQRF */
246 
247 } /* magma_zgeqrf */
#define a_ref(a_1, a_2)
#define min(a, b)
Definition: common_magma.h:86
#define MagmaForwardStr
Definition: magma.h:94
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
#define lapackf77_zlarft
Definition: magma_zlapack.h:80
#define magma_free_pinned(ptr)
Definition: magma.h:60
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
#define d_ref(a_1)
magma_int_t magma_zlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *dv, magma_int_t ldv, cuDoubleComplex *dt, magma_int_t ldt, cuDoubleComplex *dc, magma_int_t ldc, cuDoubleComplex *dowrk, magma_int_t ldwork)
Definition: zlarfb_gpu.cpp:21
#define hwork
void zsplit_diag_block3(int ib, magmaDoubleComplex *a, int lda, magmaDoubleComplex *work)
Definition: zgeqrf3_gpu.cpp:19
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaConjTrans
Definition: magma.h:59
magma_int_t ldda
#define MagmaColumnwiseStr
Definition: magma.h:97
#define work_ref(a_1)
static magma_err_t magma_zmalloc_pinned(magmaDoubleComplex **ptrPtr, size_t n)
Definition: magma.h:92
magma_int_t magma_get_zgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:169
#define MagmaForward
Definition: magma.h:71
#define MAGMA_SUCCESS
Definition: magma.h:106
#define lapackf77_zgeqrf
Definition: magma_zlapack.h:62
#define t_ref(a_1)
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
#define dd_ref(a_1)
#define magma_zsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_z.h:702
#define max(a, b)
Definition: common_magma.h:82
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define MagmaColumnwise
Definition: magma.h:74
#define magma_queue_sync(queue)
Definition: magma.h:119

Here is the call graph for this function:

void zsplit_diag_block3 ( int  ib,
magmaDoubleComplex *  a,
int  lda,
magmaDoubleComplex *  work 
)

Definition at line 19 of file zgeqrf3_gpu.cpp.

References MAGMA_Z_ONE, and MAGMA_Z_ZERO.

19  {
20  int i, j;
21  magmaDoubleComplex *cola, *colw;
22  magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
23  magmaDoubleComplex c_one = MAGMA_Z_ONE;
24 
25  for(i=0; i<ib; i++){
26  cola = a + i*lda;
27  colw = work + i*ib;
28  for(j=0; j<i; j++){
29  colw[j] = cola[j];
30  cola[j] = c_zero;
31  }
32  colw[i] = cola[i];
33  cola[i] = c_one;
34  }
35 }
#define MAGMA_Z_ZERO
Definition: magma.h:131
#define MAGMA_Z_ONE
Definition: magma.h:132

Here is the caller graph for this function: