MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgeqrf_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for zgeqrf_gpu.cpp:

Go to the source code of this file.

Macros

#define a_ref(a_1, a_2)   (dA+(a_2)*(ldda) + (a_1))
 
#define t_ref(a_1)   (dT+(a_1)*nb)
 
#define d_ref(a_1)   (dT+(minmn+(a_1))*nb)
 
#define dd_ref(a_1)   (dT+(2*minmn+(a_1))*nb)
 
#define work_ref(a_1)   ( work + (a_1))
 
#define hwork   ( work + (nb)*(m))
 

Functions

void zsplit_diag_block (magma_int_t ib, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *work)
 
magma_int_t magma_zgeqrf_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magma_int_t *info)
 

Macro Definition Documentation

#define a_ref (   a_1,
  a_2 
)    (dA+(a_2)*(ldda) + (a_1))
#define d_ref (   a_1)    (dT+(minmn+(a_1))*nb)
#define dd_ref (   a_1)    (dT+(2*minmn+(a_1))*nb)
#define hwork   ( work + (nb)*(m))
#define t_ref (   a_1)    (dT+(a_1)*nb)
#define work_ref (   a_1)    ( work + (a_1))

Function Documentation

magma_int_t magma_zgeqrf_gpu ( magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  dT,
magma_int_t info 
)

Definition at line 43 of file zgeqrf_gpu.cpp.

References __func__, a_ref, d_ref, dd_ref, hwork, lapackf77_zgeqrf, lapackf77_zlarft, MAGMA_ERR_HOST_ALLOC, magma_free_pinned, magma_get_zgeqrf_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), magma_zgetmatrix, magma_zgetmatrix_async, magma_zlarfb_gpu(), magma_zmalloc_pinned(), magma_zsetmatrix, magma_zsetmatrix_async, MagmaColumnwise, MagmaColumnwiseStr, MagmaConjTrans, MagmaForward, MagmaForwardStr, MagmaLeft, max, min, t_ref, work_ref, and zsplit_diag_block().

47 {
48 /* -- MAGMA (version 1.4.0) --
49  Univ. of Tennessee, Knoxville
50  Univ. of California, Berkeley
51  Univ. of Colorado, Denver
52  August 2013
53 
54  Purpose
55  =======
56  ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
57  A = Q * R.
58 
59  This version stores the triangular dT matrices used in
60  the block QR factorization so that they can be applied directly (i.e.,
61  without being recomputed) later. As a result, the application
62  of Q is much faster. Also, the upper triangular matrices for V have 0s
63  in them. The corresponding parts of the upper triangular R are inverted
64  and stored separately in dT.
65 
66  Arguments
67  =========
68  M (input) INTEGER
69  The number of rows of the matrix A. M >= 0.
70 
71  N (input) INTEGER
72  The number of columns of the matrix A. N >= 0.
73 
74  dA (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
75  On entry, the M-by-N matrix A.
76  On exit, the elements on and above the diagonal of the array
77  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
78  upper triangular if m >= n); the elements below the diagonal,
79  with the array TAU, represent the orthogonal matrix Q as a
80  product of min(m,n) elementary reflectors (see Further
81  Details).
82 
83  LDDA (input) INTEGER
84  The leading dimension of the array dA. LDDA >= max(1,M).
85  To benefit from coalescent memory accesses LDDA must be
86  dividable by 16.
87 
88  TAU (output) COMPLEX_16 array, dimension (min(M,N))
89  The scalar factors of the elementary reflectors (see Further
90  Details).
91 
92  dT (workspace/output) COMPLEX_16 array on the GPU,
93  dimension (2*MIN(M, N) + (N+31)/32*32 )*NB,
94  where NB can be obtained through magma_get_zgeqrf_nb(M).
95  It starts with MIN(M,N)*NB block that store the triangular T
96  matrices, followed by the MIN(M,N)*NB block of the diagonal
97  inverses for the R matrix. The rest of the array is used as workspace.
98 
99  INFO (output) INTEGER
100  = 0: successful exit
101  < 0: if INFO = -i, the i-th argument had an illegal value
102  or another error occured, such as memory allocation failed.
103 
104  Further Details
105  ===============
106  The matrix Q is represented as a product of elementary reflectors
107 
108  Q = H(1) H(2) . . . H(k), where k = min(m,n).
109 
110  Each H(i) has the form
111 
112  H(i) = I - tau * v * v'
113 
114  where tau is a complex scalar, and v is a complex vector with
115  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
116  and tau in TAU(i).
117  ===================================================================== */
118 
119  #define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1))
120  #define t_ref(a_1) (dT+(a_1)*nb)
121  #define d_ref(a_1) (dT+(minmn+(a_1))*nb)
122  #define dd_ref(a_1) (dT+(2*minmn+(a_1))*nb)
123  #define work_ref(a_1) ( work + (a_1))
124  #define hwork ( work + (nb)*(m))
125 
126  magma_int_t i, k, minmn, old_i, old_ib, rows, cols;
127  magma_int_t ib, nb;
128  magma_int_t ldwork, lddwork, lwork, lhwork;
129  magmaDoubleComplex *work, *ut;
130 
131  /* check arguments */
132  *info = 0;
133  if (m < 0) {
134  *info = -1;
135  } else if (n < 0) {
136  *info = -2;
137  } else if (ldda < max(1,m)) {
138  *info = -4;
139  }
140  if (*info != 0) {
141  magma_xerbla( __func__, -(*info) );
142  return *info;
143  }
144 
145  k = minmn = min(m,n);
146  if (k == 0)
147  return *info;
148 
149  nb = magma_get_zgeqrf_nb(m);
150 
151  lwork = (m + n + nb)*nb;
152  lhwork = lwork - m*nb;
153 
154  if (MAGMA_SUCCESS != magma_zmalloc_pinned( &work, lwork )) {
155  *info = MAGMA_ERR_HOST_ALLOC;
156  return *info;
157  }
158 
159  ut = hwork+nb*(n);
160  memset( ut, 0, nb*nb*sizeof(magmaDoubleComplex));
161 
162  magma_queue_t stream[2];
163  magma_queue_create( &stream[0] );
164  magma_queue_create( &stream[1] );
165 
166  ldwork = m;
167  lddwork= n;
168 
169  if ( (nb > 1) && (nb < k) ) {
170  /* Use blocked code initially */
171  old_i = 0; old_ib = nb;
172  for (i = 0; i < k-nb; i += nb) {
173  ib = min(k-i, nb);
174  rows = m -i;
175  magma_zgetmatrix_async( rows, ib,
176  a_ref(i,i), ldda,
177  work_ref(i), ldwork, stream[1] );
178  if (i>0){
179  /* Apply H' to A(i:m,i+2*ib:n) from the left */
180  cols = n-old_i-2*old_ib;
182  m-old_i, cols, old_ib,
183  a_ref(old_i, old_i ), ldda, t_ref(old_i), nb,
184  a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0), lddwork);
185 
186  /* store the diagonal */
187  magma_zsetmatrix_async( old_ib, old_ib,
188  ut, old_ib,
189  d_ref(old_i), old_ib, stream[0] );
190  }
191 
192  magma_queue_sync( stream[1] );
193  lapackf77_zgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info);
194  /* Form the triangular factor of the block reflector
195  H = H(i) H(i+1) . . . H(i+ib-1) */
197  &rows, &ib,
198  work_ref(i), &ldwork, tau+i, hwork, &ib);
199 
200  /* Put 0s in the upper triangular part of a panel (and 1s on the
201  diagonal); copy the upper triangular in ut and invert it. */
202  magma_queue_sync( stream[0] );
203  zsplit_diag_block(ib, work_ref(i), ldwork, ut);
204  magma_zsetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda );
205 
206  if (i + ib < n) {
207  /* Send the triangular factor T to the GPU */
208  magma_zsetmatrix( ib, ib, hwork, ib, t_ref(i), nb );
209 
210  if (i+nb < k-nb){
211  /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
213  rows, ib, ib,
214  a_ref(i, i ), ldda, t_ref(i), nb,
215  a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
216  }
217  else {
218  cols = n-i-ib;
220  rows, cols, ib,
221  a_ref(i, i ), ldda, t_ref(i), nb,
222  a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
223  /* Fix the diagonal block */
224  magma_zsetmatrix( ib, ib, ut, ib, d_ref(i), ib );
225  }
226  old_i = i;
227  old_ib = ib;
228  }
229  }
230  } else {
231  i = 0;
232  }
233 
234  /* Use unblocked code to factor the last or only block. */
235  if (i < k) {
236  ib = n-i;
237  rows = m-i;
238  magma_zgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows );
239  lhwork = lwork - rows*ib;
240  lapackf77_zgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info);
241 
242  magma_zsetmatrix( rows, ib, work, rows, a_ref(i, i), ldda );
243  }
244 
245  magma_queue_destroy( stream[0] );
246  magma_queue_destroy( stream[1] );
247  magma_free_pinned( work );
248  return *info;
249 
250 /* End of MAGMA_ZGEQRF */
251 
252 } /* magma_zgeqrf */
#define min(a, b)
Definition: common_magma.h:86
#define MagmaForwardStr
Definition: magma.h:94
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
#define lapackf77_zlarft
Definition: magma_zlapack.h:80
#define magma_free_pinned(ptr)
Definition: magma.h:60
#define d_ref(a_1)
#define magma_zgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_z.h:705
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
magma_int_t magma_zlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *dv, magma_int_t ldv, cuDoubleComplex *dt, magma_int_t ldt, cuDoubleComplex *dc, magma_int_t ldc, cuDoubleComplex *dowrk, magma_int_t ldwork)
Definition: zlarfb_gpu.cpp:21
#define hwork
#define t_ref(a_1)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaConjTrans
Definition: magma.h:59
void zsplit_diag_block(magma_int_t ib, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *work)
Definition: zgeqrf_gpu.cpp:22
magma_int_t ldda
#define MagmaColumnwiseStr
Definition: magma.h:97
static magma_err_t magma_zmalloc_pinned(magmaDoubleComplex **ptrPtr, size_t n)
Definition: magma.h:92
magma_int_t magma_get_zgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:169
#define MagmaForward
Definition: magma.h:71
#define MAGMA_SUCCESS
Definition: magma.h:106
#define lapackf77_zgeqrf
Definition: magma_zlapack.h:62
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
#define work_ref(a_1)
#define magma_zsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_z.h:702
#define dd_ref(a_1)
#define max(a, b)
Definition: common_magma.h:82
#define a_ref(a_1, a_2)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define MagmaColumnwise
Definition: magma.h:74
#define magma_queue_sync(queue)
Definition: magma.h:119

Here is the call graph for this function:

void zsplit_diag_block ( magma_int_t  ib,
magmaDoubleComplex *  a,
magma_int_t  lda,
magmaDoubleComplex *  work 
)

Definition at line 22 of file zgeqrf_gpu.cpp.

References lapackf77_ztrtri, MAGMA_Z_ONE, MAGMA_Z_ZERO, MagmaNonUnitStr, and MagmaUpperStr.

23 {
24  magma_int_t i, j, info;
25  magmaDoubleComplex *cola, *colw;
26  magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
27  magmaDoubleComplex c_one = MAGMA_Z_ONE;
28 
29  for(i=0; i<ib; i++){
30  cola = a + i*lda;
31  colw = work + i*ib;
32  for(j=0; j<i; j++){
33  colw[j] = cola[j];
34  cola[j] = c_zero;
35  }
36  colw[i] = cola[i];
37  cola[i] = c_one;
38  }
39  lapackf77_ztrtri( MagmaUpperStr, MagmaNonUnitStr, &ib, work, &ib, &info);
40 }
int magma_int_t
Definition: magmablas.h:12
#define lapackf77_ztrtri
Definition: magma_zlapack.h:93
#define MagmaNonUnitStr
Definition: magma.h:88
#define MAGMA_Z_ZERO
Definition: magma.h:131
#define MAGMA_Z_ONE
Definition: magma.h:132
#define MagmaUpperStr
Definition: magma.h:84

Here is the caller graph for this function: