MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dgeqrf_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for dgeqrf_gpu.cpp:

Go to the source code of this file.

Macros

#define a_ref(a_1, a_2)   (dA+(a_2)*(ldda) + (a_1))
 
#define t_ref(a_1)   (dT+(a_1)*nb)
 
#define d_ref(a_1)   (dT+(minmn+(a_1))*nb)
 
#define dd_ref(a_1)   (dT+(2*minmn+(a_1))*nb)
 
#define work_ref(a_1)   ( work + (a_1))
 
#define hwork   ( work + (nb)*(m))
 

Functions

void dsplit_diag_block (magma_int_t ib, double *a, magma_int_t lda, double *work)
 
magma_int_t magma_dgeqrf_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, double *dT, magma_int_t *info)
 

Macro Definition Documentation

#define a_ref (   a_1,
  a_2 
)    (dA+(a_2)*(ldda) + (a_1))
#define d_ref (   a_1)    (dT+(minmn+(a_1))*nb)
#define dd_ref (   a_1)    (dT+(2*minmn+(a_1))*nb)
#define hwork   ( work + (nb)*(m))
#define t_ref (   a_1)    (dT+(a_1)*nb)
#define work_ref (   a_1)    ( work + (a_1))

Function Documentation

void dsplit_diag_block ( magma_int_t  ib,
double *  a,
magma_int_t  lda,
double *  work 
)

Definition at line 22 of file dgeqrf_gpu.cpp.

References lapackf77_dtrtri(), MAGMA_D_ONE, MAGMA_D_ZERO, MagmaNonUnitStr, and MagmaUpperStr.

23 {
24  magma_int_t i, j, info;
25  double *cola, *colw;
26  double c_zero = MAGMA_D_ZERO;
27  double c_one = MAGMA_D_ONE;
28 
29  for(i=0; i<ib; i++){
30  cola = a + i*lda;
31  colw = work + i*ib;
32  for(j=0; j<i; j++){
33  colw[j] = cola[j];
34  cola[j] = c_zero;
35  }
36  colw[i] = cola[i];
37  cola[i] = c_one;
38  }
39  lapackf77_dtrtri( MagmaUpperStr, MagmaNonUnitStr, &ib, work, &ib, &info);
40 }
#define MAGMA_D_ONE
Definition: magma.h:176
int magma_int_t
Definition: magmablas.h:12
#define MagmaNonUnitStr
Definition: magma.h:88
void lapackf77_dtrtri(const char *uplo, const char *diag, const magma_int_t *n, double *A, const magma_int_t *lda, magma_int_t *info)
#define MAGMA_D_ZERO
Definition: magma.h:175
#define MagmaUpperStr
Definition: magma.h:84

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqrf_gpu ( magma_int_t  m,
magma_int_t  n,
double *  dA,
magma_int_t  ldda,
double *  tau,
double *  dT,
magma_int_t *  info 
)

Definition at line 43 of file dgeqrf_gpu.cpp.

References __func__, a_ref, d_ref, dd_ref, dsplit_diag_block(), hwork, lapackf77_dgeqrf(), lapackf77_dlarft(), magma_dgetmatrix, magma_dgetmatrix_async, magma_dlarfb_gpu(), magma_dmalloc_pinned(), magma_dsetmatrix, magma_dsetmatrix_async, MAGMA_ERR_HOST_ALLOC, magma_free_pinned, magma_get_dgeqrf_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), MagmaColumnwise, MagmaColumnwiseStr, MagmaForward, MagmaForwardStr, MagmaLeft, MagmaTrans, max, min, t_ref, and work_ref.

47 {
48 /* -- MAGMA (version 1.4.0) --
49  Univ. of Tennessee, Knoxville
50  Univ. of California, Berkeley
51  Univ. of Colorado, Denver
52  August 2013
53 
54  Purpose
55  =======
56  DGEQRF computes a QR factorization of a real M-by-N matrix A:
57  A = Q * R.
58 
59  This version stores the triangular dT matrices used in
60  the block QR factorization so that they can be applied directly (i.e.,
61  without being recomputed) later. As a result, the application
62  of Q is much faster. Also, the upper triangular matrices for V have 0s
63  in them. The corresponding parts of the upper triangular R are inverted
64  and stored separately in dT.
65 
66  Arguments
67  =========
68  M (input) INTEGER
69  The number of rows of the matrix A. M >= 0.
70 
71  N (input) INTEGER
72  The number of columns of the matrix A. N >= 0.
73 
74  dA (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N)
75  On entry, the M-by-N matrix A.
76  On exit, the elements on and above the diagonal of the array
77  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
78  upper triangular if m >= n); the elements below the diagonal,
79  with the array TAU, represent the orthogonal matrix Q as a
80  product of min(m,n) elementary reflectors (see Further
81  Details).
82 
83  LDDA (input) INTEGER
84  The leading dimension of the array dA. LDDA >= max(1,M).
85  To benefit from coalescent memory accesses LDDA must be
86  divisible by 16.
87 
88  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
89  The scalar factors of the elementary reflectors (see Further
90  Details).
91 
92  dT (workspace/output) DOUBLE_PRECISION array on the GPU,
93  dimension (2*MIN(M, N) + (N+31)/32*32 )*NB,
94  where NB can be obtained through magma_get_dgeqrf_nb(M).
95  It starts with MIN(M,N)*NB block that store the triangular T
96  matrices, followed by the MIN(M,N)*NB block of the diagonal
97  inverses for the R matrix. The rest of the array is used as workspace.
98 
99  INFO (output) INTEGER
100  = 0: successful exit
101  < 0: if INFO = -i, the i-th argument had an illegal value
102  or another error occurred, such as memory allocation failed.
103 
104  Further Details
105  ===============
106  The matrix Q is represented as a product of elementary reflectors
107 
108  Q = H(1) H(2) . . . H(k), where k = min(m,n).
109 
110  Each H(i) has the form
111 
112  H(i) = I - tau * v * v'
113 
114  where tau is a real scalar, and v is a real vector with
115  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
116  and tau in TAU(i).
117  ===================================================================== */
118 
119  #define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1))
120  #define t_ref(a_1) (dT+(a_1)*nb)
121  #define d_ref(a_1) (dT+(minmn+(a_1))*nb)
122  #define dd_ref(a_1) (dT+(2*minmn+(a_1))*nb)
123  #define work_ref(a_1) ( work + (a_1))
124  #define hwork ( work + (nb)*(m))
125 
126  magma_int_t i, k, minmn, old_i, old_ib, rows, cols;
127  magma_int_t ib, nb;
128  magma_int_t ldwork, lddwork, lwork, lhwork;
129  double *work, *ut;
130 
131  /* check arguments */
132  *info = 0;
133  if (m < 0) {
134  *info = -1;
135  } else if (n < 0) {
136  *info = -2;
137  } else if (ldda < max(1,m)) {
138  *info = -4;
139  }
140  if (*info != 0) {
141  magma_xerbla( __func__, -(*info) );
142  return *info;
143  }
144 
145  k = minmn = min(m,n);
146  if (k == 0)
147  return *info;
148 
149  nb = magma_get_dgeqrf_nb(m);
150 
151  lwork = (m + n + nb)*nb;
152  lhwork = lwork - m*nb;
153 
154  if (MAGMA_SUCCESS != magma_dmalloc_pinned( &work, lwork )) {
155  *info = MAGMA_ERR_HOST_ALLOC;
156  return *info;
157  }
158 
159  ut = hwork+nb*(n);
160  memset( ut, 0, nb*nb*sizeof(double));
161 
162  magma_queue_t stream[2];
163  magma_queue_create( &stream[0] );
164  magma_queue_create( &stream[1] );
165 
166  ldwork = m;
167  lddwork= n;
168 
169  if ( (nb > 1) && (nb < k) ) {
170  /* Use blocked code initially.
171  Lookahead pattern: the next panel is downloaded asynchronously on
172  stream[1] while the previous block reflector is applied to the
173  trailing matrix on the GPU. */
174  old_i = 0; old_ib = nb;
175  for (i = 0; i < k-nb; i += nb) {
176  ib = min(k-i, nb);
177  rows = m -i;
178  magma_dgetmatrix_async( rows, ib,
179  a_ref(i,i), ldda,
180  work_ref(i), ldwork, stream[1] );
181  if (i>0){
182  /* Apply H' to A(i:m,i+2*ib:n) from the left */
183  cols = n-old_i-2*old_ib;
184  /* NOTE(review): listing line 181 — the opening of the
185  magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward,
186  MagmaColumnwise, ... ) call — is missing from this extracted
187  listing; restore it from the original source file. */
188  m-old_i, cols, old_ib,
189  a_ref(old_i, old_i ), ldda, t_ref(old_i), nb,
190  a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0), lddwork);
191 
192  /* store the diagonal */
193  magma_dsetmatrix_async( old_ib, old_ib,
194  ut, old_ib,
195  d_ref(old_i), old_ib, stream[0] );
196  }
197 
198  magma_queue_sync( stream[1] );
199  lapackf77_dgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info);
200  /* Form the triangular factor of the block reflector
201  H = H(i) H(i+1) . . . H(i+ib-1) */
202  /* NOTE(review): listing line 196 — the opening of the
203  lapackf77_dlarft( MagmaForwardStr, MagmaColumnwiseStr, ... )
204  call — is missing from this extracted listing. */
205  &rows, &ib,
206  work_ref(i), &ldwork, tau+i, hwork, &ib);
207 
208  /* Put 0s in the upper triangular part of a panel (and 1s on the
209  diagonal); copy the upper triangular in ut and invert it. */
210  magma_queue_sync( stream[0] );
211  dsplit_diag_block(ib, work_ref(i), ldwork, ut);
212  magma_dsetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda );
213 
214  if (i + ib < n) {
215  /* Send the triangular factor T to the GPU */
216  magma_dsetmatrix( ib, ib, hwork, ib, t_ref(i), nb );
217 
218  if (i+nb < k-nb){
219  /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
220  /* NOTE(review): listing line 212 — the opening of the
221  magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward,
222  MagmaColumnwise, ... ) call — is missing from this
223  extracted listing. */
224  rows, ib, ib,
225  a_ref(i, i ), ldda, t_ref(i), nb,
226  a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
227  }
228  else {
229  cols = n-i-ib;
230  /* NOTE(review): listing line 219 — the opening of the
231  magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward,
232  MagmaColumnwise, ... ) call — is missing from this
233  extracted listing. */
234  rows, cols, ib,
235  a_ref(i, i ), ldda, t_ref(i), nb,
236  a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
237  /* Fix the diagonal block */
238  magma_dsetmatrix( ib, ib, ut, ib, d_ref(i), ib );
239  }
240  old_i = i;
241  old_ib = ib;
242  }
243  }
244  } else {
245  i = 0;
246  }
247 
248  /* Use unblocked code to factor the last or only block. */
249  if (i < k) {
250  ib = n-i;
251  rows = m-i;
252  magma_dgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows );
253  lhwork = lwork - rows*ib;
254  lapackf77_dgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info);
255 
256  magma_dsetmatrix( rows, ib, work, rows, a_ref(i, i), ldda );
257  }
258 
259  magma_queue_destroy( stream[0] );
260  magma_queue_destroy( stream[1] );
261  magma_free_pinned( work );
262  return *info;
263 
264 /* End of MAGMA_DGEQRF */
265 
266 } /* magma_dgeqrf */
#define a_ref(a_1, a_2)
static magma_err_t magma_dmalloc_pinned(double **ptrPtr, size_t n)
Definition: magma.h:90
#define min(a, b)
Definition: common_magma.h:86
#define t_ref(a_1)
#define magma_dsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_d.h:711
#define MagmaForwardStr
Definition: magma.h:94
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
magma_int_t ldda
#define magma_free_pinned(ptr)
Definition: magma.h:60
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_d.h:714
void dsplit_diag_block(magma_int_t ib, double *a, magma_int_t lda, double *work)
Definition: dgeqrf_gpu.cpp:22
#define hwork
void lapackf77_dlarft(const char *direct, const char *storev, const magma_int_t *n, const magma_int_t *k, double *V, const magma_int_t *ldv, const double *tau, double *T, const magma_int_t *ldt)
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define magma_queue_destroy(queue)
Definition: magma.h:116
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void lapackf77_dgeqrf(const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)
#define d_ref(a_1)
magma_int_t magma_dlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const double *dv, magma_int_t ldv, const double *dt, magma_int_t ldt, double *dc, magma_int_t ldc, double *dwork, magma_int_t ldwork)
Definition: dlarfb_gpu.cpp:15
#define MagmaTrans
Definition: magma.h:58
#define MagmaColumnwiseStr
Definition: magma.h:97
magma_int_t magma_get_dgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:141
#define MagmaForward
Definition: magma.h:71
#define MAGMA_SUCCESS
Definition: magma.h:106
#define dd_ref(a_1)
#define max(a, b)
Definition: common_magma.h:82
#define work_ref(a_1)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define MagmaColumnwise
Definition: magma.h:74
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702
#define magma_queue_sync(queue)
Definition: magma.h:119

Here is the call graph for this function:

Here is the caller graph for this function: