MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dgeqrf_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for dgeqrf_gpu.cpp:

Go to the source code of this file.

Macros

#define a_ref(a_1, a_2)   (dA+(a_2)*(ldda) + (a_1))
#define t_ref(a_1)   (dT+(a_1)*nb)
#define d_ref(a_1)   (dT+(minmn+(a_1))*nb)
#define dd_ref(a_1)   (dT+(2*minmn+(a_1))*nb)
#define work_ref(a_1)   ( work + (a_1))
#define hwork   ( work + (nb)*(m))

Functions

void dsplit_diag_block (int ib, double *a, int lda, double *work)
magma_int_t magma_dgeqrf_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, double *dT, magma_int_t *info)

Macro Definition Documentation

#define a_ref (   a_1,
  a_2 
)    (dA+(a_2)*(ldda) + (a_1))
#define d_ref (   a_1)    (dT+(minmn+(a_1))*nb)
#define dd_ref (   a_1)    (dT+(2*minmn+(a_1))*nb)
#define hwork   ( work + (nb)*(m))
#define t_ref (   a_1)    (dT+(a_1)*nb)
#define work_ref (   a_1)    ( work + (a_1))

Function Documentation

void dsplit_diag_block ( int  ib,
double *  a,
int  lda,
double *  work 
)

Definition at line 21 of file dgeqrf_gpu.cpp.

References lapackf77_dtrtri(), MAGMA_D_ONE, MAGMA_D_ZERO, MagmaNonUnitStr, and MagmaUpperStr.

{
int i, j, info;
double *cola, *colw;
double c_zero = MAGMA_D_ZERO;
double c_one = MAGMA_D_ONE;
for(i=0; i<ib; i++){
cola = a + i*lda;
colw = work + i*ib;
for(j=0; j<i; j++){
colw[j] = cola[j];
cola[j] = c_zero;
}
colw[i] = cola[i];
cola[i] = c_one;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqrf_gpu ( magma_int_t  m,
magma_int_t  n,
double *  dA,
magma_int_t  ldda,
double *  tau,
double *  dT,
magma_int_t info 
)

Definition at line 41 of file dgeqrf_gpu.cpp.

References __func__, a_ref, d_ref, dd_ref, dsplit_diag_block(), hwork, lapackf77_dgeqrf(), lapackf77_dlarft(), magma_dgetmatrix(), magma_dgetmatrix_async(), magma_dlarfb_gpu(), magma_dmalloc_host(), magma_dsetmatrix(), magma_dsetmatrix_async(), MAGMA_ERR_HOST_ALLOC, magma_free_host(), magma_get_dgeqrf_nb(), magma_queue_create(), magma_queue_destroy(), magma_queue_sync(), MAGMA_SUCCESS, magma_xerbla(), MagmaColumnwise, MagmaColumnwiseStr, MagmaForward, MagmaForwardStr, MagmaLeft, MagmaTrans, max, min, t_ref, codegen::work, and work_ref.

{
/* -- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
Purpose
=======
DGEQRF computes a QR factorization of a DOUBLE_PRECISION M-by-N matrix A:
A = Q * R. This version stores the triangular matrices used in
the factorization so that they can be applied directly (i.e.,
without being recomputed) later. As a result, the application
of Q is much faster.
Arguments
=========
M (input) INTEGER
The number of rows of the matrix A. M >= 0.
N (input) INTEGER
The number of columns of the matrix A. N >= 0.
A (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N)
On entry, the M-by-N matrix A.
On exit, the elements on and above the diagonal of the array
contain the min(M,N)-by-N upper trapezoidal matrix R (R is
upper triangular if m >= n); the elements below the diagonal,
with the array TAU, represent the orthogonal matrix Q as a
product of min(m,n) elementary reflectors (see Further
Details).
LDDA (input) INTEGER
The leading dimension of the array A. LDDA >= max(1,M).
To benefit from coalescent memory accesses LDDA must be
dividable by 16.
TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
The scalar factors of the elementary reflectors (see Further
Details).
dT (workspace/output) DOUBLE_PRECISION array on the GPU,
dimension (2*MIN(M, N) + (N+31)/32*32 )*NB,
where NB can be obtained through magma_get_dgeqrf_nb(M).
It starts with MIN(M,N)*NB block that store the triangular T
matrices, followed by the MIN(M,N)*NB block of the diagonal
inverses for the R matrix. The rest of the array is used as workspace.
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
or another error occured, such as memory allocation failed.
Further Details
===============
The matrix Q is represented as a product of elementary reflectors
Q = H(1) H(2) . . . H(k), where k = min(m,n).
Each H(i) has the form
H(i) = I - tau * v * v'
where tau is a real scalar, and v is a real vector with
v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
and tau in TAU(i).
===================================================================== */
#define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1))
#define t_ref(a_1) (dT+(a_1)*nb)
#define d_ref(a_1) (dT+(minmn+(a_1))*nb)
#define dd_ref(a_1) (dT+(2*minmn+(a_1))*nb)
#define work_ref(a_1) ( work + (a_1))
#define hwork ( work + (nb)*(m))
magma_int_t i, k, minmn, old_i, old_ib, rows, cols;
magma_int_t ib, nb;
magma_int_t ldwork, lddwork, lwork, lhwork;
double *work, *ut;
/* check arguments */
*info = 0;
if (m < 0) {
*info = -1;
} else if (n < 0) {
*info = -2;
} else if (ldda < max(1,m)) {
*info = -4;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
k = minmn = min(m,n);
if (k == 0)
return *info;
lwork = (m + n + nb)*nb;
lhwork = lwork - m*nb;
if (MAGMA_SUCCESS != magma_dmalloc_host( &work, lwork )) {
return *info;
}
ut = hwork+nb*(n);
memset( ut, 0, nb*nb*sizeof(double));
static cudaStream_t stream[2];
magma_queue_create( &stream[0] );
magma_queue_create( &stream[1] );
ldwork = m;
lddwork= n;
if ( (nb > 1) && (nb < k) ) {
/* Use blocked code initially */
old_i = 0; old_ib = nb;
for (i = 0; i < k-nb; i += nb) {
ib = min(k-i, nb);
rows = m -i;
a_ref(i,i), ldda,
work_ref(i), ldwork, stream[1] );
if (i>0){
/* Apply H' to A(i:m,i+2*ib:n) from the left */
cols = n-old_i-2*old_ib;
m-old_i, cols, old_ib,
a_ref(old_i, old_i ), ldda, t_ref(old_i), nb,
a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0), lddwork);
/* store the diagonal */
magma_dsetmatrix_async( old_ib, old_ib,
ut, old_ib,
d_ref(old_i), old_ib, stream[0] );
}
magma_queue_sync( stream[1] );
lapackf77_dgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info);
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
&rows, &ib,
work_ref(i), &ldwork, tau+i, hwork, &ib);
/* Put 0s in the upper triangular part of a panel (and 1s on the
diagonal); copy the upper triangular in ut and invert it */
magma_queue_sync( stream[0] );
dsplit_diag_block(ib, work_ref(i), ldwork, ut);
magma_dsetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda );
if (i + ib < n) {
/* Send the triangular factor T to the GPU */
magma_dsetmatrix( ib, ib, hwork, ib, t_ref(i), nb );
if (i+nb < k-nb){
/* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
rows, ib, ib,
a_ref(i, i ), ldda, t_ref(i), nb,
a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
}
else {
cols = n-i-ib;
rows, cols, ib,
a_ref(i, i ), ldda, t_ref(i), nb,
a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
/* Fix the diagonal block */
magma_dsetmatrix( ib, ib, ut, ib, d_ref(i), ib );
}
old_i = i;
old_ib = ib;
}
}
} else {
i = 0;
}
/* Use unblocked code to factor the last or only block. */
if (i < k) {
ib = n-i;
rows = m-i;
magma_dgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows );
lhwork = lwork - rows*ib;
lapackf77_dgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info);
magma_dsetmatrix( rows, ib, work, rows, a_ref(i, i), ldda );
}
magma_queue_destroy( stream[0] );
magma_queue_destroy( stream[1] );
magma_free_host( work );
return *info;
/* End of MAGMA_DGEQRF */
} /* magma_dgeqrf */

Here is the call graph for this function:

Here is the caller graph for this function: