MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgetrf_nopiv_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for zgetrf_nopiv_gpu.cpp:

Go to the source code of this file.

Macros

#define PRECISION_z
#define inA(i, j)   (dA + (i)*nb + (j)*nb*ldda)

Functions

magma_int_t magma_zgetrf_nopiv (magma_int_t *m, magma_int_t *n, cuDoubleComplex *a, magma_int_t *lda, magma_int_t *info)
magma_int_t magma_zgetrf_nopiv_gpu (magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)

Macro Definition Documentation

#define inA (   i,
 
)    (dA + (i)*nb + (j)*nb*ldda)
#define PRECISION_z

Definition at line 14 of file zgetrf_nopiv_gpu.cpp.


Function Documentation

magma_int_t magma_zgetrf_nopiv ( magma_int_t m,
magma_int_t n,
cuDoubleComplex *  a,
magma_int_t lda,
magma_int_t info 
)

Definition at line 27 of file zgetrf_nopiv.cpp.

References __func__, blasf77_ztrsm, magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, magma_zgetf2_nopiv(), max, and min.

{
/* -- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
Purpose
=======
ZGETRF_NOPIV computes an LU factorization of a general M-by-N
matrix A without pivoting.
The factorization has the form
A = L * U
where L is lower triangular with unit diagonal elements (lower
trapezoidal if m > n), and U is upper triangular (upper
trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
Arguments
=========
M (input) INTEGER
The number of rows of the matrix A. M >= 0.
N (input) INTEGER
The number of columns of the matrix A. N >= 0.
A (input/output) COMPLEX*16 array, dimension (LDA,N)
On entry, the M-by-N matrix to be factored.
On exit, the factors L and U from the factorization
A = P*L*U; the unit diagonal elements of L are not stored.
LDA (input) INTEGER
The leading dimension of the array A. LDA >= max(1,M).
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
> 0: if INFO = i, U(i,i) is exactly zero. The factorization
has been completed, but the factor U is exactly
singular, and division by zero will occur if it is used
to solve a system of equations.
===================================================================== */
cuDoubleComplex c_one = MAGMA_Z_ONE;
magma_int_t a_dim1, a_offset, min_mn, i__3, i__4;
cuDoubleComplex z__1;
static magma_int_t j, jb, nb, iinfo;
a_dim1 = *lda;
a_offset = 1 + a_dim1;
a -= a_offset;
/* Function Body */
*info = 0;
if (*m < 0) {
*info = -1;
} else if (*n < 0) {
*info = -2;
} else if (*lda < max(1,*m)) {
*info = -4;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
/* Quick return if possible */
if (*m == 0 || *n == 0) {
return *info;
}
/* Determine the block size for this environment. */
nb = 128;
min_mn = min(*m,*n);
if (nb <= 1 || nb >= min_mn)
{
/* Use unblocked code. */
magma_zgetf2_nopiv(m, n, &a[a_offset], lda, info);
}
else
{
/* Use blocked code. */
for (j = 1; j <= min_mn; j += nb)
{
/* Computing MIN */
i__3 = min_mn - j + 1;
jb = min(i__3,nb);
/* Factor diagonal and subdiagonal blocks and test for exact
singularity. */
i__3 = *m - j + 1;
//magma_zgetf2_nopiv(&i__3, &jb, &a[j + j * a_dim1], lda, &iinfo);
i__3 -= jb;
magma_zgetf2_nopiv(&jb, &jb, &a[j + j * a_dim1], lda, &iinfo);
blasf77_ztrsm("R", "U", "N", "N", &i__3, &jb, &c_one,
&a[j + j * a_dim1], lda,
&a[j + jb + j * a_dim1], lda);
/* Adjust INFO */
if (*info == 0 && iinfo > 0)
*info = iinfo + j - 1;
if (j + jb <= *n)
{
/* Compute block row of U. */
i__3 = *n - j - jb + 1;
ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
c_one, &a[j + j * a_dim1], lda, &a[j + (j+jb)*a_dim1], lda);
if (j + jb <= *m)
{
/* Update trailing submatrix. */
i__3 = *m - j - jb + 1;
i__4 = *n - j - jb + 1;
zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb,
&z__1, &a[j + jb + j * a_dim1], lda,
&a[j + (j + jb) * a_dim1], lda, &c_one,
&a[j + jb + (j + jb) * a_dim1], lda);
}
}
}
}
return *info;
} /* magma_zgetrf_nopiv */

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgetrf_nopiv_gpu ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t info 
)

Definition at line 26 of file zgetrf_nopiv_gpu.cpp.

References __func__, inA, magma_device_sync(), MAGMA_ERR_HOST_ALLOC, magma_free_host(), magma_get_zgetrf_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, magma_zgemm(), magma_zgetmatrix(), magma_zgetrf_nopiv(), magma_zmalloc_host(), magma_zsetmatrix(), magma_ztrsm(), MagmaLeft, MagmaLower, MagmaNoTrans, MagmaUnit, max, min, and codegen::work.

{
/* -- MAGMA (version 1.2.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
May 2012
Purpose
=======
ZGETRF_NOPIV_GPU computes an LU factorization of a general M-by-N
matrix A without any pivoting.
The factorization has the form
A = P * L * U
where P is a permutation matrix, L is lower triangular with unit
diagonal elements (lower trapezoidal if m > n), and U is upper
triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
Arguments
=========
M (input) INTEGER
The number of rows of the matrix A. M >= 0.
N (input) INTEGER
The number of columns of the matrix A. N >= 0.
A (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N).
On entry, the M-by-N matrix to be factored.
On exit, the factors L and U from the factorization
A = P*L*U; the unit diagonal elements of L are not stored.
LDDA (input) INTEGER
The leading dimension of the array A. LDDA >= max(1,M).
INFO (output) INTEGER
= 0: successful exit
< 0: if INFO = -i, the i-th argument had an illegal value
or another error occured, such as memory allocation failed.
> 0: if INFO = i, U(i,i) is exactly zero. The factorization
has been completed, but the factor U is exactly
singular, and division by zero will occur if it is used
to solve a system of equations.
===================================================================== */
#define inA(i,j) (dA + (i)*nb + (j)*nb*ldda)
cuDoubleComplex c_one = MAGMA_Z_ONE;
cuDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magma_int_t iinfo, nb;
magma_int_t maxm, maxn, mindim;
magma_int_t i, rows, cols, s, lddwork;
cuDoubleComplex *work;
/* Check arguments */
*info = 0;
if (m < 0)
*info = -1;
else if (n < 0)
*info = -2;
else if (ldda < max(1,m))
*info = -4;
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
/* Quick return if possible */
if (m == 0 || n == 0)
return *info;
/* Function Body */
mindim = min(m, n);
s = mindim / nb;
if (nb <= 1 || nb >= min(m,n)) {
/* Use CPU code. */
work = (cuDoubleComplex*)malloc(m * n * sizeof(cuDoubleComplex));
magma_zgetmatrix( m, n, dA, ldda, work, m );
magma_zgetrf_nopiv(&m, &n, work, &m, info);
magma_zsetmatrix( m, n, work, m, dA, ldda );
free(work);
}
else {
/* Use hybrid blocked code. */
maxm = ((m + 31)/32)*32;
maxn = ((n + 31)/32)*32;
lddwork = maxm;
if (MAGMA_SUCCESS != magma_zmalloc_host( &work, maxm*nb )) {
return *info;
}
for( i=0; i<s; i++ )
{
// download i-th panel
cols = maxm - i*nb;
magma_zgetmatrix( m-i*nb, nb, inA(i,i), ldda, work, lddwork );
// make sure that gpu queue is empty
if ( i>0 ){
nb, n - (i+1)*nb,
c_one, inA(i-1,i-1), ldda,
inA(i-1,i+1), ldda );
m-i*nb, n-(i+1)*nb, nb,
c_neg_one, inA(i, i-1), ldda, inA(i-1,i+1), ldda,
c_one, inA(i, i+1), ldda );
}
// do the cpu part
rows = m - i*nb;
magma_zgetrf_nopiv(&rows, &nb, work, &lddwork, &iinfo);
if ( (*info == 0) && (iinfo > 0) )
*info = iinfo + i*nb;
// upload i-th panel
magma_zsetmatrix( m-i*nb, nb, work, lddwork, inA(i, i), ldda );
// do the small non-parallel computations
if ( s > (i+1) ) {
nb, nb,
c_one, inA(i, i ), ldda,
inA(i, i+1), ldda);
m-(i+1)*nb, nb, nb,
c_neg_one, inA(i+1, i ), ldda, inA(i, i+1), ldda,
c_one, inA(i+1, i+1), ldda );
}
else {
nb, n-s*nb,
c_one, inA(i, i ), ldda,
inA(i, i+1), ldda);
m-(i+1)*nb, n-(i+1)*nb, nb,
c_neg_one, inA(i+1, i ), ldda, inA(i, i+1), ldda,
c_one, inA(i+1, i+1), ldda );
}
}
magma_int_t nb0 = min(m - s*nb, n - s*nb);
rows = m - s*nb;
cols = maxm - s*nb;
magma_zgetmatrix( rows, nb0, inA(s,s), ldda, work, lddwork );
// make sure that gpu queue is empty
// do the cpu part
magma_zgetrf_nopiv( &rows, &nb0, work, &lddwork, &iinfo);
if ( (*info == 0) && (iinfo > 0) )
*info = iinfo + s*nb;
// upload i-th panel
magma_zsetmatrix( rows, nb0, work, lddwork, inA(s,s), ldda );
nb0, n-s*nb-nb0,
c_one, inA(s,s), ldda,
inA(s,s)+nb0, ldda);
magma_free_host( work );
}
return *info;
} /* magma_zgetrf_nopiv_gpu */

Here is the call graph for this function: