MAGMA  1.2.0
Matrix Algebra on GPU and Multicore Architectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
cssssm_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for cssssm_gpu.cpp:

Go to the source code of this file.

Macros

#define magma_cgemm   magmablas_cgemm
#define A1T(i, j)   (dA1T + (i)*ldda1 + (j))
#define A2T(i, j)   (dA2T + (i)*ldda2 + (j))
#define L1(i)   (dL1 + (i)*lddl1 )
#define L2(i, j)   (dL2 + (i)*lddl2i + (j)*lddl2j)

Functions

magma_int_t magma_cssssm_gpu (char storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, cuFloatComplex *dA1, magma_int_t ldda1, cuFloatComplex *dA2, magma_int_t ldda2, cuFloatComplex *dL1, magma_int_t lddl1, cuFloatComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info)

Macro Definition Documentation

#define A1T( i, j )    (dA1T + (i)*ldda1 + (j))
#define A2T( i, j )    (dA2T + (i)*ldda2 + (j))
#define L1 (   i)    (dL1 + (i)*lddl1 )
#define L2( i, j )    (dL2 + (i)*lddl2i + (j)*lddl2j)
#define magma_cgemm   magmablas_cgemm

Definition at line 16 of file cssssm_gpu.cpp.


Function Documentation

magma_int_t magma_cssssm_gpu ( char  storev,
magma_int_t  m1,
magma_int_t  n1,
magma_int_t  m2,
magma_int_t  n2,
magma_int_t  k,
magma_int_t  ib,
cuFloatComplex *  dA1,
magma_int_t  ldda1,
cuFloatComplex *  dA2,
magma_int_t  ldda2,
cuFloatComplex *  dL1,
magma_int_t  lddl1,
cuFloatComplex *  dL2,
magma_int_t  lddl2,
magma_int_t *  IPIV,
magma_int_t *  info 
)

Definition at line 21 of file cssssm_gpu.cpp.

References __func__, A1T, A2T, L1, L2, MAGMA_C_NEG_ONE, MAGMA_C_ONE, magma_cgemm, magma_ctrmm(), magma_ctrsm(), magma_xerbla(), magmablas_cgetmo_in, magmablas_cgetmo_out, magmablas_cswap(), magmablas_cswapblk(), MagmaLower, MagmaNoTrans, MagmaRight, MagmaTrans, MagmaUnit, max, and min.

{
/*  -- MAGMA (version 1.2.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       May 2012

    Purpose
    =======
    Updates the pair of GPU matrices (A1, A2) panel by panel, using the
    triangular blocks stored in L1, the blocks stored in L2 and the pivot
    indices in IPIV.  For every panel of width sb = min(k-ii, ib),
    ii = 0, ib, 2*ib, ... < k, the routine performs three steps:

      1. interchanges entries between A1T and A2T as directed by IPIV
         (magmablas_cswapblk, or a loop of magmablas_cswap when
         NOSWAPBLK is defined);
      2. multiplies the n1-by-sb panel at A1T(ii,0) on the right by the
         unit lower triangular block L1(ii) (magma_ctrmm), or solves with
         that block instead (magma_ctrsm) when WITHOUTTRTRI is defined;
      3. subtracts from the n2-by-m2 matrix at A2T(0,0) the product of the
         panel at A1T(ii,0) and the block at L2(0,ii) (magma_cgemm).

    NOTE(review): the header that previously stood here was the SGETRF
    description (M, N, A, LDA) and did not match this routine's
    parameters; the text above is derived from the code below only.
    The routine looks like the GPU counterpart of the tile-LU "ssssm"
    update kernel -- confirm against the PLASMA/MAGMA documentation.

    Arguments
    =========
    STOREV  (input) CHARACTER
            'C' or 'c': A1 and A2 go through magmablas_cgetmo_in before
            the update and magmablas_cgetmo_out afterwards, and L1/L2 are
            applied with MagmaTrans.
            Any other value: A1 and A2 are used as given and L1/L2 are
            applied with MagmaNoTrans.

    M1, N1  (input) INTEGER
            Dimensions associated with A1.  M1 >= 0, N1 >= 0.

    M2, N2  (input) INTEGER
            Dimensions associated with A2.  M2 >= 0, N2 >= 0.
            NOTE(review): step 3 reads an n2-row panel out of A1T, so
            N1 == N2 is presumably required -- confirm with callers.

    K       (input) INTEGER
            Total number of panel columns/rows to process.  K >= 0.

    IB      (input) INTEGER
            Panel (inner block) size.  IB >= 0.

    DA1     (input/output) COMPLEX array on the GPU, leading dimension
            LDDA1 >= max(1,M1).

    DA2     (input/output) COMPLEX array on the GPU, leading dimension
            LDDA2 >= max(1,M2).

    DL1     (input) COMPLEX array on the GPU, leading dimension
            LDDL1 >= max(1,IB).  Block ii is addressed as
            DL1 + ii*LDDL1.

    DL2     (input) COMPLEX array on the GPU, leading dimension
            LDDL2 >= max(1,M2).

    IPIV    (input) INTEGER array of 1-based pivot indices.

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: an argument had an illegal value.  The codes emitted are
                 -1 (M1), -2 (N1), -3 (M2), -4 (N2), -5 (K), -6 (IB),
                 -8 (LDDA1), -10 (LDDA2), -12 (LDDL1), -14 (LDDL2).
            NOTE(review): these codes do not count STOREV as the first
            argument, so they are shifted by one relative to this
            function's own parameter list.  They are kept unchanged
            because callers may depend on the values.
    ===================================================================== */

#define A1T(i,j) (dA1T + (i)*ldda1 + (j))
#define A2T(i,j) (dA2T + (i)*ldda2 + (j))
#define L1(i)    (dL1  + (i)*lddl1      )
#define L2(i,j)  (dL2  + (i)*lddl2i + (j)*lddl2j)

    cuFloatComplex c_one     = MAGMA_C_ONE;
    cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE;

    int ip, ii, sb;
    cuFloatComplex *dA1T, *dA2T;
    char transL;
    int lddl2i, lddl2j;

    /* Check input arguments */
    *info = 0;
    if (m1 < 0) {
        *info = -1;
    }
    else if (n1 < 0) {
        *info = -2;
    }
    else if (m2 < 0) {
        *info = -3;
    }
    else if (n2 < 0) {
        *info = -4;
    }
    else if (k < 0) {
        *info = -5;
    }
    else if (ib < 0) {
        *info = -6;
    }
    else if (ldda1 < max(1,m1)) {
        *info = -8;
    }
    else if (ldda2 < max(1,m2)) {
        *info = -10;
    }
    else if (lddl1 < max(1,ib)) {
        *info = -12;
    }
    else if (lddl2 < max(1,m2)) {
        *info = -14;
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return */
    if ((m1 == 0) || (n1 == 0) || (m2 == 0) || (n2 == 0) || (k == 0) || (ib == 0))
        return *info;

    if ( (storev == 'C') || (storev == 'c') ) {
        /* NOTE(review): dA1T/dA2T are not assigned anywhere else on this
           path, so magmablas_cgetmo_in is presumably a macro that sets
           them -- confirm against common_magma.h. */
        magmablas_cgetmo_in( dA1, dA1T, ldda1, m1, n1 );
        magmablas_cgetmo_in( dA2, dA2T, ldda2, m2, n2 );
        transL = MagmaTrans;
        lddl2i = 1; lddl2j = lddl2;
    } else {
        dA1T = dA1;
        dA2T = dA2;
        transL = MagmaNoTrans;
        lddl2i = lddl2; lddl2j = 1;
    }

    ip = 0;
    for( ii=0; ii<k; ii+=ib )
    {
        sb = min( k-ii, ib);

        /* Step 1: apply the interchanges for this panel.
           NOTE(review): both variants cover ib pivots (ii+1 .. ii+ib),
           not sb; when k is not a multiple of ib the last panel reads
           pivots past k -- confirm IPIV is sized for that. */
#ifndef NOSWAPBLK
        magmablas_cswapblk( 'R', n1,
                            A1T(0, 0), ldda1,
                            A2T(0, 0), ldda2,
                            ii+1, ii+ib, IPIV, 1, m1 );
#else
        {
            int i;   /* was used undeclared in the original listing */
            int im;
            for(i=0; i<ib; i++) {
                im = IPIV[ip]-1;          /* pivots are 1-based */
                if (im != (ii+i)) {
                    im = im - m1;         /* pivot row lives in A2 */
                    assert( (im>=0) && (im<m1) && (im<m2) );
                    magmablas_cswap( n1, A1T(ii+i, 0), 1, A2T(im, 0), 1 );
                }
                ip++;
            }
        }
#endif

        /* Step 2: apply the unit lower triangular block L1(ii) from the
           right to the n1-by-sb panel at A1T(ii,0). */
#ifndef WITHOUTTRTRI
        /* Lower, Trans, because L1 is not transposed */
        magma_ctrmm( MagmaRight, MagmaLower, transL, MagmaUnit,
                     n1, sb,
                     c_one, L1( ii),    lddl1,
                            A1T(ii, 0), ldda1);
#else
        /* Lower, Trans, because L1 is not transposed */
        magma_ctrsm( MagmaRight, MagmaLower, transL, MagmaUnit,
                     n1, sb,
                     c_one, L1( ii),    lddl1,
                            A1T(ii, 0), ldda1);
#endif

        /* Step 3: A2T := A2T - A1T(ii,0) * op( L2(0,ii) ). */
        /* Second parameter is trans because L2 is not transposed */
        magma_cgemm( MagmaNoTrans, transL,
                     n2, m2, sb,
                     c_neg_one, A1T(ii, 0), ldda1,
                                L2( 0, ii), lddl2,
                     c_one,     A2T(0, 0 ), ldda2 );
    }

    if ( (storev == 'C') || (storev == 'c') ) {
        magmablas_cgetmo_out( dA1, dA1T, ldda1, m1, n1 );
        magmablas_cgetmo_out( dA2, dA2T, ldda2, m2, n2 );
    }
    return *info;
}

Here is the call graph for this function:

Here is the caller graph for this function: