{
/*
 * Body of a blocked, panel-by-panel routine operating on the host array `a`
 * (leading dimension `lda`) and on device buffers carved out of `dA`.
 * Presumably the out-of-core ZGETRF driver, judging by the CHECK_ZGETRF_OOC
 * macro -- TODO confirm; the function signature is not part of this view.
 *
 * NOTE(review): several identifiers used below (MB, NB, nb, maxm, maxn,
 * maxdim, I, M, N, NBk, s, iinfo) have no declaration in the visible lines,
 * and a number of statements appear only as trailing argument lists whose
 * call expression is not visible. Comments describe the visible code only.
 */

/* Address of element (i,j) of the host array: a + j*lda + i. */
#define A(i,j) (a + (j)*lda + (i))
/* Address inside dAT at block offset i*nb*maxn + j*nb. */
#define inAT(i,j) (dAT + (i)*nb*maxn + (j)*nb)
/* Address inside dPT at block offset i*nb*nb + j*nb. */
#define inPT(i,j) (dPT + (i)*nb*nb + (j)*nb)
/* Pointers used by the routine; `da` and `dPT` are set below as offsets
   into `dA`, `work` is later pointed into the host array `a`.
   NOTE(review): no assignment to `dA` or `dAT` is visible in this view. */
cuDoubleComplex *dAT, *
dA, *da, *dPT, *
work;
magma_int_t i, ii, jj, offset, ib, rows, cols, s, nb0, m0;
/* The type of totalMem depends on the CUDA version; presumably this tracks
   the parameter type of cuDeviceTotalMem in each version -- confirm. */
#if CUDA_VERSION > 3010
size_t totalMem;
#else
unsigned int totalMem;
#endif
CUdevice dev;
/* Argument checks: *info is set to a negative code for an invalid
   argument (m < 0 gives -1, n < 0 gives -2). */
*info = 0;
if (m < 0)
*info = -1;
else if (n < 0)
*info = -2;
/* NOTE(review): the next assignment is not guarded by any condition, so
   *info is always -4 at this point and the check that follows always
   returns. Everything after that check is unreachable as written. It looks
   like a guarding condition (possibly a leading-dimension check) is
   missing -- confirm against the full source. */
*info = -4;
if (*info != 0) {
return *info;
}
/* Quick return when either dimension is zero. */
if (m == 0 || n == 0)
return *info;
/* Query the total memory of device 0 and convert it from bytes to a count
   of cuDoubleComplex elements.
   NOTE(review): the return codes of both driver calls are ignored. */
cuDeviceGet( &dev, 0);
cuDeviceTotalMem( &totalMem, dev );
totalMem /= sizeof(cuDoubleComplex);
/* MB is set to m; NB is capped at n.
   NOTE(review): NB is read here but no earlier assignment to it is visible
   in this view. */
MB = m;
if( NB >= n ) {
#ifdef CHECK_ZGETRF_OOC
printf( " * still fit in GPU memory.\n" );
#endif
NB = n;
}
#ifdef CHECK_ZGETRF_OOC
else {
printf( " * don't fit in GPU memory.\n" );
}
#endif
/* Round NB down to a multiple of nb (integer division).
   NOTE(review): this yields 0 when NB < nb, and NB is later used as a loop
   increment -- confirm that case is excluded elsewhere. */
NB = (NB / nb) * nb;
#ifdef CHECK_ZGETRF_OOC
if( NB != n ) printf( " * running in out-core mode (n=%d, NB=%d, nb=%d).\n",n,NB,nb );
else printf( " * running in in-core mode (n=%d, NB=%d, nb=%d).\n",n,NB,nb );
fflush(stdout);
#endif
/* When nb <= 1 or nb >= min(m,n) the first branch is taken; it is empty in
   the visible code. Otherwise MB and NB are rounded up to multiples of 32
   (maxm, maxn), maxdim is their maximum, and the function returns.
   NOTE(review): the unconditional return inside the else branch means the
   statements after this if/else only run for the first branch. */
if ( (nb <= 1) || (nb >=
min(m,n)) ) {
} else {
maxm = ((MB + 31)/32)*32;
maxn = ((NB + 31)/32)*32;
maxdim =
max(maxm, maxn);
return *info;
}
/* da and dPT are placed at offsets 2*nb*maxm and nb*maxm from dA. */
da = dA + 2*nb*maxm;
dPT = dA + nb*maxm;
return *info;
/* NOTE(review): as visible, this brace balances the opening brace at the
   top of the block, so the loop below sits outside that scope. */
}
/* Outer loop: I advances over [0, n) in steps of NB. */
for( I=0; I<n; I+=NB ) {
M = MB;
/* Walk offsets in [0, min(m,I)) in steps of NB; NBk is the width of the
   current step, clipped to m-offset. */
for( offset = 0; offset<
min(m,I); offset+=NB ) {
NBk =
min( m-offset, NB );
/* Inner loop over sub-steps of width nb; ib tracks the block index
   starting from offset/nb. */
for( jj=0, ib=offset/nb; jj<NBk; jj+=nb, ib++ )
{
ii = offset+jj;
rows = maxm - ii;
/* NOTE(review): the lines below are argument lists only; the calls they
   belong to are not visible in this view. */
dA, rows );
N, nb0, c_one,
inPT(0,0), nb,
inAT(ib,0), maxn );
if( M > ii+nb0 ) {
N, M-(ii+nb0), nb0, c_neg_one,
inAT(ib,0), maxn,
inPT(1,0), nb, c_one,
inAT(ib+1,0), maxn );
}
}
}
/* m0 is M-I; work points at host element a[I*lda]. */
m0 = M-I;
work = &a[I*lda];
if( m0 > 0 ) {
if( I > 0 ) {
cols = maxm - I;
}
/* A non-zero iinfo is copied to *info and ends the loop.
   NOTE(review): the statement that sets iinfo is not visible here. */
if( iinfo != 0 ) {
*info = iinfo;
break;
}
/* Loop over s steps; i is the block index I/nb + ii.
   NOTE(review): no assignment to s is visible in this view. */
for( ii = 0; ii < s; ii++ ) {
i = I/nb+ii;
cols = maxm - i*nb;
if (ii>0) {
/* NOTE(review): argument-list fragments; call expressions not visible. */
N - (ii+1)*nb, nb,
c_one,
inAT(i-1,ii-1), maxn,
N-(ii+1)*nb, M-i*nb, nb,
c_neg_one,
inAT(i-1,ii+1), maxn,
c_one,
inAT(i, ii+1), maxn );
rows = m - i*nb;
}
/* Record the first positive iinfo in *info, shifted by i*nb. */
if (*info == 0 && iinfo > 0)
*info = iinfo + i*nb;
/* NOTE(review): the visible argument lines in the two branches below are
   identical; the calls they belong to are not visible. */
if (s > (ii+1)) {
c_one,
inAT(i, ii ), maxn,
c_neg_one,
inAT(i, ii+1), maxn,
c_one,
inAT(i+1, ii+1), maxn );
} else {
c_one,
inAT(i, ii ), maxn,
c_neg_one,
inAT(i, ii+1), maxn,
c_one,
inAT(i+1, ii+1), maxn );
}
}
/* Remaining part after the s steps: nb0 is the smaller of the leftover
   extents M - i*nb and N - s*nb. */
i = I/nb+s;
nb0 =
min(M - i*nb, N - s*nb);
rows = M - i*nb;
cols = maxm - i*nb;
if( nb0 > 0 ) {
/* Record the first positive iinfo in *info, shifted by s*nb. */
if (*info == 0 && iinfo > 0)
*info = iinfo + s*nb;
N-s*nb-nb0, nb0,
}
}
}
/* NOTE(review): from here on there appear to be more closing braces than
   open scopes in the visible lines -- confirm against the full source. */
}
return *info;
}