Definition at line 14 of file sgeqrf_ooc.cpp.
References __func__, a_ref, da_ref, dwork, lapackf77_slarft(), MAGMA_ERR_DEVICE_ALLOC, magma_free(), magma_get_sgeqrf_nb(), magma_queue_create(), magma_queue_destroy(), magma_queue_sync(), MAGMA_S_MAKE, MAGMA_S_ONE, magma_sgeqrf(), magma_sgeqrf2_gpu(), magma_sgetmatrix_async(), magma_slarfb_gpu(), magma_smalloc(), magma_ssetmatrix_async(), MAGMA_SUCCESS, magma_xerbla(), MagmaColumnwise, MagmaColumnwiseStr, MagmaForward, MagmaForwardStr, MagmaLeft, MagmaTrans, MagmaUpper, max, min, spanel_to_q(), and sq_to_panel().
{
#define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
#define da_ref(a_1,a_2) (da+(a_2)*ldda + (a_1))
int k, lddwork, ldda;
*info = 0;
int lwkopt = n * nb;
long int lquery = (lwork == -1);
if (m < 0) {
*info = -1;
} else if (n < 0) {
*info = -2;
}
else if (lda <
max(1,m)) {
*info = -4;
}
else if (lwork <
max(1,n) && ! lquery) {
*info = -7;
}
if (*info != 0) {
return *info;
}
else if (lquery)
return *info;
#if CUDA_VERSION > 3010
size_t totalMem;
#else
unsigned int totalMem;
#endif
CUdevice dev;
cuDeviceGet( &dev, 0);
cuDeviceTotalMem( &totalMem, dev );
totalMem /= sizeof(float);
NB = (NB / nb) * nb;
if (NB >= n)
if (k == 0) {
return *info;
}
lddwork = ((NB+31)/32)*32+nb;
ldda = ((m+31)/32)*32;
return *info;
}
static cudaStream_t stream[2];
float *ptr = da + ldda * NB;
dwork = da + ldda*(NB + nb);
for(int i=0; i<n; i+=NB)
{
da_ref(0,0), ldda, stream[0] );
for(
int j=0; j<
min(i,k); j+=nb)
{
int rows = m-j;
&rows, &ib,
a_ref(j,j), &lda, tau+j,
work, &ib);
dwork, lddwork, stream[1] );
ptr, rows, stream[1] );
rows, IB, ib,
ptr, rows, dwork, lddwork,
da_ref(j, 0), ldda, dwork+ib, lddwork);
}
if (i<k)
a_ref(0,i), lda, stream[0] );
}
return *info;
}