MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_zgetrf.cpp File Reference
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas.h>
#include "flops.h"
#include "magma.h"
#include "magma_lapack.h"
#include "testings.h"
Include dependency graph for testing_zgetrf.cpp:

Go to the source code of this file.

Macros

#define PRECISION_z
#define FLOPS(m, n)   ( 6. * FMULS_GETRF(m, n) + 2. * FADDS_GETRF(m, n) )

Functions

magma_int_t magma_zgetrf3 (magma_int_t num_gpus, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
double get_LU_error (magma_int_t M, magma_int_t N, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *LU, magma_int_t *IPIV)
int main (int argc, char **argv)

Macro Definition Documentation

#define FLOPS (   m,
 
)    ( 6. * FMULS_GETRF(m, n) + 2. * FADDS_GETRF(m, n) )

Definition at line 36 of file testing_zgetrf.cpp.

#define PRECISION_z

Definition at line 34 of file testing_zgetrf.cpp.


Function Documentation

double get_LU_error ( magma_int_t  M,
magma_int_t  N,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  LU,
magma_int_t IPIV 
)

Definition at line 41 of file testing_zgetrf.cpp.

References blasf77_zgemm, L, lapackf77_zlacpy, lapackf77_zlange, lapackf77_zlaswp, MAGMA_Z_MAKE, MAGMA_Z_ONE, MAGMA_Z_SUB, MAGMA_Z_ZERO, MagmaLowerStr, MagmaUpperStr, min, gbstrct_blg::N, TESTING_FREE, TESTING_MALLOC, and codegen::work.

{
magma_int_t min_mn = min(M,N);
magma_int_t ione = 1;
cuDoubleComplex alpha = MAGMA_Z_ONE;
cuDoubleComplex beta = MAGMA_Z_ZERO;
cuDoubleComplex *L, *U;
double work[1], matnorm, residual;
TESTING_MALLOC( L, cuDoubleComplex, M*min_mn);
TESTING_MALLOC( U, cuDoubleComplex, min_mn*N);
memset( L, 0, M*min_mn*sizeof(cuDoubleComplex) );
memset( U, 0, min_mn*N*sizeof(cuDoubleComplex) );
lapackf77_zlaswp( &N, A, &lda, &ione, &min_mn, IPIV, &ione);
lapackf77_zlacpy( MagmaLowerStr, &M, &min_mn, LU, &lda, L, &M );
lapackf77_zlacpy( MagmaUpperStr, &min_mn, &N, LU, &lda, U, &min_mn );
for(j=0; j<min_mn; j++)
L[j+j*M] = MAGMA_Z_MAKE( 1., 0. );
matnorm = lapackf77_zlange("f", &M, &N, A, &lda, work);
blasf77_zgemm("N", "N", &M, &N, &min_mn,
&alpha, L, &M, U, &min_mn, &beta, LU, &lda);
for( j = 0; j < N; j++ ) {
for( i = 0; i < M; i++ ) {
LU[i+j*lda] = MAGMA_Z_SUB( LU[i+j*lda], A[i+j*lda] );
}
}
residual = lapackf77_zlange("f", &M, &N, LU, &lda, work);
return residual / (matnorm * N);
}
magma_int_t magma_zgetrf3 ( magma_int_t  num_gpus,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
magma_int_t ipiv,
magma_int_t info 
)
int main ( int  argc,
char **  argv 
)

Definition at line 86 of file testing_zgetrf.cpp.

References FLOPS, get_current_time(), get_LU_error(), GetTimerValue(), ISEED, lapackf77_zgetrf, lapackf77_zlacpy, lapackf77_zlarnv, magma_get_zgetrf_nb(), magma_zgetrf(), MagmaUpperLowerStr, min, gbstrct_blg::N, TESTING_CUDA_FINALIZE, TESTING_CUDA_INIT, TESTING_FREE, TESTING_HOSTALLOC, TESTING_HOSTFREE, and TESTING_MALLOC.

{
magma_timestr_t start, end;
double flops, gpu_perf, cpu_perf, error;
cuDoubleComplex *h_A, *h_R;
magma_int_t *ipiv;
/* Matrix size */
magma_int_t M = 0, N = 0, n2, lda, ldda;
magma_int_t size[10] = {1024,2048,3072,4032,5184,6016,7040,8064,9088,10112};
magma_int_t i, info, min_mn, nb;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
if (argc != 1){
for(i = 1; i<argc; i++){
if (strcmp("-N", argv[i])==0)
N = atoi(argv[++i]);
else if (strcmp("-M", argv[i])==0)
M = atoi(argv[++i]);
}
if (M>0 && N>0)
printf(" testing_zgetrf -M %d -N %d\n\n", M, N);
else
{
printf("\nUsage: \n");
printf(" testing_zgetrf -M %d -N %d\n\n", 1024, 1024);
exit(1);
}
}
else {
printf("\nUsage: \n");
printf(" testing_zgetrf -M %d -N %d\n\n", 1024, 1024);
M = N = size[9];
}
ldda = ((M+31)/32)*32;
n2 = M * N;
min_mn = min(M, N);
nb = magma_get_zgetrf_nb(min_mn);
/* Allocate host memory for the matrix */
TESTING_MALLOC(ipiv, magma_int_t, min_mn);
TESTING_MALLOC( h_A, cuDoubleComplex, n2 );
TESTING_HOSTALLOC( h_R, cuDoubleComplex, n2 );
printf("\n\n");
printf(" M N CPU GFlop/s GPU GFlop/s ||PA-LU||/(||A||*N)\n");
printf("============================================================\n");
for(i=0; i<10; i++){
if (argc == 1){
M = N = size[i];
}
min_mn= min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
flops = FLOPS( (double)M, (double)N ) / 1000000;
/* Initialize the matrix */
lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
start = get_current_time();
lapackf77_zgetrf(&M, &N, h_A, &lda, ipiv, &info);
if (info < 0)
printf("Argument %d of zgetrf had an illegal value.\n", -info);
cpu_perf = flops / GetTimerValue(start, end);
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_R, &lda, h_A, &lda );
start = get_current_time();
magma_zgetrf( M, N, h_R, lda, ipiv, &info);
if (info < 0)
printf("Argument %d of zgetrf had an illegal value.\n", -info);
gpu_perf = flops / GetTimerValue(start, end);
/* =====================================================================
Check the factorization
=================================================================== */
error = get_LU_error(M, N, h_A, lda, h_R, ipiv);
printf("%5d %5d %6.2f %6.2f %e\n",
M, N, cpu_perf, gpu_perf, error);
if (argc != 1)
break;
}
/* Memory clean up */
TESTING_FREE( ipiv );
TESTING_FREE( h_A );
/* Shutdown */
}

Here is the call graph for this function: