MAGMA  1.2.0
Matrix Algebra on GPU and Multicore Architectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_chetrd.cpp File Reference
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas.h>
#include "flops.h"
#include "magma.h"
#include "magma_lapack.h"
#include "testings.h"
Include dependency graph for testing_chetrd.cpp:

Go to the source code of this file.

Macros

#define PRECISION_c
#define FLOPS(n)   ( 6. * FMULS_HETRD(n) + 2. * FADDS_HETRD(n))

Functions

int main (int argc, char **argv)

Macro Definition Documentation

#define FLOPS (   n)    ( 6. * FMULS_HETRD(n) + 2. * FADDS_HETRD(n))

Definition at line 32 of file testing_chetrd.cpp.

#define PRECISION_c

Definition at line 30 of file testing_chetrd.cpp.


Function Documentation

int main ( int  argc,
char **  argv 
)

Definition at line 40 of file testing_chetrd.cpp.

References diag, FLOPS, get_current_time(), GetTimerValue(), ISEED, lapackf77_chet21(), lapackf77_chetrd(), lapackf77_clacpy(), lapackf77_clarnv(), lapackf77_cungtr(), lapackf77_slamch, MAGMA_C_REAL, MAGMA_C_SET2REAL, magma_chetrd(), magma_get_chetrd_nb(), MagmaLowerStr, MagmaUpperLowerStr, MagmaUpperStr, gbstrct_blg::N, TESTING_CUDA_FINALIZE, TESTING_CUDA_INIT, TESTING_FREE, TESTING_HOSTALLOC, TESTING_HOSTFREE, TESTING_MALLOC, uplo, and codegen::work.

{
    /*
     * Test driver for magma_chetrd: reduces a random single-precision complex
     * Hermitian matrix with MAGMA, then with LAPACK (lapackf77_chetrd), and
     * prints the performance of both.
     *
     * Command line:
     *   -N <n>   run one problem of order n (n must be > 0)
     *   -U | -L  use the upper / lower triangle (default: lower)
     * With no arguments the usage line is printed and the ten built-in sizes
     * are run in turn.
     *
     * If the environment variable MAGMA_TESTINGS_CHECK is set, the MAGMA
     * result is additionally checked with lapackf77_chet21 and the two
     * residuals are printed.
     *
     * Returns EXIT_SUCCESS; exits with status 1 when arguments are given but
     * no positive N results from them.
     */
    TESTING_CUDA_INIT();

    magma_timestr_t start, end;
    float eps, flops, gpu_perf, cpu_perf;
    cuFloatComplex *h_A, *h_R, *h_Q, *h_work, *work;
    cuFloatComplex *tau;
    float *diag, *offdiag, *rwork;
    float result[2] = {0., 0.};

    /* Matrix size */
    magma_int_t N = 0, n2, lda, lwork;
    magma_int_t size[10] = {1024,2048,3072,4032,5184,6016,7040,8064,9088,10112};

    magma_int_t i, info, nb, checkres, once = 0;
    magma_int_t ione = 1;
    magma_int_t itwo = 2;
    magma_int_t ithree = 3;
    magma_int_t ISEED[4] = {0,0,0,1};
    char *uplo = (char *)MagmaLowerStr;

    if (argc != 1) {
        for (i = 1; i < argc; i++) {
            /* Only consume a value for -N when one is actually present; a
               trailing "-N" leaves N at 0 and falls through to the usage
               message below instead of reading past the argument list. */
            if (strcmp("-N", argv[i]) == 0 && i + 1 < argc) {
                N = atoi(argv[++i]);
                once = 1;
            }
            else if (strcmp("-U", argv[i]) == 0)
                uplo = (char *)MagmaUpperStr;
            else if (strcmp("-L", argv[i]) == 0)
                uplo = (char *)MagmaLowerStr;
        }
        if ( N > 0 )
            printf(" testing_chetrd -L|U -N %d\n\n", (int) N);
        else
        {
            printf("\nUsage: \n");
            printf(" testing_chetrd -L|U -N %d\n\n", 1024);
            exit(1);
        }
    }
    else {
        printf("\nUsage: \n");
        printf(" testing_chetrd -L|U -N %d\n\n", 1024);
        /* Size every buffer for the largest built-in problem. */
        N = size[9];
    }

    checkres = getenv("MAGMA_TESTINGS_CHECK") != NULL;

    eps = lapackf77_slamch( "E" );
    lda = N;
    n2 = lda * N;

    /* The block size must be known before the workspace is sized. */
    nb = magma_get_chetrd_nb(N);
    /* We suppose the magma nb is bigger than lapack nb */
    lwork = N*nb;

    /* Allocate host memory for the matrix */
    TESTING_MALLOC( h_A, cuFloatComplex, lda*N );
    TESTING_HOSTALLOC( h_R, cuFloatComplex, lda*N );
    TESTING_HOSTALLOC( h_work, cuFloatComplex, lwork );
    TESTING_MALLOC( tau, cuFloatComplex, N );
    TESTING_MALLOC( diag, float, N );
    TESTING_MALLOC( offdiag, float, N-1 );

    /* To avoid uninitialized variable warning */
    h_Q = NULL;
    work = NULL;
    rwork = NULL;

    /* Extra buffers are only needed when the result is verified. */
    if ( checkres ) {
        TESTING_MALLOC( h_Q, cuFloatComplex, lda*N );
        TESTING_MALLOC( work, cuFloatComplex, 2*N*N );
#if defined(PRECISION_z) || defined(PRECISION_c)
        TESTING_MALLOC( rwork, float, N );
#endif
    }

    printf("\n\n");
    printf(" N CPU GFlop/s GPU GFlop/s |A-QHQ'|/N|A| |I-QQ'|/N \n");
    printf("=============================================================\n");
    for (i = 0; i < 10; i++) {
        if ( !once ) {
            N = size[i];
        }
        lda = N;
        n2 = N*lda;
        /* NOTE(review): the 1e6 scaling presumably pairs with a timer value
           in milliseconds to give GFlop/s - confirm against GetTimerValue. */
        flops = FLOPS( (float)N ) / 1e6;

        /* ====================================================================
           Initialize the matrix
           =================================================================== */
        lapackf77_clarnv( &ione, ISEED, &n2, h_A );
        /* Make the matrix hermitian */
        {
            /* Dedicated indices: reusing the outer loop counter here would
               leave it equal to N and end the size sweep after one pass. */
            magma_int_t col, row;
            for (col = 0; col < N; col++) {
                /* Drop the imaginary part of the diagonal entry. */
                MAGMA_C_SET2REAL( h_A[col*lda+col], ( MAGMA_C_REAL(h_A[col*lda+col]) ) );
                /* Mirror the conjugate of the opposite triangle. */
                for (row = 0; row < col; row++)
                    h_A[col*lda+row] = cuConjf(h_A[row*lda+col]);
            }
        }
        /* Keep h_A pristine; MAGMA works on the copy in h_R. */
        lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );

        /* ====================================================================
           Performs operation using MAGMA
           =================================================================== */
        start = get_current_time();
        magma_chetrd(uplo[0], N, h_R, lda, diag, offdiag,
                     tau, h_work, lwork, &info);
        end = get_current_time();
        if ( info < 0 )
            printf("Argument %d of magma_chetrd had an illegal value\n", (int) -info);

        gpu_perf = flops / GetTimerValue(start,end);

        /* =====================================================================
           Check the factorization
           =================================================================== */
        if ( checkres ) {
            /* Build Q explicitly from the reflectors stored in h_R / tau. */
            lapackf77_clacpy(uplo, &N, &N, h_R, &lda, h_Q, &lda);
            lapackf77_cungtr(uplo, &N, h_Q, &lda, tau, h_work, &lwork, &info);

#if defined(PRECISION_z) || defined(PRECISION_c)
            lapackf77_chet21(&itwo, uplo, &N, &ione,
                             h_A, &lda, diag, offdiag,
                             h_Q, &lda, h_R, &lda,
                             tau, work, rwork, &result[0]);

            lapackf77_chet21(&ithree, uplo, &N, &ione,
                             h_A, &lda, diag, offdiag,
                             h_Q, &lda, h_R, &lda,
                             tau, work, rwork, &result[1]);
#else
            lapackf77_chet21(&itwo, uplo, &N, &ione,
                             h_A, &lda, diag, offdiag,
                             h_Q, &lda, h_R, &lda,
                             tau, work, &result[0]);

            lapackf77_chet21(&ithree, uplo, &N, &ione,
                             h_A, &lda, diag, offdiag,
                             h_Q, &lda, h_R, &lda,
                             tau, work, &result[1]);
#endif
        }

        /* =====================================================================
           Performs operation using LAPACK
           =================================================================== */
        /* Runs after the check because it overwrites h_A, diag, offdiag, tau. */
        start = get_current_time();
        lapackf77_chetrd(uplo, &N, h_A, &lda, diag, offdiag, tau,
                         h_work, &lwork, &info);
        end = get_current_time();

        if (info < 0)
            printf("Argument %d of lapackf77_chetrd had an illegal value.\n", (int) -info);

        cpu_perf = flops / GetTimerValue(start,end);

        /* =====================================================================
           Print performance and error.
           =================================================================== */
        if ( checkres ) {
            printf("%5d %6.2f %6.2f %e %e\n",
                   (int) N, cpu_perf, gpu_perf,
                   result[0]*eps, result[1]*eps );
        } else {
            printf("%5d %6.2f %6.2f\n",
                   (int) N, cpu_perf, gpu_perf );
        }

        if ( once )
            break;
    }

    /* Memory clean up */
    TESTING_FREE( h_A );
    TESTING_FREE( tau );
    TESTING_FREE( diag );
    TESTING_FREE( offdiag );
    TESTING_HOSTFREE( h_R );
    TESTING_HOSTFREE( h_work );

    if ( checkres ) {
        TESTING_FREE( h_Q );
        TESTING_FREE( work );
#if defined(PRECISION_z) || defined(PRECISION_c)
        TESTING_FREE( rwork );
#endif
    }

    /* Shutdown */
    TESTING_CUDA_FINALIZE();
    return EXIT_SUCCESS;
}

Here is the call graph for this function: