MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_ztrsv.cpp File Reference
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda_runtime_api.h>
#include <cublas.h>
#include "flops.h"
#include "magma.h"
#include "magma_lapack.h"
#include "testings.h"
Include dependency graph for testing_ztrsv.cpp:

Go to the source code of this file.

Macros

#define h_A(i, j)   (h_A + (i) + (j)*lda)
 

Functions

int main (int argc, char **argv)
 

Macro Definition Documentation

#define h_A( i, j )    (h_A + (i) + (j)*lda)

Definition at line 32 of file testing_ztrsv.cpp.

Function Documentation

int main( int argc, char **argv )

Definition at line 37 of file testing_ztrsv.cpp.

References blasf77_zaxpy, blasf77_zcopy, blasf77_ztrmv, blasf77_ztrsv, magma_opts::diag, FLOPS_ZTRSM, h_A, magma_opts::lapack, lapackf77_zgetrf, lapackf77_zlange, lapackf77_zlarnv, magma_sync_wtime(), magma_wtime(), MAGMA_Z_NEG_ONE, magma_zgetvector, magma_zsetmatrix, magma_zsetvector, magma_opts::niter, magma_opts::nsize, magma_opts::ntest, parse_opts(), magma_opts::side, TESTING_DEVALLOC, TESTING_DEVFREE, TESTING_FINALIZE, TESTING_FREE, TESTING_INIT, TESTING_MALLOC, magma_opts::transA, and magma_opts::uplo.

38 {
39  TESTING_INIT();
40 
41  real_Double_t gflops, cublas_perf, cublas_time, cpu_perf=0, cpu_time=0;
42  double cublas_error, normA, normx, normr, work[1];
43  magma_int_t N, info;
44  magma_int_t sizeA;
45  magma_int_t lda, ldda;
46  magma_int_t ione = 1;
47  magma_int_t ISEED[4] = {0,0,0,1};
48  magma_int_t *ipiv;
49 
50  magmaDoubleComplex *h_A, *h_b, *h_x, *h_xcublas;
51  magmaDoubleComplex *d_A, *d_x;
52  magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
53 
54  magma_opts opts;
55  parse_opts( argc, argv, &opts );
56 
57  printf("uplo = %c, transA = %c, diag = %c\n", opts.uplo, opts.transA, opts.diag );
58  printf(" N CUBLAS Gflop/s (ms) CPU Gflop/s (ms) CUBLAS error\n");
59  printf("============================================================\n");
60  for( int i = 0; i < opts.ntest; ++i ) {
61  for( int iter = 0; iter < opts.niter; ++iter ) {
62  N = opts.nsize[i];
63  gflops = FLOPS_ZTRSM(opts.side, N, 1) / 1e9;
64  lda = N;
65  ldda = ((lda+31)/32)*32;
66  sizeA = lda*N;
67 
68  TESTING_MALLOC( ipiv, magma_int_t, N );
69  TESTING_MALLOC( h_A, magmaDoubleComplex, lda*N );
70  TESTING_MALLOC( h_b, magmaDoubleComplex, N );
71  TESTING_MALLOC( h_x, magmaDoubleComplex, N );
72  TESTING_MALLOC( h_xcublas, magmaDoubleComplex, N );
73 
74  TESTING_DEVALLOC( d_A, magmaDoubleComplex, ldda*N );
75  TESTING_DEVALLOC( d_x, magmaDoubleComplex, N );
76 
77  /* Initialize the matrices */
78  /* Factor A into LU to get well-conditioned triangular matrix.
79  * Copy L to U, since L seems okay when used with non-unit diagonal
80  * (i.e., from U), while U fails when used with unit diagonal. */
81  lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A );
82  lapackf77_zgetrf( &N, &N, h_A, &lda, ipiv, &info );
83  for( int j = 0; j < N; ++j ) {
84  for( int i = 0; i < j; ++i ) {
85  *h_A(i,j) = *h_A(j,i);
86  }
87  }
88 
89  lapackf77_zlarnv( &ione, ISEED, &N, h_b );
90  blasf77_zcopy( &N, h_b, &ione, h_x, &ione );
91 
92  /* =====================================================================
93  Performs operation using CUDA-BLAS
94  =================================================================== */
95  magma_zsetmatrix( N, N, h_A, lda, d_A, ldda );
96  magma_zsetvector( N, h_x, 1, d_x, 1 );
97 
98  cublas_time = magma_sync_wtime( NULL );
99  cublasZtrsv( opts.uplo, opts.transA, opts.diag,
100  N,
101  d_A, ldda,
102  d_x, 1 );
103  cublas_time = magma_sync_wtime( NULL ) - cublas_time;
104  cublas_perf = gflops / cublas_time;
105 
106  magma_zgetvector( N, d_x, 1, h_xcublas, 1 );
107 
108  /* =====================================================================
109  Performs operation using CPU BLAS
110  =================================================================== */
111  if ( opts.lapack ) {
112  cpu_time = magma_wtime();
113  blasf77_ztrsv( &opts.uplo, &opts.transA, &opts.diag,
114  &N,
115  h_A, &lda,
116  h_x, &ione );
117  cpu_time = magma_wtime() - cpu_time;
118  cpu_perf = gflops / cpu_time;
119  }
120 
121  /* =====================================================================
122  Check the result
123  =================================================================== */
124  // ||b - Ax|| / (||A||*||x||)
125  // error for CUBLAS
126  normA = lapackf77_zlange( "F", &N, &N, h_A, &lda, work );
127 
128  normx = lapackf77_zlange( "F", &N, &ione, h_xcublas, &ione, work );
129  blasf77_ztrmv( &opts.uplo, &opts.transA, &opts.diag,
130  &N,
131  h_A, &lda,
132  h_xcublas, &ione );
133  blasf77_zaxpy( &N, &c_neg_one, h_b, &ione, h_xcublas, &ione );
134  normr = lapackf77_zlange( "F", &N, &ione, h_xcublas, &N, work );
135  cublas_error = normr / (normA*normx);
136 
137  if ( opts.lapack ) {
138  printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n",
139  (int) N,
140  cublas_perf, 1000.*cublas_time,
141  cpu_perf, 1000.*cpu_time,
142  cublas_error );
143  }
144  else {
145  printf("%5d %7.2f (%7.2f) --- ( --- ) %8.2e\n",
146  (int) N,
147  cublas_perf, 1000.*cublas_time,
148  cublas_error );
149  }
150 
151  TESTING_FREE( h_A );
152  TESTING_FREE( h_x );
153  TESTING_FREE( h_xcublas );
154 
155  TESTING_DEVFREE( d_A );
156  TESTING_DEVFREE( d_x );
157  }
158  if ( opts.niter > 1 ) {
159  printf( "\n" );
160  }
161  }
162 
164  return 0;
165 }
void parse_opts(int argc, char **argv, magma_opts *opts)
#define blasf77_zaxpy
Definition: magma_zlapack.h:23
magma_int_t ntest
Definition: testings.h:124
#define lapackf77_zgetrf
Definition: magma_zlapack.h:64
#define FLOPS_ZTRSM(__side, __m, __n)
Definition: flops.h:215
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
magma_diag_t diag
Definition: testings.h:157
magma_trans_t transA
Definition: testings.h:154
magma_int_t niter
Definition: testings.h:138
#define TESTING_INIT()
Definition: testings.h:19
int magma_int_t
Definition: magmablas.h:12
#define TESTING_MALLOC(__ptr, __type, __size)
Definition: testings.h:34
#define magma_zgetvector(n, dx_src, incx, hy_dst, incy)
Definition: magmablas_z.h:637
#define TESTING_FREE(__ptr)
Definition: testings.h:54
#define TESTING_DEVFREE(__ptr)
Definition: testings.h:60
#define magma_zsetvector(n, hx_src, incx, dy_dst, incy)
Definition: magmablas_z.h:634
#define h_A(i, j)
Definition: testing_ztrsv.cpp:32
magma_side_t side
Definition: testings.h:156
magma_int_t ldda
#define blasf77_ztrsv
Definition: magma_zlapack.h:48
double magma_sync_wtime(magma_queue_t queue)
Definition: timer.cpp:119
double magma_wtime(void)
Definition: timer.cpp:110
magma_int_t nsize[MAX_NTEST]
Definition: testings.h:126
#define blasf77_zcopy
Definition: magma_zlapack.h:24
#define lapackf77_zlange
Definition: magma_zlapack.h:75
#define TESTING_FINALIZE()
Definition: testings.h:29
#define TESTING_DEVALLOC(__ptr, __type, __size)
Definition: testings.h:47
double real_Double_t
Definition: magma_types.h:27
#define magma_zsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_z.h:702
#define blasf77_ztrmv
Definition: magma_zlapack.h:46
magma_uplo_t uplo
Definition: testings.h:153
#define lapackf77_zlarnv
Definition: magma_zlapack.h:81
int lapack
Definition: testings.h:148

Here is the call graph for this function: