MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_sgetrf_gpu.cpp File Reference
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda_runtime_api.h>
#include <cublas.h>
#include "flops.h"
#include "magma.h"
#include "magma_lapack.h"
#include "testings.h"
Include dependency graph for testing_sgetrf_gpu.cpp:

Go to the source code of this file.

Functions

void init_matrix (int m, int n, float *h_A, magma_int_t lda)
 
float get_residual (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, magma_int_t *ipiv)
 
float get_LU_error (magma_int_t M, magma_int_t N, float *LU, magma_int_t lda, magma_int_t *ipiv)
 
int main (int argc, char **argv)
 

Function Documentation

float get_LU_error ( magma_int_t  M,
magma_int_t  N,
float *  LU,
magma_int_t  lda,
magma_int_t ipiv 
)

Definition at line 102 of file testing_sgetrf_gpu.cpp.

References A, blasf77_sgemm(), init_matrix(), L, lapackf77_slacpy(), lapackf77_slange(), lapackf77_slaswp(), MAGMA_S_MAKE, MAGMA_S_ONE, MAGMA_S_SUB, MAGMA_S_ZERO, MagmaLowerStr, MagmaUpperStr, min, TESTING_FREE, and TESTING_MALLOC.

105 {
106  magma_int_t min_mn = min(M,N);
107  magma_int_t ione = 1;
108  magma_int_t i, j;
109  float alpha = MAGMA_S_ONE;
110  float beta = MAGMA_S_ZERO;
111  float *A, *L, *U;
112  float work[1], matnorm, residual;
113 
114  TESTING_MALLOC( A, float, lda*N );
115  TESTING_MALLOC( L, float, M*min_mn );
116  TESTING_MALLOC( U, float, min_mn*N );
117  memset( L, 0, M*min_mn*sizeof(float) );
118  memset( U, 0, min_mn*N*sizeof(float) );
119 
120  // set to original A
121  init_matrix( M, N, A, lda );
122  lapackf77_slaswp( &N, A, &lda, &ione, &min_mn, ipiv, &ione);
123 
124  // copy LU to L and U, and set diagonal to 1
125  lapackf77_slacpy( MagmaLowerStr, &M, &min_mn, LU, &lda, L, &M );
126  lapackf77_slacpy( MagmaUpperStr, &min_mn, &N, LU, &lda, U, &min_mn );
127  for(j=0; j<min_mn; j++)
128  L[j+j*M] = MAGMA_S_MAKE( 1., 0. );
129 
130  matnorm = lapackf77_slange("f", &M, &N, A, &lda, work);
131 
132  blasf77_sgemm("N", "N", &M, &N, &min_mn,
133  &alpha, L, &M, U, &min_mn, &beta, LU, &lda);
134 
135  for( j = 0; j < N; j++ ) {
136  for( i = 0; i < M; i++ ) {
137  LU[i+j*lda] = MAGMA_S_SUB( LU[i+j*lda], A[i+j*lda] );
138  }
139  }
140  residual = lapackf77_slange("f", &M, &N, LU, &lda, work);
141 
142  TESTING_FREE(A);
143  TESTING_FREE(L);
144  TESTING_FREE(U);
145 
146  return residual / (matnorm * N);
147 }
#define min(a, b)
Definition: common_magma.h:86
void init_matrix(int m, int n, magmaFloatComplex *h_A, magma_int_t lda)
void lapackf77_slacpy(const char *uplo, const magma_int_t *m, const magma_int_t *n, const float *A, const magma_int_t *lda, float *B, const magma_int_t *ldb)
int magma_int_t
Definition: magmablas.h:12
#define TESTING_MALLOC(__ptr, __type, __size)
Definition: testings.h:34
#define TESTING_FREE(__ptr)
Definition: testings.h:54
#define L(i, j)
#define MAGMA_S_SUB(a, b)
Definition: magma.h:193
#define MAGMA_S_ONE
Definition: magma.h:198
#define MAGMA_S_ZERO
Definition: magma.h:197
float lapackf77_slange(const char *norm, const magma_int_t *m, const magma_int_t *n, const float *A, const magma_int_t *lda, float *work)
#define LU(i, j)
void lapackf77_slaswp(const magma_int_t *n, float *A, const magma_int_t *lda, const magma_int_t *k1, const magma_int_t *k2, magma_int_t *ipiv, const magma_int_t *incx)
#define MagmaLowerStr
Definition: magma.h:85
#define A(i, j)
Definition: cprint.cpp:16
#define MAGMA_S_MAKE(r, i)
Definition: magma.h:189
void blasf77_sgemm(const char *transa, const char *transb, const magma_int_t *m, const magma_int_t *n, const magma_int_t *k, const float *alpha, const float *A, const magma_int_t *lda, const float *B, const magma_int_t *ldb, const float *beta, float *C, const magma_int_t *ldc)
#define MagmaUpperStr
Definition: magma.h:84

Here is the call graph for this function:

float get_residual ( magma_int_t  m,
magma_int_t  n,
float *  A,
magma_int_t  lda,
magma_int_t ipiv 
)

Definition at line 43 of file testing_sgetrf_gpu.cpp.

References blasf77_scopy(), blasf77_sgemv(), init_matrix(), lapackf77_sgetrs(), lapackf77_slange(), lapackf77_slarnv(), MAGMA_S_NEG_ONE, MAGMA_S_ONE, magma_strerror(), TESTING_FREE, and TESTING_MALLOC.

47 {
48  if ( m != n ) {
49  printf( "\nERROR: residual check defined only for square matrices\n" );
50  return -1;
51  }
52 
53  const float c_one = MAGMA_S_ONE;
54  const float c_neg_one = MAGMA_S_NEG_ONE;
55  const magma_int_t ione = 1;
56 
57  // this seed should be DIFFERENT than used in init_matrix
58  // (else x is column of A, so residual can be exactly zero)
59  magma_int_t ISEED[4] = {0,0,0,2};
60  magma_int_t info = 0;
61  float *x, *b;
62 
63  // initialize RHS
64  TESTING_MALLOC( x, float, n );
65  TESTING_MALLOC( b, float, n );
66  lapackf77_slarnv( &ione, ISEED, &n, b );
67  blasf77_scopy( &n, b, &ione, x, &ione );
68 
69  // solve Ax = b
70  lapackf77_sgetrs( "Notrans", &n, &ione, A, &lda, ipiv, x, &n, &info );
71  if (info != 0)
72  printf("lapackf77_sgetrs returned error %d: %s.\n",
73  (int) info, magma_strerror( info ));
74 
75  // reset to original A
76  init_matrix( m, n, A, lda );
77 
78  // compute r = Ax - b, saved in b
79  blasf77_sgemv( "Notrans", &m, &n, &c_one, A, &lda, x, &ione, &c_neg_one, b, &ione );
80 
81  // compute residual |Ax - b| / (n*|A|*|x|)
82  float norm_x, norm_A, norm_r, work[1];
83  norm_A = lapackf77_slange( "F", &m, &n, A, &lda, work );
84  norm_r = lapackf77_slange( "F", &n, &ione, b, &n, work );
85  norm_x = lapackf77_slange( "F", &n, &ione, x, &n, work );
86 
87  //printf( "r=\n" ); magma_sprint( 1, n, b, 1 );
88 
89  TESTING_FREE( x );
90  TESTING_FREE( b );
91 
92  //printf( "r=%.2e, A=%.2e, x=%.2e, n=%d\n", norm_r, norm_A, norm_x, n );
93  return norm_r / (n * norm_A * norm_x);
94 }
void lapackf77_slarnv(const magma_int_t *idist, magma_int_t *iseed, const magma_int_t *n, float *x)
void init_matrix(int m, int n, magmaFloatComplex *h_A, magma_int_t lda)
#define MAGMA_S_NEG_ONE
Definition: magma.h:200
int magma_int_t
Definition: magmablas.h:12
#define TESTING_MALLOC(__ptr, __type, __size)
Definition: testings.h:34
#define TESTING_FREE(__ptr)
Definition: testings.h:54
#define MAGMA_S_ONE
Definition: magma.h:198
void blasf77_sgemv(const char *transa, const magma_int_t *m, const magma_int_t *n, const float *alpha, const float *A, const magma_int_t *lda, const float *x, const magma_int_t *incx, const float *beta, float *y, const magma_int_t *incy)
float lapackf77_slange(const char *norm, const magma_int_t *m, const magma_int_t *n, const float *A, const magma_int_t *lda, float *work)
#define A(i, j)
Definition: cprint.cpp:16
const char * magma_strerror(magma_err_t error)
Definition: error.cpp:227
void lapackf77_sgetrs(const char *trans, const magma_int_t *n, const magma_int_t *nrhs, const float *A, const magma_int_t *lda, const magma_int_t *ipiv, float *B, const magma_int_t *ldb, magma_int_t *info)
void blasf77_scopy(const magma_int_t *n, const float *x, const magma_int_t *incx, float *y, const magma_int_t *incy)

Here is the call graph for this function:

void init_matrix ( int  m,
int  n,
float *  h_A,
magma_int_t  lda 
)

Definition at line 29 of file testing_sgetrf_gpu.cpp.

References lapackf77_slarnv().

30 {
31  magma_int_t ione = 1;
32  magma_int_t ISEED[4] = {0,0,0,1};
33  magma_int_t n2 = lda*n;
34  lapackf77_slarnv( &ione, ISEED, &n2, h_A );
35 }
void lapackf77_slarnv(const magma_int_t *idist, magma_int_t *iseed, const magma_int_t *n, float *x)
int magma_int_t
Definition: magmablas.h:12
#define h_A(i, j)

Here is the call graph for this function:

int main ( int  argc,
char **  argv 
)

Definition at line 153 of file testing_sgetrf_gpu.cpp.

References magma_opts::check, FLOPS_SGETRF, get_LU_error(), get_residual(), h_A, init_matrix(), magma_opts::lapack, lapackf77_sgetrf(), lapackf77_slamch, magma_sgetmatrix, magma_sgetrf_gpu(), magma_ssetmatrix, magma_strerror(), magma_wtime(), min, magma_opts::msize, magma_opts::niter, magma_opts::nsize, magma_opts::ntest, parse_opts(), TESTING_DEVALLOC, TESTING_DEVFREE, TESTING_FINALIZE, TESTING_FREE, TESTING_INIT, TESTING_MALLOC, and magma_opts::tolerance.

154 {
155  TESTING_INIT();
156 
157  real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0;
158  float error;
159  float *h_A;
160  float *d_A;
161  magma_int_t *ipiv;
162  magma_int_t M, N, n2, lda, ldda, info, min_mn;
163  magma_int_t status = 0;
164 
165  magma_opts opts;
166  parse_opts( argc, argv, &opts );
167 
168  float tol = opts.tolerance * lapackf77_slamch("E");
169 
170  if ( opts.check == 2 ) {
171  printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |Ax-b|/(N*|A|*|x|)\n");
172  }
173  else {
174  printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |PA-LU|/(N*|A|)\n");
175  }
176  printf("=========================================================================\n");
177  for( int i = 0; i < opts.ntest; ++i ) {
178  for( int iter = 0; iter < opts.niter; ++iter ) {
179  M = opts.msize[i];
180  N = opts.nsize[i];
181  min_mn = min(M, N);
182  lda = M;
183  n2 = lda*N;
184  ldda = ((M+31)/32)*32;
185  gflops = FLOPS_SGETRF( M, N ) / 1e9;
186 
187  TESTING_MALLOC( ipiv, magma_int_t, min_mn );
188  TESTING_MALLOC( h_A, float, n2 );
189  TESTING_DEVALLOC( d_A, float, ldda*N );
190 
191  /* =====================================================================
192  Performs operation using LAPACK
193  =================================================================== */
194  if ( opts.lapack ) {
195  init_matrix( M, N, h_A, lda );
196 
197  cpu_time = magma_wtime();
198  lapackf77_sgetrf(&M, &N, h_A, &lda, ipiv, &info);
199  cpu_time = magma_wtime() - cpu_time;
200  cpu_perf = gflops / cpu_time;
201  if (info != 0)
202  printf("lapackf77_sgetrf returned error %d: %s.\n",
203  (int) info, magma_strerror( info ));
204  }
205 
206  /* ====================================================================
207  Performs operation using MAGMA
208  =================================================================== */
209  init_matrix( M, N, h_A, lda );
210  magma_ssetmatrix( M, N, h_A, lda, d_A, ldda );
211 
212  gpu_time = magma_wtime();
213  magma_sgetrf_gpu( M, N, d_A, ldda, ipiv, &info);
214  gpu_time = magma_wtime() - gpu_time;
215  gpu_perf = gflops / gpu_time;
216  if (info != 0)
217  printf("magma_sgetrf_gpu returned error %d: %s.\n",
218  (int) info, magma_strerror( info ));
219 
220  /* =====================================================================
221  Check the factorization
222  =================================================================== */
223  if ( opts.lapack ) {
224  printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)",
225  (int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
226  }
227  else {
228  printf("%5d %5d --- ( --- ) %7.2f (%7.2f)",
229  (int) M, (int) N, gpu_perf, gpu_time );
230  }
231  if ( opts.check == 2 ) {
232  magma_sgetmatrix( M, N, d_A, ldda, h_A, lda );
233  error = get_residual( M, N, h_A, lda, ipiv );
234  printf(" %8.2e%s\n", error, (error < tol ? "" : " failed"));
235  status |= ! (error < tol);
236  }
237  else if ( opts.check ) {
238  magma_sgetmatrix( M, N, d_A, ldda, h_A, lda );
239  error = get_LU_error( M, N, h_A, lda, ipiv );
240  printf(" %8.2e%s\n", error, (error < tol ? "" : " failed"));
241  status |= ! (error < tol);
242  }
243  else {
244  printf(" --- \n");
245  }
246 
247  TESTING_FREE( ipiv );
248  TESTING_FREE( h_A );
249  TESTING_DEVFREE( d_A );
250  }
251  if ( opts.niter > 1 ) {
252  printf( "\n" );
253  }
254  }
255 
257  return status;
258 }
void parse_opts(int argc, char **argv, magma_opts *opts)
#define min(a, b)
Definition: common_magma.h:86
magma_int_t ntest
Definition: testings.h:124
void init_matrix(int m, int n, magmaFloatComplex *h_A, magma_int_t lda)
magma_int_t niter
Definition: testings.h:138
#define TESTING_INIT()
Definition: testings.h:19
int magma_int_t
Definition: magmablas.h:12
#define TESTING_MALLOC(__ptr, __type, __size)
Definition: testings.h:34
#define TESTING_FREE(__ptr)
Definition: testings.h:54
double get_LU_error(magma_int_t M, magma_int_t N, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *LU, magma_int_t *IPIV)
#define TESTING_DEVFREE(__ptr)
Definition: testings.h:60
#define h_A(i, j)
float get_residual(magma_int_t m, magma_int_t n, magmaFloatComplex *A, magma_int_t lda, magma_int_t *ipiv)
#define magma_ssetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_s.h:702
#define FLOPS_SGETRF(__m, __n)
Definition: flops.h:226
int check
Definition: testings.h:147
magma_int_t ldda
double magma_wtime(void)
Definition: timer.cpp:110
#define magma_sgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_s.h:705
#define lapackf77_slamch
Definition: magma_lapack.h:26
magma_int_t nsize[MAX_NTEST]
Definition: testings.h:126
const char * magma_strerror(magma_err_t error)
Definition: error.cpp:227
#define TESTING_FINALIZE()
Definition: testings.h:29
#define TESTING_DEVALLOC(__ptr, __type, __size)
Definition: testings.h:47
void lapackf77_sgetrf(const magma_int_t *m, const magma_int_t *n, float *A, const magma_int_t *lda, magma_int_t *ipiv, magma_int_t *info)
magma_int_t magma_sgetrf_gpu(magma_int_t m, magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
Definition: sgetrf_gpu.cpp:14
double real_Double_t
Definition: magma_types.h:27
magma_int_t msize[MAX_NTEST]
Definition: testings.h:125
double tolerance
Definition: testings.h:144
int lapack
Definition: testings.h:148

Here is the call graph for this function: