14 external cublas_init, cublas_set_matrix, cublas_get_matrix
15 external cublas_shutdown, cublas_alloc
16 external zlange, zgemm, zgesv, dlamch
18 double precision zlange, dlamch
21 double precision :: rnumber(2), anorm, bnorm, rnorm, xnorm
22 double precision,
allocatable :: work(:)
23 complex*16,
allocatable :: h_a(:), h_b(:), h_x(:)
24 magma_devptr_t :: devptra, devptrb
25 integer,
allocatable :: ipiv(:)
27 complex*16 :: zone, mzone
28 integer :: i, n, info, stat, lda
29 integer :: size_of_elt, nrhs
30 real(kind=8) :: flops, t, tstart, tend
32 parameter( nrhs = 1, zone = 1., mzone = -1. )
39 size_of_elt = sizeof_complex_16
43 allocate(h_b(lda*nrhs))
44 allocate(h_x(lda*nrhs))
49 stat = cublas_alloc(ldda*n, size_of_elt, devptra)
51 write(*,*)
"device memory allocation failed"
55 stat = cublas_alloc(ldda*nrhs, size_of_elt, devptrb)
57 write(*,*)
"device memory allocation failed"
63 call random_number(rnumber)
68 call random_number(rnumber)
74 call cublas_set_matrix(n, n, size_of_elt, h_a, lda, devptra, ldda)
77 call cublas_set_matrix(n, nrhs, size_of_elt, h_b, lda, devptrb, ldda)
84 if ( info .ne. 0 )
then
85 write(*,*)
"Info : ", info
91 if ( info .ne. 0 )
then
92 write(*,*)
"Info : ", info
96 call cublas_get_matrix(n, nrhs, size_of_elt, devptrb, ldda, h_x, lda)
99 anorm = zlange(
'I', n, n, h_a, lda, work)
100 bnorm = zlange(
'I', n, nrhs, h_b, lda, work)
101 xnorm = zlange(
'I', n, nrhs, h_x, lda, work)
102 call zgemm(
'n',
'n', n, nrhs, n, zone, h_a, lda, h_x, lda, mzone, h_b, lda)
103 rnorm = zlange(
'I', n, nrhs, h_b, lda, work)
106 write(*,* )
'Solving A x = b using LU factorization:'
107 write(*,105)
' || A || = ', anorm
108 write(*,105)
' || b || = ', bnorm
109 write(*,105)
' || x || = ', xnorm
110 write(*,105)
' || b - A x || = ', rnorm
112 flops = 2. * n * n * n / 3.
115 write(*,*)
' Gflops = ', flops / t / 1e9
118 rnorm = rnorm / ( (anorm*xnorm+bnorm) * n * dlamch(
'E') )
120 if ( rnorm > 60. )
then
121 write(*,105)
' Solution is suspicious, ', rnorm
123 write(*,105)
' Solution is CORRECT'
127 deallocate(h_a, h_x, h_b, work, ipiv)
130 call cublas_free(devptra)
131 call cublas_free(devptrb)
132 call cublas_shutdown()