PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zporfs.f
Go to the documentation of this file.
1  SUBROUTINE zporfs( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X,
2  $ ldx, ferr, berr, work, rwork, info )
3 *
4  include 'plasmaf.h'
5 *
6 * -- LAPACK routine (version 3.2) --
7 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
8 * November 2006
9 *
10 * Modified to call ZLACN2 in place of ZLACON, 10 Feb 03, SJH.
11 *
12 * .. Scalar Arguments ..
13  CHARACTER uplo
14  INTEGER info, lda, ldaf, ldb, ldx, n, nrhs
15 * ..
16 * .. Array Arguments ..
17  DOUBLE PRECISION berr( * ), ferr( * ), rwork( * )
18  COMPLEX*16 a( lda, * ), af( ldaf, * ), b( ldb, * ),
19  $ work( * ), x( ldx, * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * ZPORFS improves the computed solution to a system of linear
26 * equations when the coefficient matrix is Hermitian positive definite,
27 * and provides error bounds and backward error estimates for the
28 * solution.
29 *
30 * Arguments
31 * =========
32 *
33 * UPLO (input) CHARACTER*1
34 * = 'U': Upper triangle of A is stored;
35 * = 'L': Lower triangle of A is stored.
36 *
37 * N (input) INTEGER
38 * The order of the matrix A. N >= 0.
39 *
40 * NRHS (input) INTEGER
41 * The number of right hand sides, i.e., the number of columns
42 * of the matrices B and X. NRHS >= 0.
43 *
44 * A (input) COMPLEX*16 array, dimension (LDA,N)
45 * The Hermitian matrix A. If UPLO = 'U', the leading N-by-N
46 * upper triangular part of A contains the upper triangular part
47 * of the matrix A, and the strictly lower triangular part of A
48 * is not referenced. If UPLO = 'L', the leading N-by-N lower
49 * triangular part of A contains the lower triangular part of
50 * the matrix A, and the strictly upper triangular part of A is
51 * not referenced.
52 *
53 * LDA (input) INTEGER
54 * The leading dimension of the array A. LDA >= max(1,N).
55 *
56 * AF (input) COMPLEX*16 array, dimension (LDAF,N)
57 * The triangular factor U or L from the Cholesky factorization
58 * A = U**H*U or A = L*L**H, as computed by ZPOTRF.
59 *
60 * LDAF (input) INTEGER
61 * The leading dimension of the array AF. LDAF >= max(1,N).
62 *
63 * B (input) COMPLEX*16 array, dimension (LDB,NRHS)
64 * The right hand side matrix B.
65 *
66 * LDB (input) INTEGER
67 * The leading dimension of the array B. LDB >= max(1,N).
68 *
69 * X (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
70 * On entry, the solution matrix X, as computed by ZPOTRS.
71 * On exit, the improved solution matrix X.
72 *
73 * LDX (input) INTEGER
74 * The leading dimension of the array X. LDX >= max(1,N).
75 *
76 * FERR (output) DOUBLE PRECISION array, dimension (NRHS)
77 * The estimated forward error bound for each solution vector
78 * X(j) (the j-th column of the solution matrix X).
79 * If XTRUE is the true solution corresponding to X(j), FERR(j)
80 * is an estimated upper bound for the magnitude of the largest
81 * element in (X(j) - XTRUE) divided by the magnitude of the
82 * largest element in X(j). The estimate is as reliable as
83 * the estimate for RCOND, and is almost always a slight
84 * overestimate of the true error.
85 *
86 * BERR (output) DOUBLE PRECISION array, dimension (NRHS)
87 * The componentwise relative backward error of each solution
88 * vector X(j) (i.e., the smallest relative change in
89 * any element of A or B that makes X(j) an exact solution).
90 *
91 * WORK (workspace) COMPLEX*16 array, dimension (2*N)
92 *
93 * RWORK (workspace) DOUBLE PRECISION array, dimension (N)
94 *
95 * INFO (output) INTEGER
96 * = 0: successful exit
97 * < 0: if INFO = -i, the i-th argument had an illegal value
98 *
99 * Internal Parameters
100 * ===================
101 *
102 * ITMAX is the maximum number of steps of iterative refinement.
103 *
104 * ====================================================================
105 *
106 * .. Parameters ..
107  INTEGER itmax
108  parameter( itmax = 5 )
109  DOUBLE PRECISION zero
110  parameter( zero = 0.0d+0 )
111  COMPLEX*16 one
112  parameter( one = ( 1.0d+0, 0.0d+0 ) )
113  DOUBLE PRECISION two
114  parameter( two = 2.0d+0 )
115  DOUBLE PRECISION three
116  parameter( three = 3.0d+0 )
117 * ..
118 * .. Local Scalars ..
119  LOGICAL upper
120  INTEGER count, i, j, k, kase, nz, plasma_uplo
121  DOUBLE PRECISION eps, lstres, s, safe1, safe2, safmin, xk
122  COMPLEX*16 zdum
123 * ..
124 * .. Local Arrays ..
125  INTEGER isave( 3 )
126 * ..
127 * .. External Subroutines ..
128  EXTERNAL xerbla, zaxpy, zcopy, zhemv, zlacn2, zpotrs
129 * ..
130 * .. Intrinsic Functions ..
131  INTRINSIC abs, dble, dimag, max
132 * ..
133 * .. External Functions ..
134  LOGICAL lsame
135  DOUBLE PRECISION dlamch
136  EXTERNAL lsame, dlamch
137 * ..
138 * .. Statement Functions ..
139  DOUBLE PRECISION cabs1
140 * ..
141 * .. Statement Function definitions ..
142  cabs1( zdum ) = abs( dble( zdum ) ) + abs( dimag( zdum ) )
143 * ..
144 * .. Executable Statements ..
145 *
146 * Test the input parameters.
147 *
148  info = 0
149  upper = lsame( uplo, 'U' )
150  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
151  info = -1
152  ELSE IF( n.LT.0 ) THEN
153  info = -2
154  ELSE IF( nrhs.LT.0 ) THEN
155  info = -3
156  ELSE IF( lda.LT.max( 1, n ) ) THEN
157  info = -5
158  ELSE IF( ldaf.LT.max( 1, n ) ) THEN
159  info = -7
160  ELSE IF( ldb.LT.max( 1, n ) ) THEN
161  info = -9
162  ELSE IF( ldx.LT.max( 1, n ) ) THEN
163  info = -11
164  END IF
165  IF( info.NE.0 ) THEN
166  CALL xerbla( 'ZPORFS', -info )
167  return
168  END IF
169 *
170 * Quick return if possible
171 *
172  IF( n.EQ.0 .OR. nrhs.EQ.0 ) THEN
173  DO 10 j = 1, nrhs
174  ferr( j ) = zero
175  berr( j ) = zero
176  10 continue
177  return
178  END IF
179 *
180  IF ( lsame( uplo, 'U' ) ) THEN
181  plasma_uplo = plasmaupper
182  ELSE
183  plasma_uplo = plasmalower
184  ENDIF
185 *
186 * NZ = maximum number of nonzero elements in each row of A, plus 1
187 *
188  nz = n + 1
189  eps = dlamch( 'Epsilon' )
190  safmin = dlamch( 'Safe minimum' )
191  safe1 = nz*safmin
192  safe2 = safe1 / eps
193 *
194 * Do for each right hand side
195 *
196  DO 140 j = 1, nrhs
197 *
198  count = 1
199  lstres = three
200  20 continue
201 *
202 * Loop until stopping criterion is satisfied.
203 *
204 * Compute residual R = B - A * X
205 *
206  CALL zcopy( n, b( 1, j ), 1, work, 1 )
207  CALL zhemv( uplo, n, -one, a, lda, x( 1, j ), 1, one, work, 1 )
208 *
209 * Compute componentwise relative backward error from formula
210 *
211 * max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
212 *
213 * where abs(Z) is the componentwise absolute value of the matrix
214 * or vector Z. If the i-th component of the denominator is less
215 * than SAFE2, then SAFE1 is added to the i-th components of the
216 * numerator and denominator before dividing.
217 *
218  DO 30 i = 1, n
219  rwork( i ) = cabs1( b( i, j ) )
220  30 continue
221 *
222 * Compute abs(A)*abs(X) + abs(B).
223 *
224  IF( upper ) THEN
225  DO 50 k = 1, n
226  s = zero
227  xk = cabs1( x( k, j ) )
228  DO 40 i = 1, k - 1
229  rwork( i ) = rwork( i ) + cabs1( a( i, k ) )*xk
230  s = s + cabs1( a( i, k ) )*cabs1( x( i, j ) )
231  40 continue
232  rwork( k ) = rwork( k ) + abs( dble( a( k, k ) ) )*xk + s
233  50 continue
234  ELSE
235  DO 70 k = 1, n
236  s = zero
237  xk = cabs1( x( k, j ) )
238  rwork( k ) = rwork( k ) + abs( dble( a( k, k ) ) )*xk
239  DO 60 i = k + 1, n
240  rwork( i ) = rwork( i ) + cabs1( a( i, k ) )*xk
241  s = s + cabs1( a( i, k ) )*cabs1( x( i, j ) )
242  60 continue
243  rwork( k ) = rwork( k ) + s
244  70 continue
245  END IF
246  s = zero
247  DO 80 i = 1, n
248  IF( rwork( i ).GT.safe2 ) THEN
249  s = max( s, cabs1( work( i ) ) / rwork( i ) )
250  ELSE
251  s = max( s, ( cabs1( work( i ) )+safe1 ) /
252  $ ( rwork( i )+safe1 ) )
253  END IF
254  80 continue
255  berr( j ) = s
256 *
257 * Test stopping criterion. Continue iterating if
258 * 1) The residual BERR(J) is larger than machine epsilon, and
259 * 2) BERR(J) decreased by at least a factor of 2 during the
260 * last iteration, and
261 * 3) At most ITMAX iterations tried.
262 *
263  IF( berr( j ).GT.eps .AND. two*berr( j ).LE.lstres .AND.
264  $ count.LE.itmax ) THEN
265 *
266 * Update solution and try again.
267 *
268  CALL plasma_zpotrs( plasma_uplo, n, 1, af, ldaf,
269  $ work, n, info )
270  CALL zaxpy( n, one, work, 1, x( 1, j ), 1 )
271  lstres = berr( j )
272  count = count + 1
273  go to 20
274  END IF
275 *
276 * Bound error from formula
277 *
278 * norm(X - XTRUE) / norm(X) .le. FERR =
279 * norm( abs(inv(A))*
280 * ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
281 *
282 * where
283 * norm(Z) is the magnitude of the largest component of Z
284 * inv(A) is the inverse of A
285 * abs(Z) is the componentwise absolute value of the matrix or
286 * vector Z
287 * NZ is the maximum number of nonzeros in any row of A, plus 1
288 * EPS is machine epsilon
289 *
290 * The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
291 * is incremented by SAFE1 if the i-th component of
292 * abs(A)*abs(X) + abs(B) is less than SAFE2.
293 *
294 * Use ZLACN2 to estimate the infinity-norm of the matrix
295 * inv(A) * diag(W),
296 * where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
297 *
298  DO 90 i = 1, n
299  IF( rwork( i ).GT.safe2 ) THEN
300  rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i )
301  ELSE
302  rwork( i ) = cabs1( work( i ) ) + nz*eps*rwork( i ) +
303  $ safe1
304  END IF
305  90 continue
306 *
307  kase = 0
308  100 continue
309  CALL zlacn2( n, work( n+1 ), work, ferr( j ), kase, isave )
310  IF( kase.NE.0 ) THEN
311  IF( kase.EQ.1 ) THEN
312 *
313 * Multiply by diag(W)*inv(A').
314 *
315  CALL plasma_zpotrs( plasma_uplo, n, 1, af, ldaf,
316  $ work, n, info )
317  DO 110 i = 1, n
318  work( i ) = rwork( i )*work( i )
319  110 continue
320  ELSE IF( kase.EQ.2 ) THEN
321 *
322 * Multiply by inv(A)*diag(W).
323 *
324  DO 120 i = 1, n
325  work( i ) = rwork( i )*work( i )
326  120 continue
327  CALL plasma_zpotrs( plasma_uplo, n, 1, af, ldaf,
328  $ work, n, info )
329  END IF
330  go to 100
331  END IF
332 *
333 * Normalize error.
334 *
335  lstres = zero
336  DO 130 i = 1, n
337  lstres = max( lstres, cabs1( x( i, j ) ) )
338  130 continue
339  IF( lstres.NE.zero )
340  $ ferr( j ) = ferr( j ) / lstres
341 *
342  140 continue
343 *
344  return
345 *
346 * End of ZPORFS
347 *
348  END