PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zauxiliary.c
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <math.h>
10 #include <cblas.h>
11 #include <lapacke.h>
12 #include <plasma.h>
13 #include <core_blas.h>
14 #include "auxiliary.h"
15 
16 /*-------------------------------------------------------------------
17  * Check the orthogonality of Q
18  */
19 
20 int z_check_orthogonality(int M, int N, int LDQ, PLASMA_Complex64_t *Q)
21 {
22  double alpha, beta;
23  double normQ;
24  int info_ortho;
25  int i;
26  int minMN = min(M, N);
27  double eps;
28  double *work = (double *)malloc(minMN*sizeof(double));
29 
30  eps = LAPACKE_dlamch_work('e');
31  alpha = 1.0;
32  beta = -1.0;
33 
34  /* Build the idendity matrix USE DLASET?*/
35  PLASMA_Complex64_t *Id = (PLASMA_Complex64_t *) malloc(minMN*minMN*sizeof(PLASMA_Complex64_t));
36  memset((void*)Id, 0, minMN*minMN*sizeof(PLASMA_Complex64_t));
37  for (i = 0; i < minMN; i++)
38  Id[i*minMN+i] = (PLASMA_Complex64_t)1.0;
39 
40  /* Perform Id - Q'Q */
41  if (M >= N)
42  cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, alpha, Q, LDQ, beta, Id, N);
43  else
44  cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
45 
46  normQ = LAPACKE_zlansy_work(LAPACK_COL_MAJOR, 'i', 'u', minMN, Id, minMN, work);
47 
48  printf("============\n");
49  printf("Checking the orthogonality of Q \n");
50  printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps));
51 
52  if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) {
53  printf("-- Orthogonality is suspicious ! \n");
54  info_ortho=1;
55  }
56  else {
57  printf("-- Orthogonality is CORRECT ! \n");
58  info_ortho=0;
59  }
60 
61  free(work); free(Id);
62 
63  return info_ortho;
64 }
65 
66 /*------------------------------------------------------------
67  * Check the factorization QR
68  */
69 
71 {
72  double Anorm, Rnorm;
73  PLASMA_Complex64_t alpha, beta;
74  int info_factorization;
75  int i,j;
76  double eps;
77 
78  eps = LAPACKE_dlamch_work('e');
79 
80  PLASMA_Complex64_t *Ql = (PLASMA_Complex64_t *)malloc(M*N*sizeof(PLASMA_Complex64_t));
81  PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(M*N*sizeof(PLASMA_Complex64_t));
82  double *work = (double *)malloc(max(M,N)*sizeof(double));
83 
84  alpha=1.0;
85  beta=0.0;
86 
87  if (M >= N) {
88  /* Extract the R */
89  PLASMA_Complex64_t *R = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
90  memset((void*)R, 0, N*N*sizeof(PLASMA_Complex64_t));
91  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);
92 
93  /* Perform Ql=Q*R */
94  memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex64_t));
95  cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M);
96  free(R);
97  }
98  else {
99  /* Extract the L */
100  PLASMA_Complex64_t *L = (PLASMA_Complex64_t *)malloc(M*M*sizeof(PLASMA_Complex64_t));
101  memset((void*)L, 0, M*M*sizeof(PLASMA_Complex64_t));
102  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);
103 
104  /* Perform Ql=LQ */
105  memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex64_t));
106  cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M);
107  free(L);
108  }
109 
110  /* Compute the Residual */
111  for (i = 0; i < M; i++)
112  for (j = 0 ; j < N; j++)
113  Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];
114 
115  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, N, Residual, M, work);
116  Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, N, A2, LDA, work);
117 
118  if (M >= N) {
119  printf("============\n");
120  printf("Checking the QR Factorization \n");
121  printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
122  }
123  else {
124  printf("============\n");
125  printf("Checking the LQ Factorization \n");
126  printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
127  }
128 
129  if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) {
130  printf("-- Factorization is suspicious ! \n");
131  info_factorization = 1;
132  }
133  else {
134  printf("-- Factorization is CORRECT ! \n");
135  info_factorization = 0;
136  }
137 
138  free(work); free(Ql); free(Residual);
139 
140  return info_factorization;
141 }
142 
143 /*------------------------------------------------------------------------
144  * Check the factorization of the matrix A2
145  */
146 
148 {
149  double Anorm, Rnorm;
150  PLASMA_Complex64_t alpha;
151  int info_factorization;
152  int i,j;
153  double eps;
154 
155  eps = LAPACKE_dlamch_work('e');
156 
157  PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
158  PLASMA_Complex64_t *L1 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
159  PLASMA_Complex64_t *L2 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
160  double *work = (double *)malloc(N*sizeof(double));
161 
162  memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex64_t));
163  memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex64_t));
164 
165  alpha= 1.0;
166 
167  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
168 
169  /* Dealing with L'L or U'U */
170  if (uplo == PlasmaUpper){
171  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
172  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
174  }
175  else{
176  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
177  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
179  }
180 
181  /* Compute the Residual || A -L'L|| */
182  for (i = 0; i < N; i++)
183  for (j = 0; j < N; j++)
184  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
185 
186  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', N, N, Residual, N, work);
187  Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', N, N, A1, LDA, work);
188 
189  printf("============\n");
190  printf("Checking the Cholesky Factorization \n");
191  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
192 
193  if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){
194  printf("-- Factorization is suspicious ! \n");
195  info_factorization = 1;
196  }
197  else{
198  printf("-- Factorization is CORRECT ! \n");
199  info_factorization = 0;
200  }
201 
202  free(Residual); free(L1); free(L2); free(work);
203 
204  return info_factorization;
205 }
206 
207 /*--------------------------------------------------------------
208  * Check the gemm
209  */
210 double z_check_gemm(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
211  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA,
212  PLASMA_Complex64_t *B, int LDB,
213  PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cplasma,
214  PLASMA_Complex64_t *Cref, int LDC,
215  double *Cinitnorm, double *Cplasmanorm, double *Clapacknorm )
216 {
217  PLASMA_Complex64_t beta_const = -1.0;
218  double Rnorm;
219  double *work = (double *)malloc(max(K,max(M, N))* sizeof(double));
220 
221  *Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
222  *Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cplasma, LDC, work);
223 
224  cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
225  CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC);
226 
227  *Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
228 
229  cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);
230 
231  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
232 
233  free(work);
234 
235  return Rnorm;
236 }
237 
238 /*--------------------------------------------------------------
239  * Check the trsm
240  */
242  int M, int NRHS, PLASMA_Complex64_t alpha,
243  PLASMA_Complex64_t *A, int LDA,
244  PLASMA_Complex64_t *Bplasma, PLASMA_Complex64_t *Bref, int LDB,
245  double *Binitnorm, double *Bplasmanorm, double *Blapacknorm )
246 {
247  PLASMA_Complex64_t beta_const = -1.0;
248  double Rnorm;
249  double *work = (double *)malloc(max(M, NRHS)* sizeof(double));
250  /*double eps = LAPACKE_dlamch_work('e');*/
251 
252  *Binitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, Bref, LDB, work);
253  *Bplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bplasma, LDB, work);
254 
256  (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, NRHS,
257  CBLAS_SADDR(alpha), A, LDA, Bref, LDB);
258 
259  *Blapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bref, LDB, work);
260 
261  cblas_zaxpy(LDB * NRHS, CBLAS_SADDR(beta_const), Bplasma, 1, Bref, 1);
262 
263  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bref, LDB, work);
264  Rnorm = Rnorm / *Blapacknorm;
265  /* max(M,NRHS) * eps);*/
266 
267  free(work);
268 
269  return Rnorm;
270 }
271 
272 /*--------------------------------------------------------------
273  * Check the solution
274  */
275 
276 double z_check_solution(int M, int N, int NRHS, PLASMA_Complex64_t *A, int LDA,
278  double *anorm, double *bnorm, double *xnorm )
279 {
280 /* int info_solution; */
281  double Rnorm = -1.00;
282  PLASMA_Complex64_t zone = 1.0;
283  PLASMA_Complex64_t mzone = -1.0;
284  double *work = (double *)malloc(max(M, N)* sizeof(double));
285 
286  *anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, N, A, LDA, work);
287  *xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, X, LDB, work);
288  *bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', N, NRHS, B, LDB, work);
289 
290  cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(zone), A, LDA, X, LDB, CBLAS_SADDR(mzone), B, LDB);
291 
292  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'i', N, NRHS, B, M, work);
293 
294  free(work);
295 
296  return Rnorm;
297 }