PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
example_cgeqrf.c
Go to the documentation of this file.
1 
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <math.h>
21 
22 #include <plasma.h>
23 #include <cblas.h>
24 #include <lapacke.h>
25 #include <core_blas.h>
26 
27 #ifndef max
28 #define max(a, b) ((a) > (b) ? (a) : (b))
29 #endif
30 #ifndef min
31 #define min(a, b) ((a) < (b) ? (a) : (b))
32 #endif
33 
34 int check_orthogonality(int, int, int, PLASMA_Complex32_t*);
36 
37 int IONE=1;
38 int ISEED[4] = {0,0,0,1}; /* initial seed for clarnv() */
39 
40 int main ()
41 {
42 
43  int cores = 2;
44  int M = 15;
45  int N = 10;
46  int LDA = 15;
47  int K = min(M, N);
48  int info;
49  int info_ortho, info_factorization;
50  int i,j;
51  int LDAxN = LDA*N;
52 
53  PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
54  PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
55  PLASMA_Complex32_t *Q = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
57 
58  /* Check if unable to allocate memory */
59  if ((!A1)||(!A2)||(!Q)){
60  printf("Out of Memory \n ");
61  return EXIT_SUCCESS;
62  }
63 
64  /* Plasma Initialization */
65  PLASMA_Init(cores);
66  printf("-- PLASMA is initialized to run on %d cores. \n",cores);
67 
68  /* Allocate T */
70 
71  /* Initialize A1 and A2 */
72  LAPACKE_clarnv_work(IONE, ISEED, LDAxN, A1);
73  for (i = 0; i < M; i++)
74  for (j = 0; j < N; j++)
75  A2[LDA*j+i] = A1[LDA*j+i] ;
76 
77  /* Factorization QR of the matrix A2 */
78  info = PLASMA_cgeqrf(M, N, A2, LDA, T);
79 
80  /* Building the economy-size Q */
81  memset((void*)Q, 0, LDA*N*sizeof(PLASMA_Complex32_t));
82  for (i = 0; i < K; i++)
83  Q[LDA*i+i] = 1.0;
84 
85  PLASMA_cungqr(M, N, K, A2, LDA, T, Q, LDA);
86 
87  /* Check the orthogonality, factorization and the solution */
88  info_ortho = check_orthogonality(M, N, LDA, Q);
89  info_factorization = check_factorization(M, N, A1, A2, LDA, Q);
90  printf("--- info %d %d %d \n",info_factorization,info_ortho,info);
91  if ((info_ortho != 0)|(info_factorization != 0)|(info != 0))
92  printf("-- Error in CGEQRF example ! \n");
93  else
94  printf("-- Run of CGEQRF example successful ! \n");
95 
96  free(A1); free(A2); free(Q); free(T);
97 
99 
100  return EXIT_SUCCESS;
101 }
102 
103 /*-------------------------------------------------------------------
104  * Check the orthogonality of Q
105  */
106 
107 int check_orthogonality(int M, int N, int LDQ, PLASMA_Complex32_t *Q)
108 {
109  float alpha, beta;
110  float normQ;
111  int info_ortho;
112  int i;
113  int minMN = min(M, N);
114  float eps;
115  float *work = (float *)malloc(minMN*sizeof(float));
116 
117  eps = LAPACKE_slamch_work('e');
118  alpha = 1.0;
119  beta = -1.0;
120 
121  /* Build the idendity matrix USE DLASET?*/
122  PLASMA_Complex32_t *Id = (PLASMA_Complex32_t *) malloc(minMN*minMN*sizeof(PLASMA_Complex32_t));
123  memset((void*)Id, 0, minMN*minMN*sizeof(PLASMA_Complex32_t));
124  for (i = 0; i < minMN; i++)
125  Id[i*minMN+i] = (PLASMA_Complex32_t)1.0;
126 
127  /* Perform Id - Q'Q */
128  if (M >= N)
129  cblas_cherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, alpha, Q, LDQ, beta, Id, N);
130  else
131  cblas_cherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
132 
133  normQ = LAPACKE_clansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'u', minMN, Id, minMN, work);
134 
135  printf("============\n");
136  printf("Checking the orthogonality of Q \n");
137  printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps));
138 
139  if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) {
140  printf("-- Orthogonality is suspicious ! \n");
141  info_ortho=1;
142  }
143  else {
144  printf("-- Orthogonality is CORRECT ! \n");
145  info_ortho=0;
146  }
147 
148  free(work); free(Id);
149 
150  return info_ortho;
151 }
152 
153 /*------------------------------------------------------------
154  * Check the factorization QR
155  */
156 
157 int check_factorization(int M, int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, PLASMA_Complex32_t *Q)
158 {
159  float Anorm, Rnorm;
160  PLASMA_Complex32_t alpha, beta;
161  int info_factorization;
162  int i,j;
163  float eps;
164 
165  eps = LAPACKE_slamch_work('e');
166 
167  PLASMA_Complex32_t *Ql = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t));
168  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t));
169  float *work = (float *)malloc(max(M,N)*sizeof(float));
170 
171  alpha=1.0;
172  beta=0.0;
173 
174  if (M >= N) {
175  /* Extract the R */
176  PLASMA_Complex32_t *R = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
177  memset((void*)R, 0, N*N*sizeof(PLASMA_Complex32_t));
178  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);
179 
180  /* Perform Ql=Q*R */
181  memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex32_t));
182  cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M);
183  free(R);
184  }
185  else {
186  /* Extract the L */
187  PLASMA_Complex32_t *L = (PLASMA_Complex32_t *)malloc(M*M*sizeof(PLASMA_Complex32_t));
188  memset((void*)L, 0, M*M*sizeof(PLASMA_Complex32_t));
189  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);
190 
191  /* Perform Ql=LQ */
192  memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex32_t));
193  cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M);
194  free(L);
195  }
196 
197  /* Compute the Residual */
198  for (i = 0; i < M; i++)
199  for (j = 0 ; j < N; j++)
200  Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];
201 
202  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Residual, M, work);
203  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A2, LDA, work);
204 
205  if (M >= N) {
206  printf("============\n");
207  printf("Checking the QR Factorization \n");
208  printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
209  }
210  else {
211  printf("============\n");
212  printf("Checking the LQ Factorization \n");
213  printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
214  }
215 
216  if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) {
217  printf("-- Factorization is suspicious ! \n");
218  info_factorization = 1;
219  }
220  else {
221  printf("-- Factorization is CORRECT ! \n");
222  info_factorization = 0;
223  }
224 
225  free(work); free(Ql); free(Residual);
226 
227  return info_factorization;
228 }