PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
example_ctrsmpl.c
Go to the documentation of this file.
1 
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <math.h>
22 
23 #include <plasma.h>
24 #include <cblas.h>
25 #include <lapacke.h>
26 #include <core_blas.h>
27 
29 
30 int IONE=1;
31 int ISEED[4] = {0,0,0,1}; /* initial seed for clarnv() */
32 
33 int main ()
34 {
35 
36  int cores = 2;
37  int N = 10;
38  int LDA = 10;
39  int NRHS = 5;
40  int LDB = 10;
41  int info;
42  int info_solution;
43  int i,j;
44  int LDAxN = LDA*N;
45  int LDBxNRHS = LDB*NRHS;
46 
47  PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A1));
48  PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A2));
49  PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B1));
50  PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B2));
52  int *IPIV;
53 
54  /* Check if unable to allocate memory */
55  if ((!A1)||(!A2)||(!B1)||(!B2)){
56  printf("Out of Memory \n ");
57  return EXIT_SUCCESS;
58  }
59 
60  /*Plasma Initialize*/
61  PLASMA_Init(cores);
62  printf("-- PLASMA is initialized to run on %d cores. \n",cores);
63 
64  /* Initialize A1 and A2 Matrix */
65  LAPACKE_clarnv_work(IONE, ISEED, LDAxN, A1);
66  for ( i = 0; i < N; i++)
67  for ( j = 0; j < N; j++)
68  A2[LDA*j+i] = A1[LDA*j+i];
69 
70  /* Initialize B1 and B2 */
71  LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1);
72  for ( i = 0; i < N; i++)
73  for ( j = 0; j < NRHS; j++)
74  B2[LDB*j+i] = B1[LDB*j+i];
75 
76 
77  /* Allocate L and IPIV */
78  info = PLASMA_Alloc_Workspace_cgetrf_incpiv(N, N, &L, &IPIV);
79 
80  /* LU factorization of the matrix A */
81  info = PLASMA_cgetrf_incpiv(N, N, A2, LDA, L, IPIV);
82 
83  /* Solve the problem */
84  info = PLASMA_ctrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB);
86  N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB);
87 
88  /* Check the solution */
89  info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB);
90 
91  if ((info_solution != 0)|(info != 0))
92  printf("-- Error in CGETRS example ! \n");
93  else
94  printf("-- Run of CGETRS example successful ! \n");
95 
96  free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L);
97 
99 
100  return EXIT_SUCCESS;
101 }
102 
103 /*------------------------------------------------------------------------
104  * Check the accuracy of the solution of the linear system
105  */
106 
107 int check_solution(int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB)
108 {
109  int info_solution;
110  float Rnorm, Anorm, Xnorm, Bnorm;
111  PLASMA_Complex32_t alpha, beta;
112  float *work = (float *)malloc(N*sizeof(float));
113  float eps;
114 
115  eps = LAPACKE_slamch_work('e');
116 
117  alpha = 1.0;
118  beta = -1.0;
119 
120  Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work);
121  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work);
122  Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work);
123 
124  cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);
125  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work);
126 
127  printf("============\n");
128  printf("Checking the Residual of the solution \n");
129  printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps));
130 
131  if ( isnan(Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)) || (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0) ){
132  printf("-- The solution is suspicious ! \n");
133  info_solution = 1;
134  }
135  else{
136  printf("-- The solution is CORRECT ! \n");
137  info_solution = 0;
138  }
139 
140  free(work);
141 
142  return info_solution;
143 }