PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
time_zgetrf_rectil.c
Go to the documentation of this file.
1 
/*
 * Per-precision configuration for this timing test: the matrix element type
 * (_TYPE), the matching real type (_PREC), the LAPACKE machine-parameter
 * routine (_LAMCH), the test name (_NAME), and the FMULS/FADDS macros
 * evaluated for GETRF on an M x NRHS problem.
 * NOTE(review): these are defined ahead of the "./timing.c" include, so they
 * are presumably consumed there -- confirm. FMULS/FADDS look like
 * multiply/add operation counts; verify against their definitions.
 */
6 #define _TYPE PLASMA_Complex64_t
7 #define _PREC double
8 #define _LAMCH LAPACKE_dlamch_work
9 
10 #define _NAME "PLASMA_zgetrf_rectil"
11 /* See Lawn 41 page 120 */
12 #define _FMULS FMULS_GETRF(M, NRHS)
13 #define _FADDS FADDS_GETRF(M, NRHS)
14 
15 #include "../control/common.h"
16 #include "./timing.c"
17 
/*
 * Forward declaration of CORE_zgetrf_rectil_init (no arguments, no return
 * value). NOTE(review): no call to it appears in the visible part of this
 * file -- confirm whether a call was lost from this listing.
 */
18 void CORE_zgetrf_rectil_init(void);
20 
/*
 * WARNING: the check only works with the Netlib LAPACK implementation,
 * which chooses the same pivots as this code.
 * MKL uses a different algorithm and can pick a different pivot,
 * for example when two elements have the same absolute value but
 * opposite signs.
 */
28 
29 static int
30 RunTest(int *iparam, double *dparam, real_Double_t *t_)
31 {
32  PASTE_CODE_IPARAM_LOCALS( iparam );
35  PLASMA_sequence *sequence = NULL;
37 
38  /* Allocate Data */
40  PASTE_CODE_ALLOCATE_MATRIX( ipiv, 1, int, max(M, NRHS), 1 );
41 
42  /* Initialiaze Data */
43  PLASMA_zplrnt_Tile(descA, 3456);
44 
45  /* Save A in lapack layout for check */
46  PASTE_TILE_TO_LAPACK( descA, A2, check, PLASMA_Complex64_t, LDA, NRHS );
47  PASTE_CODE_ALLOCATE_MATRIX( ipiv2, check, int, max(M, NRHS), 1 );
48 
49  /* Save AT in lapack layout for check */
50  if ( check ) {
51  LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, M, NRHS, A2, LDA, ipiv2 );
52  }
53 
54  plasma = plasma_context_self();
55  PLASMA_Sequence_Create(&sequence);
56  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
58 
61 
62  START_TIMING();
63  QUARK_CORE_zgetrf_rectil(plasma->quark, &task_flags,
64  *descA, descA->mat, descA->mb*descA->nb, ipiv,
65  sequence, &request,
66  0, 0,
67  iparam[IPARAM_THRDNBR]);
68  PLASMA_Sequence_Wait(sequence);
69  STOP_TIMING();
70 
71  PLASMA_Sequence_Destroy(sequence);
72 
73  /* Check the solution */
74  if ( check )
75  {
76  int64_t i;
77  double *work = (double *)malloc(max(M, NRHS)*sizeof(double));
78  PASTE_TILE_TO_LAPACK( descA, A, 1, PLASMA_Complex64_t, LDA, NRHS );
79 
80  /* Check ipiv */
81  for(i=0; i<NRHS; i++)
82  {
83  if( ipiv[i] != ipiv2[i] ) {
84  fprintf(stderr, "\nPLASMA (ipiv[%ld] = %d, A[%ld] = %e) / LAPACK (ipiv[%ld] = %d, A[%ld] = [%e])\n",
85  i, ipiv[i], i, creal(A[ i * LDA + i ]),
86  i, ipiv2[i], i, creal(A2[ i * LDA + i ]));
87  break;
88  }
89  }
90 
91  dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
92  M, NRHS, A, LDA, work);
93  dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
94  M, NRHS, A2, LDA, work);
95  dparam[IPARAM_BNORM] = 0.0;
96 
97  CORE_zgeadd( M, NRHS, -1.0, A, LDA, A2, LDA);
98 
99  dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
100  M, NRHS, A2, LDA, work);
101 
102  free( A );
103  free( A2 );
104  free( ipiv2 );
105  free( work );
106  }
107 
108  /* Deallocate Workspace */
109  PASTE_CODE_FREE_MATRIX( descA );
110  free( ipiv );
111 
112  return 0;
113 }