PLASMA
2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
Main Page
Modules
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
time_zgetrf_rectil.c
Go to the documentation of this file.
1
6
/*
 * Per-test configuration macros.
 * NOTE(review): these names are presumably consumed by the generic driver in
 * timing.c, which is #included below, so they must keep these exact
 * spellings -- confirm against timing.c before renaming.
 */
#define _TYPE  PLASMA_Complex64_t   /* element type of the matrices under test */
#define _PREC  double               /* real type associated with _TYPE */
#define _LAMCH LAPACKE_dlamch_work  /* LAPACKE dlamch routine for _PREC */

#define _NAME  "PLASMA_zgetrf_rectil"  /* label of the timed routine */
/* See Lawn 41 page 120 */
#define _FMULS FMULS_GETRF(M, NRHS)    /* multiplication count for an M x NRHS GETRF */
#define _FADDS FADDS_GETRF(M, NRHS)    /* addition count for an M x NRHS GETRF */
14
15
#include "../control/common.h"
16
#include "
./timing.c
"
17
18
/* Forward declaration; RunTest() calls this once right before the timed region. */
void CORE_zgetrf_rectil_init(void);
19
/* Forward declaration; RunTest() uses the returned context to reach its quark handle. */
extern plasma_context_t *plasma_context_self(void);
20
21
/*
22
* WARNING: the check is only working with LAPACK Netlib
23
* which chooses the same pivot as this code.
24
* MKL has a different code and can pick a different pivot
25
* if two elements have the same absolute value but not the
26
* same sign for example.
27
*/
28
29
static
int
30
RunTest(
int
*iparam,
double
*dparam,
real_Double_t
*t_)
31
{
32
PASTE_CODE_IPARAM_LOCALS
( iparam );
33
plasma_context_t
*
plasma
;
34
Quark_Task_Flags
task_flags =
Quark_Task_Flags_Initializer
;
35
PLASMA_sequence
*sequence = NULL;
36
PLASMA_request
request =
PLASMA_REQUEST_INITIALIZER
;
37
38
/* Allocate Data */
39
PASTE_CODE_ALLOCATE_MATRIX_TILE
( descA, 1,
PLASMA_Complex64_t
,
PlasmaComplexDouble
, LDA, M, NRHS );
40
PASTE_CODE_ALLOCATE_MATRIX
( ipiv, 1,
int
,
max
(M, NRHS), 1 );
41
42
/* Initialiaze Data */
43
PLASMA_zplrnt_Tile
(descA, 3456);
44
45
/* Save A in lapack layout for check */
46
PASTE_TILE_TO_LAPACK
( descA, A2, check,
PLASMA_Complex64_t
, LDA, NRHS );
47
PASTE_CODE_ALLOCATE_MATRIX
( ipiv2, check,
int
,
max
(M, NRHS), 1 );
48
49
/* Save AT in lapack layout for check */
50
if
( check ) {
51
LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, M, NRHS, A2, LDA, ipiv2 );
52
}
53
54
plasma =
plasma_context_self
();
55
PLASMA_Sequence_Create
(&sequence);
56
QUARK_Task_Flag_Set
(&task_flags,
TASK_SEQUENCE
, (intptr_t)sequence->
quark_sequence
);
57
QUARK_Task_Flag_Set
(&task_flags,
TASK_THREAD_COUNT
, iparam[
IPARAM_THRDNBR
] );
58
59
plasma_dynamic_spawn
();
60
CORE_zgetrf_rectil_init
();
61
62
START_TIMING
();
63
QUARK_CORE_zgetrf_rectil
(plasma->
quark
, &task_flags,
64
*descA, descA->mat, descA->mb*descA->nb, ipiv,
65
sequence, &request,
66
0, 0,
67
iparam[
IPARAM_THRDNBR
]);
68
PLASMA_Sequence_Wait
(sequence);
69
STOP_TIMING
();
70
71
PLASMA_Sequence_Destroy
(sequence);
72
73
/* Check the solution */
74
if
( check )
75
{
76
int64_t i;
77
double
*work = (
double
*)malloc(
max
(M, NRHS)*
sizeof
(double));
78
PASTE_TILE_TO_LAPACK
( descA,
A
, 1,
PLASMA_Complex64_t
, LDA, NRHS );
79
80
/* Check ipiv */
81
for
(i=0; i<NRHS; i++)
82
{
83
if
( ipiv[i] != ipiv2[i] ) {
84
fprintf(stderr,
"\nPLASMA (ipiv[%ld] = %d, A[%ld] = %e) / LAPACK (ipiv[%ld] = %d, A[%ld] = [%e])\n"
,
85
i, ipiv[i], i,
creal
(
A
[ i * LDA + i ]),
86
i, ipiv2[i], i,
creal
(A2[ i * LDA + i ]));
87
break
;
88
}
89
}
90
91
dparam[
IPARAM_ANORM
] = LAPACKE_zlange_work(LAPACK_COL_MAJOR,
lapack_const
(
PlasmaMaxNorm
),
92
M, NRHS,
A
, LDA, work);
93
dparam[
IPARAM_XNORM
] = LAPACKE_zlange_work(LAPACK_COL_MAJOR,
lapack_const
(
PlasmaMaxNorm
),
94
M, NRHS, A2, LDA, work);
95
dparam[
IPARAM_BNORM
] = 0.0;
96
97
CORE_zgeadd
( M, NRHS, -1.0,
A
, LDA, A2, LDA);
98
99
dparam[
IPARAM_RES
] = LAPACKE_zlange_work(LAPACK_COL_MAJOR,
lapack_const
(
PlasmaMaxNorm
),
100
M, NRHS, A2, LDA, work);
101
102
free(
A
);
103
free( A2 );
104
free( ipiv2 );
105
free( work );
106
}
107
108
/* Deallocate Workspace */
109
PASTE_CODE_FREE_MATRIX
( descA );
110
free( ipiv );
111
112
return
0;
113
}
plasma_2.4.5
timing
time_zgetrf_rectil.c
Generated on Mon Jul 9 2012 12:45:08 for PLASMA by
1.8.1