PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
compute_z.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define plasma_zdesc_alloc(descA, mb, nb, lm, ln, i, j, m, n, free)
#define plasma_zooplap2tile(descA, A, mb, nb, lm, ln, i, j, m, n, free)
#define plasma_ziplap2tile(descA, A, mb, nb, lm, ln, i, j, m, n)
#define plasma_zooptile2lap(descA, A, mb, nb, lm, ln)
#define plasma_ziptile2lap(descA, A, mb, nb, lm, ln)

Functions

void plasma_pzgeadd (plasma_context_t *plasma)
void plasma_pzgelqf (plasma_context_t *plasma)
void plasma_pzgemm (plasma_context_t *plasma)
void plasma_pzgeqrf (plasma_context_t *plasma)
void plasma_pzgerbb (plasma_context_t *plasma)
void plasma_pzgetmi2 (plasma_context_t *plasma)
void plasma_pzgetrf_incpiv (plasma_context_t *plasma)
void plasma_pzlacpy (plasma_context_t *plasma)
void plasma_pzlag2c (plasma_context_t *plasma)
void plasma_pzlange (plasma_context_t *plasma)
void plasma_pzlansy (plasma_context_t *plasma)
void plasma_pzpack (plasma_context_t *plasma)
void plasma_pzplghe (plasma_context_t *plasma)
void plasma_pzplgsy (plasma_context_t *plasma)
void plasma_pzplrnt (plasma_context_t *plasma)
void plasma_pzpotrf (plasma_context_t *plasma)
void plasma_pzshift (plasma_context_t *plasma)
void plasma_pzsymm (plasma_context_t *plasma)
void plasma_pzsyrk (plasma_context_t *plasma)
void plasma_pzsyr2k (plasma_context_t *plasma)
void plasma_pztrmm (plasma_context_t *plasma)
void plasma_pztrsm (plasma_context_t *plasma)
void plasma_pztrsmpl (plasma_context_t *plasma)
void plasma_pztrsmrv (plasma_context_t *plasma)
void plasma_pzunglq (plasma_context_t *plasma)
void plasma_pzungqr (plasma_context_t *plasma)
void plasma_pzungqrrh (plasma_context_t *plasma)
void plasma_pzunmlq (plasma_context_t *plasma)
void plasma_pzunmqr (plasma_context_t *plasma)
void plasma_pzunpack (plasma_context_t *plasma)
int plasma_zshift (plasma_context_t *plasma, int m, int n, PLASMA_Complex64_t *A, int nprob, int me, int ne, int L, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgeadd_quark (PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzbarrier_tl2pnl_quark (PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzbarrier_pnl2tl_quark (PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzbarrier_tl2row_quark (PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzbarrier_row2tl_quark (PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgelqf_quark (PLASMA_desc A, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgelqfrh_quark (PLASMA_desc A, PLASMA_desc T, int BS, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgemm_quark (PLASMA_enum transA, PLASMA_enum transB, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_Complex64_t beta, PLASMA_desc C, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgeqrf_quark (PLASMA_desc A, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgeqrfrh_quark (PLASMA_desc A, PLASMA_desc T, int BS, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgerbh_quark (PLASMA_desc A, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgerbb_quark (PLASMA_desc A, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgerbbrh_quark (PLASMA_desc A, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgetmi2_quark (PLASMA_enum idep, PLASMA_enum odep, PLASMA_enum storev, int m, int n, int mb, int nb, PLASMA_Complex64_t *A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgetrf_incpiv_quark (PLASMA_desc A, PLASMA_desc L, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgetrf_reclap_quark (PLASMA_desc A, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgetrf_rectil_quark (PLASMA_desc A, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzhegst_quark (PLASMA_enum itype, PLASMA_enum uplo, PLASMA_desc A, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzherbt_quark (PLASMA_enum uplo, PLASMA_desc A, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzgbrdb_quark (PLASMA_enum uplo, PLASMA_desc A, double *D, double *E, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzhbrdt_quark (PLASMA_enum uplo, PLASMA_desc A, double *D, double *E, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlacpy_quark (PLASMA_enum uplo, PLASMA_desc A, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlag2c_quark (PLASMA_desc A, PLASMA_desc SB, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlange_quark (PLASMA_enum norm, PLASMA_desc A, double *work, double *result, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlansy_quark (PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc A, double *work, double *result, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlaset_quark (PLASMA_enum uplo, PLASMA_Complex64_t alpha, PLASMA_Complex64_t beta, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlaset2_quark (PLASMA_enum uplo, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlaswp_quark (PLASMA_desc B, int *IPIV, int inc, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlaswpc_quark (PLASMA_desc B, int *IPIV, int inc, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzlauum_quark (PLASMA_enum uplo, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzplghe_quark (double bump, PLASMA_desc A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzplgsy_quark (PLASMA_Complex64_t bump, PLASMA_desc A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzplrnt_quark (PLASMA_desc A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzpotrf_quark (PLASMA_enum uplo, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzshift_quark (int, int, int, PLASMA_Complex64_t *, int *, int, int, PLASMA_sequence *, PLASMA_request *)
void plasma_pzsymm_quark (PLASMA_enum side, PLASMA_enum uplo, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_Complex64_t beta, PLASMA_desc C, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzsyrk_quark (PLASMA_enum uplo, PLASMA_enum trans, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_Complex64_t beta, PLASMA_desc C, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzsyr2k_quark (PLASMA_enum uplo, PLASMA_enum trans, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_Complex64_t beta, PLASMA_desc C, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pztrmm_quark (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pztrsm_quark (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pztrsmpl_quark (PLASMA_desc A, PLASMA_desc B, PLASMA_desc L, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pztrsmrv_quark (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc W, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pztrtri_quark (PLASMA_enum uplo, PLASMA_enum diag, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzungbr_quark (PLASMA_enum side, PLASMA_desc A, PLASMA_desc O, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzungbrrh_quark (PLASMA_enum side, PLASMA_desc A, PLASMA_desc O, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzungqr_quark (PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzungqrrh_quark (PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, int BS, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzunglq_quark (PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzunglqrh_quark (PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, int BS, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzungtr_quark (PLASMA_enum uplo, PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzunmqr_quark (PLASMA_enum side, PLASMA_enum trans, PLASMA_desc A, PLASMA_desc B, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzunmqrrh_quark (PLASMA_enum side, PLASMA_enum trans, PLASMA_desc A, PLASMA_desc B, PLASMA_desc T, int BS, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzunmlq_quark (PLASMA_enum side, PLASMA_enum trans, PLASMA_desc A, PLASMA_desc B, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pzunmlqrh_quark (PLASMA_enum side, PLASMA_enum trans, PLASMA_desc A, PLASMA_desc B, PLASMA_desc T, int BS, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Jakub Kurzak
Mathieu Faverge
Date:
2010-11-15
Precisions:
normal z -> c d s

Definition in file compute_z.h.


Macro Definition Documentation

#define plasma_zdesc_alloc (   descA,
  mb,
  nb,
  lm,
  ln,
  i,
  j,
  m,
  n,
  free 
)
Value:
descA = plasma_desc_init( \
PlasmaComplexDouble, (mb), (nb), ((mb)*(nb)), \
(m), (n), (i), (j), (m), (n)); \
if ( plasma_desc_mat_alloc( &(descA) ) ) { \
plasma_error( __func__, "plasma_shared_alloc() failed"); \
{free;}; \
}

Macro for matrix conversion / Lapack interface

Definition at line 20 of file compute_z.h.

#define plasma_ziplap2tile (   descA,
  A,
  mb,
  nb,
  lm,
  ln,
  i,
  j,
  m,
  n 
)
Value:
descA = plasma_desc_init( \
PlasmaComplexDouble, (mb), (nb), ((mb)*(nb)), \
(lm), (ln), (i), (j), (m), (n)); \
descA.mat = A; \
PLASMA_zgecfi_Async((lm), (ln), (A), PlasmaCM, (mb), (nb), \
PlasmaCCRB, (mb), (nb), sequence, &request);

Definition at line 47 of file compute_z.h.

#define plasma_ziptile2lap (   descA,
  A,
  mb,
  nb,
  lm,
  ln 
)
Value:
PLASMA_zgecfi_Async((lm), (ln), (A), PlasmaCCRB, (mb), (nb), \
PlasmaCM, (mb), (nb), sequence, &request);

Definition at line 65 of file compute_z.h.

#define plasma_zooplap2tile (   descA,
  A,
  mb,
  nb,
  lm,
  ln,
  i,
  j,
  m,
  n,
  free 
)
Value:
descA = plasma_desc_init( \
PlasmaComplexDouble, (mb), (nb), ((mb)*(nb)), \
(lm), (ln), (i), (j), (m), (n)); \
if ( plasma_desc_mat_alloc( &(descA) ) ) { \
plasma_error( __func__, "plasma_shared_alloc() failed"); \
{free;}; \
} \
plasma_parallel_call_5( \
int, (lm), \
PLASMA_desc, (descA), \
PLASMA_sequence*, sequence, \
PLASMA_request*, &request);

Definition at line 30 of file compute_z.h.

#define plasma_zooptile2lap (   descA,
  A,
  mb,
  nb,
  lm,
  ln 
)
Value:

Definition at line 57 of file compute_z.h.


Function Documentation

void plasma_pzbarrier_pnl2tl_quark ( PLASMA_desc  A,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Barrier from panels to tiles

Definition at line 61 of file pzbarrier.c.

References CORE_foo2_quark(), CORE_foo_quark(), INOUT, INPUT, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Inserts QUARK tasks that express the panel-to-tile barrier on descriptor A.
 * Only dependency declarations (INOUT / INPUT) on tiles are visible here.
 *
 * NOTE(review): `plasma` and `task_flags` are used below but their
 * declarations do not appear in this listing; they were presumably dropped
 * by the documentation extraction (Quark_Task_Flags_Initializer is listed
 * under References) - confirm against pzbarrier.c.
 */
int m, n;
plasma = plasma_context_self();
/* Return without inserting anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the sequence's QUARK sequence to the flags used by every task below. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
/* One pass per tile column n; tile A(0, n) is the first tile of that column. */
for (n = 0; n < A.nt; n++)
{
/* Protection from previous GATHERV */
QUARK_Insert_Task(plasma->quark, CORE_foo_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(0, n), INOUT,
0);
/* For each tile row m: a task that takes A(0, n) as INPUT and A(m, n) as INOUT. */
for (m = 0; m < A.mt; m++)
{
QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(0, n), INPUT,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, n), INOUT,
0);
}
}
}

Here is the call graph for this function:

void plasma_pzbarrier_row2tl_quark ( PLASMA_desc  A,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Barrier from rows to tiles

Definition at line 128 of file pzbarrier.c.

References CORE_foo2_quark(), CORE_foo_quark(), INOUT, INPUT, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Inserts QUARK tasks that express the row-to-tile barrier on descriptor A.
 * Same structure as the panel variant, but the outer loop runs over tile
 * rows and the shared tile is A(m, 0).
 *
 * NOTE(review): `plasma` and `task_flags` are used below but their
 * declarations do not appear in this listing; they were presumably dropped
 * by the documentation extraction - confirm against pzbarrier.c.
 */
int m, n;
plasma = plasma_context_self();
/* Return without inserting anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the sequence's QUARK sequence to the flags used by every task below. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
/* One pass per tile row m; tile A(m, 0) is the first tile of that row. */
for (m = 0; m < A.mt; m++)
{
/* Protection from previous GATHERV */
QUARK_Insert_Task(plasma->quark, CORE_foo_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, 0), INOUT,
0);
/* For each tile column n: a task that takes A(m, 0) as INPUT and A(m, n) as INOUT. */
for (n = 0; n < A.nt; n++)
{
QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, 0), INPUT,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, n), INOUT,
0);
}
}
}

Here is the call graph for this function:

void plasma_pzbarrier_tl2pnl_quark ( PLASMA_desc  A,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Barrier from tiles to panels

Definition at line 25 of file pzbarrier.c.

References CORE_foo2_quark(), CORE_foo_quark(), GATHERV, INOUT, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Inserts QUARK tasks that express the tile-to-panel barrier on descriptor A.
 * For each tile column, the per-tile tasks declare A(0, n) with
 * INOUT | GATHERV, bracketed by two plain INOUT tasks on the same tile.
 *
 * NOTE(review): `plasma` and `task_flags` are used below but their
 * declarations do not appear in this listing; they were presumably dropped
 * by the documentation extraction - confirm against pzbarrier.c.
 */
int m, n;
plasma = plasma_context_self();
/* Return without inserting anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the sequence's QUARK sequence to the flags used by every task below. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
/* One pass per tile column n; tile A(0, n) is the first tile of that column. */
for (n = 0; n < A.nt; n++)
{
/* Protection from previous GATHERV */
QUARK_Insert_Task(plasma->quark, CORE_foo_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(0, n), INOUT,
0);
/* For each tile row m: A(0, n) is declared INOUT | GATHERV, A(m, n) INOUT. */
for (m = 0; m < A.mt; m++)
{
QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(0, n), INOUT | GATHERV,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, n), INOUT,
0);
}
/* Protection to next GATHERV */
QUARK_Insert_Task(plasma->quark, CORE_foo_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(0, n), INOUT,
0);
}
}

Here is the call graph for this function:

void plasma_pzbarrier_tl2row_quark ( PLASMA_desc  A,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Barrier from tiles to rows

Definition at line 92 of file pzbarrier.c.

References CORE_foo2_quark(), CORE_foo_quark(), GATHERV, INOUT, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Inserts QUARK tasks that express the tile-to-row barrier on descriptor A.
 * For each tile row, the per-tile tasks declare A(m, 0) with
 * INOUT | GATHERV, bracketed by two plain INOUT tasks on the same tile.
 *
 * NOTE(review): `plasma` and `task_flags` are used below but their
 * declarations do not appear in this listing; they were presumably dropped
 * by the documentation extraction - confirm against pzbarrier.c.
 */
int m, n;
plasma = plasma_context_self();
/* Return without inserting anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the sequence's QUARK sequence to the flags used by every task below. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
/* One pass per tile row m; tile A(m, 0) is the first tile of that row. */
for (m = 0; m < A.mt; m++)
{
/* Protection from previous GATHERV */
QUARK_Insert_Task(plasma->quark, CORE_foo_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, 0), INOUT,
0);
/* For each tile column n: A(m, 0) is declared INOUT | GATHERV, A(m, n) INOUT. */
for (n = 0; n < A.nt; n++)
{
QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, 0), INOUT | GATHERV,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, n), INOUT,
0);
}
/* Protection to next GATHERV */
QUARK_Insert_Task(plasma->quark, CORE_foo_quark, &task_flags,
sizeof(PLASMA_Complex64_t)*A.mb*A.nb, A(m, 0), INOUT,
0);
}
}

Here is the call graph for this function:

void plasma_pzgbrdb_quark ( PLASMA_enum  uplo,
PLASMA_desc  A,
double *  D,
double *  E,
PLASMA_desc  T,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Parallel Reduction from BAND Bidiagonal to the final condensed form - dynamic scheduler

Definition at line 26 of file pzgbrdb.c.

References A, C, cabs(), conj(), creal(), DEP, plasma_desc_t::dtyp, plasma_desc_t::lm, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::n, plasma_context_self(), plasma_element_size(), plasma_sequence_flush(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SUCCESS, PlasmaComplexDouble, PlasmaInteger, PlasmaLower, plasma_context_struct::quark, QUARK_Barrier(), QUARK_CORE_zbrdalg(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, TASK_SEQUENCE, TAU, and V.

{
/*
 * Reduces a band bidiagonal matrix held in descriptor A to bidiagonal form
 * and stores the real diagonal in D and the real off-diagonal in E.
 *
 * Visible structure: early exits (bad sequence status, MINMN == 0, NB == 0),
 * a QUARK barrier, an NB == 1 branch, a LAPACKE_zgebrd_work branch, and then
 * the general bulge-chasing task-insertion loops followed by a second
 * barrier and the copy into D and E.
 *
 * NOTE(review): this listing is incomplete. `plasma`, `task_flags`, `V`,
 * `TAU`, `ztmp`, `zone`, `C` and `S` are used without a visible declaration
 * or allocation, the braces do not balance, and one call head is missing.
 * These lines were presumably dropped by the documentation extraction -
 * confirm every note below against pzgbrdb.c before relying on it.
 */
#ifdef COMPLEX
static double dzero = (double) 0.0;
double absztmp;
#endif
static PLASMA_Complex64_t zzero = (PLASMA_Complex64_t) 0.0;
int M, N, NB, MINMN, INgrsiz, INthgrsiz, BAND;
int myid, grsiz, shift=3, stt, st, ed, stind, edind;
int blklastind, colpt, PCOL, ACOL, MCOL;
int stepercol,mylastid,grnb,grid;
int *DEP,*MAXID;
int i, j, m;
int thgrsiz, thgrnb, thgrid, thed;
size_t eltsize = plasma_element_size(A.dtyp);
plasma = plasma_context_self();
/* Return without doing anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
/* NB is taken from the tile row size A.mb; MINMN is the smaller matrix dimension. */
M = A.m;
N = A.n;
NB = A.mb;
MINMN = min(M,N);
/* Quick return */
if ( MINMN == 0 ){
return;
}
/* NB == 0: zero D and E, then fill D from the diagonal entries of A. */
if ( NB == 0 ) {
memset(D, 0, MINMN *sizeof(double));
memset(E, 0, (MINMN-1)*sizeof(double));
#ifdef COMPLEX
for (i=0; i<MINMN; i++)
D[i] = cabs(*A(i,i));
#else
for (i=0; i<MINMN; i++)
D[i] = *A(i,i);
#endif
return;
}
/*
* Barrier is used because the bulge have to wait until
* the reduction to band has been finish.
* otherwise, I can remove this BARRIER when I integrate
* the function dependencies link inside the reduction to
* band. Keep in mind the case when NB=1, where no bulge-chasing.
*/
/***************************************************************/
QUARK_Barrier(plasma->quark);
/***************************************************************/
/*
* Case NB=1 ==> matrix is already Bidiagonal. no need to bulge.
* Make diagonal and superdiagonal elements real, storing them in
* D and E. if PlasmaLower, first transform lower bidiagonal form
* to upper bidiagonal by applying plane rotations/ Householder
* from the left, overwriting superdiagonal elements then make
* elements real of the resulting upper Bidiagonal. if PlasmaUpper
* then make its elements real. For Q, PT: ZSCAL should be done
* in case of WANTQ.
*/
if ( NB == 1 ) {
memset(D, 0, MINMN*sizeof(double));
memset(E, 0, (MINMN-1)*sizeof(double));
/* NOTE(review): per the comment above this loop applies to the PlasmaLower
 * case only; the guarding `if` is presumably missing from the listing. */
for (i=0; i<(MINMN-1); i++)
{
/* generate Householder to annihilate a(i+1,i) and create a(i,i+1) */
V = *A((i+1), i);
*A((i+1), i) = zzero;
LAPACKE_zlarfg_work( 2, A(i, i), &V, 1, &TAU);
/* apply Left*/
TAU = conj(TAU);
ztmp = TAU*V;
V = conj(V);
*A(i, i+1) = - V * TAU * (*A(i+1, i+1));
*A(i+1, i+1) = *(A(i+1, i+1)) * (zone - V * ztmp);
}
}
/* PlasmaLower or PlasmaUpper, both are now upper */
/* Make diagonal and superdiagonal elements real,
* storing them in D and E
*/
#ifdef COMPLEX
/* ztmp carries a unit-modulus factor from one entry to the next; D and E
 * receive the absolute values. */
ztmp = zone;
for (i=0; i<MINMN; i++)
{
ztmp = *A(i, i) * conj(ztmp);
absztmp = cabs(ztmp);
D[i] = absztmp; /* diag value */
if(absztmp != dzero)
ztmp = (PLASMA_Complex64_t) (ztmp / absztmp);
else
ztmp = zone;
if(i<(MINMN-1)) {
ztmp = *A(i, (i+1)) * conj(ztmp);
absztmp = cabs(ztmp);
E[i] = absztmp; /* upper off-diag value */
if(absztmp != dzero)
ztmp = (PLASMA_Complex64_t) (ztmp / absztmp);
else
ztmp = zone;
}
}
#else
for (i=0; i < MINMN-1; i++) {
D[i] = *A(i, i );
E[i] = *A(i, i+1);
}
/* i == MINMN-1 after the loop: last diagonal entry. */
D[i] = *A(i, i);
#endif
return;
/* NOTE(review): the brace below has no visible opener in this listing. */
}
/*
* Case MINMN<NB ==> matrix is very small and better to call lapack ZGETRD.
*
* Use fact that one row of block is stored the same way than in LAPACK
* Doesn't work if M > NB because of tile storage
*/
/* NOTE(review): the condition tests MINMN <= 0, not MINMN < NB as the comment
 * above says, and MINMN == 0 has already returned earlier. In the visible
 * lines work, taup and tauq are passed and freed without being allocated -
 * confirm against the real source. */
if ( MINMN <= 0 )
{
PLASMA_Complex64_t *work, *taup, *tauq;
int info, ldwork = N*N;
info = LAPACKE_zgebrd_work(LAPACK_COL_MAJOR, M, N,
A(0,0), A.lm, D, E, taup, tauq, work, ldwork);
plasma_shared_free(plasma, (void*) work);
plasma_shared_free(plasma, (void*) taup);
plasma_shared_free(plasma, (void*) tauq);
/* A non-zero info from LAPACKE is reported through plasma_sequence_flush. */
if( info == 0 )
sequence->status = PLASMA_SUCCESS;
else
plasma_sequence_flush(plasma->quark, sequence, request, info);
return;
}
/* General case NB > 1 && N > NB */
/* DEP and MAXID each hold MINMN+1 integers; only MAXID is zeroed here. */
DEP = (int *) plasma_shared_alloc(plasma, MINMN+1, PlasmaInteger );
MAXID = (int *) plasma_shared_alloc(plasma, MINMN+1, PlasmaInteger );
memset(MAXID,0,(MINMN+1)*sizeof(int));
/***************************************************************************
* START BULGE CHASING CODE
**************************************************************************/
/*
* Initialisation of local parameter. those parameter should be
* input or tuned parameter.
*/
/* Group size chosen from NB (and MINMN for 100 < NB <= 160). */
INgrsiz = 1;
if( NB > 160 ) {
INgrsiz = 2;
}
else if( NB > 100 ) {
if( MINMN < 5000 )
INgrsiz = 2;
else
INgrsiz = 4;
} else {
INgrsiz = 6;
}
INthgrsiz = MINMN;
BAND = 0;
grsiz = INgrsiz;
thgrsiz = INthgrsiz;
if( grsiz == 0 ) grsiz = 6;
if( thgrsiz == 0 ) thgrsiz = MINMN;
/* stepercol = ceil(shift / grsiz); thgrnb = ceil((MINMN-2) / thgrsiz). */
i = shift/grsiz;
stepercol = i*grsiz == shift ? i:i+1;
i = (MINMN-2)/thgrsiz;
thgrnb = i*thgrsiz == (MINMN-2) ? i:i+1;
for (thgrid = 1; thgrid<=thgrnb; thgrid++){
stt = (thgrid-1)*thgrsiz+1;
thed = min( (stt + thgrsiz -1), (MINMN-2));
for (i = stt; i <= MINMN-2; i++){
ed=min(i,thed);
if(stt>ed)break;
for (m = 1; m <=stepercol; m++){
st=stt;
for (j = st; j <=ed; j++){
/* PCOL: dependency on the ID of the master of the group of the previous column. (Previous Column:PCOL). */
/* ACOL: dependency on the ID of the master of the previous group of my column. (Acctual Column:ACOL). (it is 0(NULL) for myid=1) */
/* MCOL: OUTPUT dependency on the my ID, to be used by the next ID. (My Column: MCOL). I am the master of this group. */
myid = (i-j)*(stepercol*grsiz) +(m-1)*grsiz + 1;
mylastid = myid+grsiz-1;
PCOL = mylastid+shift-1; /* to know the dependent ID of the previous column. need to know the master of its group*/
MAXID[j] = myid;
PCOL = min(PCOL,MAXID[j-1]); /* for the last columns, we might do only 1 or 2 kernel, so the PCOL will be wrong. this is to force it to the last ID of the previous col.*/
grnb = PCOL/grsiz;
grid = grnb*grsiz == PCOL ? grnb:grnb+1;
PCOL = (grid-1)*grsiz +1; /* give me the ID of the master of the group of the previous column.*/
ACOL = myid-grsiz;
if(myid==1)ACOL=0;
MCOL = myid;
/* NOTE(review): the call head for the argument list below is missing from
 * this listing; References names QUARK_CORE_zbrdalg() - confirm. */
plasma->quark, &task_flags,
uplo, MINMN, NB,
&A, C, S, i, j, m, grsiz, BAND,
DEP(PCOL), DEP(ACOL), DEP(MCOL) );
/* blklastind >= MINMN-1 advances stt past column j (see below). */
if(mylastid%2 ==0){
blklastind = (mylastid/2)*NB+1+j-1;
}else{
colpt = ((mylastid+1)/2)*NB + 1 +j -1 ;
stind = colpt-NB+1;
edind = min(colpt,MINMN);
if( (stind>=edind-1) && (edind==MINMN) )
blklastind=MINMN;
else
blklastind=0;
}
if(blklastind >= (MINMN-1)) stt=stt+1;
} /* END for j=st:ed */
} /* END for m=1:stepercol */
} /* END for i=1:MINMN-2 */
} /* END for thgrid=1:thgrnb */
/*
* Barrier used only for now, to be sure that everything
* is done before copying the D and E and free workspace.
* this will be removed later when D and E are directly filled
* during the bulge process.
*/
QUARK_Barrier(plasma->quark);
plasma_shared_free(plasma, (void*) DEP);
plasma_shared_free(plasma, (void*) MAXID);
/* NOTE(review): C and S are freed here but no allocation is visible. */
plasma_shared_free(plasma, (void*) C);
plasma_shared_free(plasma, (void*) S);
/*
* STORE THE RESULTING diagonal/off-diagonal in D AND E
*/
memset(D, 0, MINMN*sizeof(double));
memset(E, 0, (MINMN-1)*sizeof(double));
/*
* If PlasmaLower, first transform lower bidiagonal form
* to upper bidiagonal by applying plane rotations/ Householder
* from the left, overwriting superdiagonal elements then make
* elements real of the resulting upper Bidiagonal. if PlasmaUpper
* then make its elements real.
* For Q, PT: ZSCAL should be done in case of WANTQ.
*/
for (i=0; i<(MINMN-1); i++)
{
/* generate Householder to annihilate a(i+1,i) and create a(i,i+1)*/
V = *A((i+1), i);
*A((i+1), i) = zzero;
LAPACKE_zlarfg_work( 2, A(i, i), &V, 1, &TAU);
/* apply Left */
TAU = conj(TAU);
ztmp = TAU*V;
V = conj(V);
*A(i, (i+1)) = - V * TAU * (*A((i+1), (i+1)));
*A((i+1), (i+1)) = (*A((i+1), (i+1))) * (zone - V * ztmp);
}
/* NOTE(review): the brace below has no visible opener in this listing;
 * presumably it closes a PlasmaLower test that was dropped. */
}
/* PlasmaLower or PlasmaUpper, both are upper, now*/
/* Make diagonal and superdiagonal elements real,
* storing them in D and E
*/
/* In complex case, the element off diagonal element are
* not necessary real and we have to make off-diagonal
* elements real and copy them to E.
* When using HouseHolder elimination,
* the ZLARFG give us a real as output so, all the
* diagonal/off-diagonal element except the last one are already
* real and thus we need only to take the abs of the last
* one.
* */
#ifdef COMPLEX
ztmp =zone;
for (i=0; i < MINMN-1; i++) {
D[i] = creal( *A(i, i) );
/*
* Alternative for Householder case, all diag/superdiag
* are real except the last diag and superdiag, where we
* have to take the abs
*/
if(i<(MINMN-2))
E[i] = creal(*A(i, i+1));
else
E[i] = cabs( *A(i, i+1)); /* last upper value is complex */
}
/* i == MINMN-1 after the loop: last diagonal entry, stored as its modulus. */
D[i] = cabs( *A(i, i) );
#else
for (i=0; i < MINMN-1; i++) {
D[i] = *A(i, i );
E[i] = *A(i, i+1);
}
/* i == MINMN-1 after the loop: last diagonal entry. */
D[i] = *A(i, i);
#endif
} /* END FUNCTION */

Here is the call graph for this function:

void plasma_pzgeadd ( plasma_context_t *  plasma)

Declarations of parallel functions (static scheduling) - alphabetical order

Definition at line 23 of file pzgeadd.c.

References A, B, BLKLDD, CORE_zgeadd(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, and plasma_sequence_t::status.

{
/*
 * Static-scheduling tile loop that calls CORE_zgeadd on tile (m, n) of A
 * and B. The walk advances m by PLASMA_SIZE each step and wraps into the
 * next tile column whenever m runs past A.mt.
 *
 * NOTE(review): `alpha`, `A` and `B` are unpacked below but their
 * declarations are not visible, and `m` is read in the first while loop
 * without a visible assignment. References lists PLASMA_RANK, so a line
 * such as the initial assignment of m was presumably dropped by the
 * documentation extraction - confirm against pzgeadd.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int X, Y;
int m, n;
int next_m;
int next_n;
int ldam, ldbm;
plasma_unpack_args_5(alpha, A, B, sequence, request);
/* Return without doing anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* Fold the starting m into a valid (m, n) tile coordinate. */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt;
}
while (n < A.nt) {
/* Compute the next tile before working on the current one. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt;
}
/* Edge tiles take the remainder of the matrix dimension.
 * NOTE(review): the full-tile row count here is A.nb, whereas
 * plasma_pzgeadd_quark uses A.mb for the same quantity - confirm. */
X = m == A.mt-1 ? A.m-A.mb*m : A.nb;
Y = n == A.nt-1 ? A.n-A.nb*n : A.nb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
CORE_zgeadd(X, Y, alpha, A(m, n), ldam, B(m, n), ldbm);
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

void plasma_pzgeadd_quark ( PLASMA_Complex64_t  alpha,
PLASMA_desc  A,
PLASMA_desc  B,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Declarations of parallel functions (dynamic scheduling) - alphabetical order

Definition at line 72 of file pzgeadd.c.

References B, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zgeadd(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Dynamic-scheduling variant: loops over every tile (m, n) of A and issues
 * one task per tile with the tile sizes X by Y and the leading dimensions
 * of the A and B tiles.
 *
 * NOTE(review): `plasma` and `task_flags` are used without a visible
 * declaration, and the head of the call inside the inner loop is missing.
 * These were presumably dropped by the documentation extraction - confirm
 * against pzgeadd.c.
 */
int X, Y;
int m, n;
int ldam, ldbm;
plasma = plasma_context_self();
/* Return without inserting anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < A.mt; m++) {
/* The last tile row takes the remainder of A.m; others are A.mb rows. */
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
for (n = 0; n < A.nt; n++) {
/* The last tile column takes the remainder of A.n; others are A.nb columns. */
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
/* NOTE(review): call head missing here; References names
 * QUARK_CORE_zgeadd() - confirm. */
plasma->quark, &task_flags,
X, Y, A.mb,
alpha, A(m, n), ldam,
B(m, n), ldbm);
}
}
}

Here is the call graph for this function:

void plasma_pzgelqf ( plasma_context_t *  plasma)

Parallel tile LQ factorization - static scheduling

Definition at line 24 of file pzgelqf.c.

References A, BLKLDD, CORE_zgelqt(), CORE_ztslqt(), CORE_ztsmlq(), CORE_zunmlq(), plasma_desc_t::dtyp, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_IB, plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_4, PlasmaConjTrans, PlasmaRight, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, plasma_sequence_t::status, and T.

{
/*
 * Static-scheduling tile LQ loop. Each iteration handles one (k, m, n)
 * triple, synchronising with ss_cond_wait / ss_cond_set on tile progress,
 * and precomputes the next triple before doing the work.
 *
 * NOTE(review): this listing is incomplete. `A` and `T` are unpacked without
 * a visible declaration, `m` is read before any visible assignment, `tau`
 * is freed without a visible allocation, and the heads of the four kernel
 * calls are missing. References lists CORE_zgelqt(), CORE_zunmlq(),
 * CORE_ztslqt(), CORE_ztsmlq() and PLASMA_RANK, so these lines were
 * presumably dropped by the documentation extraction - confirm against
 * pzgelqf.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int k, m, n;
int next_k;
int next_m;
int next_n;
int ldak, ldam;
int tempkm, tempkn, tempmm, tempnn;
int ib = PLASMA_IB;
PLASMA_Complex64_t *work, *tau;
plasma_unpack_args_4(A, T, sequence, request);
/* Return without doing anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
/* Private workspace of ib*T.nb elements of type T.dtyp. */
work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp);
ss_init(A.mt, A.nt, -1);
k = 0;
/* Fold the starting m into a valid row for some step k. */
while (m >= A.mt) {
k++;
m = m-A.mt+k;
}
n = k;
while (k < min(A.mt, A.nt) && m < A.mt) {
/* Compute the next (k, m, n): advance n first, and move to the next row
 * (m += PLASMA_SIZE, bumping k on wrap) once n reaches A.nt. */
next_m = m;
next_n = n;
next_k = k;
next_n++;
if (next_n == A.nt) {
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_k < min(A.nt, A.mt)) {
next_k++;
next_m = next_m-A.mt+next_k;
}
next_n = next_k;
}
/* Edge tiles take the remainder of the matrix dimension. */
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldam = BLKLDD(A, m);
if (m == k) {
if (n == k) {
/* Diagonal tile (k, k). NOTE(review): call head missing; presumably
 * CORE_zgelqt - confirm. */
ss_cond_wait(k, k, k-1);
tempkm, tempkn, ib,
A(k, k), ldak,
T(k, k), T.mb,
tau, work);
ss_cond_set(k, k, k);
}
else {
/* Tile (k, n) to the right of the diagonal. NOTE(review): call head
 * missing; presumably CORE_ztslqt - confirm. */
ss_cond_wait(k, n, k-1);
tempkm, tempnn, ib,
A(k, k), ldak,
A(k, n), ldak,
T(k, n), T.mb,
tau, work);
ss_cond_set(k, n, k);
}
}
else {
if (n == k) {
/* Tile (m, k) below the diagonal. NOTE(review): call head missing;
 * presumably CORE_zunmlq - confirm. */
ss_cond_wait(k, k, k);
ss_cond_wait(m, k, k-1);
tempmm, tempkn, tempkn, ib,
A(k, k), ldak,
T(k, k), T.mb,
A(m, k), ldam,
work, T.nb);
}
else {
/* Trailing tile (m, n). NOTE(review): call head missing; presumably
 * CORE_ztsmlq - confirm. */
ss_cond_wait(k, n, k);
ss_cond_wait(m, n, k-1);
tempmm, A.nb, tempmm, tempnn, A.nb, ib,
A(m, k), ldam,
A(m, n), ldam,
A(k, n), ldak,
T(k, n), T.mb,
work, T.nb);
ss_cond_set(m, n, k);
}
}
m = next_m;
n = next_n;
k = next_k;
}
plasma_private_free(plasma, work);
plasma_private_free(plasma, tau);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgelqf_quark ( PLASMA_desc  A,
PLASMA_desc  T,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Parallel tile LQ factorization - dynamic scheduling

Definition at line 137 of file pzgelqf.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaRight, plasma_context_struct::quark, QUARK_CORE_zgelqt(), QUARK_CORE_ztslqt(), QUARK_CORE_ztsmlq(), QUARK_CORE_zunmlq(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, T, and TASK_SEQUENCE.

{
/*
 * Dynamic-scheduling tile LQ: for each step k it issues one task on the
 * diagonal tile, one per tile below it in column k, one per tile to its
 * right in row k, and one per trailing tile (m, n).
 *
 * NOTE(review): `plasma` and `task_flags` are used without a visible
 * declaration and the heads of all four task-insertion calls are missing.
 * References lists QUARK_CORE_zgelqt(), QUARK_CORE_zunmlq(),
 * QUARK_CORE_ztslqt() and QUARK_CORE_ztsmlq(); the lines were presumably
 * dropped by the documentation extraction - confirm against pzgelqf.c.
 */
int k, m, n;
int ldak, ldam;
int tempkm, tempkn, tempmm, tempnn;
int ib;
plasma = plasma_context_self();
/* Return without inserting anything unless the sequence status is PLASMA_SUCCESS. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
ib = PLASMA_IB;
for (k = 0; k < min(A.mt, A.nt); k++) {
/* Edge tiles take the remainder of the matrix dimension. */
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
/* Diagonal tile (k, k). NOTE(review): call head missing; presumably
 * QUARK_CORE_zgelqt - confirm. */
plasma->quark, &task_flags,
tempkm, tempkn, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb);
for (m = k+1; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
/* Tile (m, k) below the diagonal. NOTE(review): call head missing;
 * presumably QUARK_CORE_zunmlq - confirm. */
plasma->quark, &task_flags,
tempmm, tempkn, tempkn, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb,
A(m, k), ldam);
}
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
/* Tile (k, n) to the right of the diagonal. NOTE(review): call head
 * missing; presumably QUARK_CORE_ztslqt - confirm. */
plasma->quark, &task_flags,
tempkm, tempnn, ib, T.nb,
A(k, k), ldak,
A(k, n), ldak,
T(k, n), T.mb);
for (m = k+1; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
/* Trailing tile (m, n). NOTE(review): call head missing; presumably
 * QUARK_CORE_ztsmlq. The fifth size argument is A.mb here but A.nb in
 * the static plasma_pzgelqf listing - confirm which is intended. */
plasma->quark, &task_flags,
tempmm, A.nb, tempmm, tempnn, A.mb, ib, T.nb,
A(m, k), ldam,
A(m, n), ldam,
A(k, n), ldak,
T(k, n), T.mb);
}
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgelqfrh_quark ( PLASMA_desc  A,
PLASMA_desc  T,
int  BS,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile LQ factorization (reduction Householder) - dynamic scheduling

Definition at line 25 of file pzgelqfrh.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaRight, plasma_context_struct::quark, QUARK_CORE_zgelqt(), QUARK_CORE_ztslqt(), QUARK_CORE_ztsmlq(), QUARK_CORE_zttlqt(), QUARK_CORE_zttmlq(), QUARK_CORE_zunmlq(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, T, T2, and TASK_SEQUENCE.

{
/*
 * Submits the "reduction Householder" variant of the tile LQ factorization
 * as QUARK tasks.  For each step k the tile row is first processed in
 * domains of BS tile columns (loop over N), then the domain heads are
 * combined pairwise with a stride RD that doubles each round.
 * Nothing is submitted when sequence->status is not PLASMA_SUCCESS.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  `plasma` and `task_flags` have no
 * visible declaration and the six argument lists below have no callee.  The
 * References list names QUARK_CORE_zgelqt, QUARK_CORE_zunmlq,
 * QUARK_CORE_ztslqt, QUARK_CORE_ztsmlq, QUARK_CORE_zttlqt and
 * QUARK_CORE_zttmlq -- confirm the mapping against pzgelqfrh.c.
 */
int k, m, n;
int N, RD;
int ldak, ldam;
int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn;
int ib;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
ib = PLASMA_IB;
for (k = 0; k < min(A.mt, A.nt); k++) {
/* Rows in tile row k: remainder for the last tile row, A.mb otherwise. */
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
ldak = BLKLDD(A, k);
/* Phase 1: one domain per BS tile columns, headed by tile A(k, N). */
for (N = k; N < A.nt; N += BS) {
tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
tempkmin = min(tempkm, tempNn);
/* Domain head A(k, N); presumably QUARK_CORE_zgelqt -- confirm. */
plasma->quark, &task_flags,
tempkm, tempNn, ib, T.nb,
A(k, N), ldak,
T(k, N), T.mb);
/* Tiles below row k in column N; presumably QUARK_CORE_zunmlq -- confirm. */
for (m = k+1; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
plasma->quark, &task_flags,
tempmm, tempNn, tempkmin, ib, T.nb,
A(k, N), ldak,
T(k, N), T.mb,
A(m, N), ldam);
}
/* Remaining tile columns of this domain, clipped at A.nt. */
for (n = N+1; n < min(N+BS, A.nt); n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
/* Presumably QUARK_CORE_ztslqt -- confirm. */
plasma->quark, &task_flags,
tempkm, tempnn, ib, T.nb,
A(k, N), ldak,
A(k, n), ldak,
T(k, n), T.mb);
/* Presumably QUARK_CORE_ztsmlq on the rows below -- confirm. */
for (m = k+1; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
plasma->quark, &task_flags,
tempmm, A.nb, tempmm, tempnn, tempkm, ib, T.nb,
A(m, N), ldam,
A(m, n), ldam,
A(k, n), ldak,
T(k, n), T.mb);
}
}
}
/* Phase 2: combine tile columns N and N+RD; RD doubles each round. */
for (RD = BS; RD < A.nt-k; RD *= 2) {
for (N = k; N+RD < A.nt; N += 2*RD) {
tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
/* Uses T2 rather than T; presumably QUARK_CORE_zttlqt -- confirm. */
plasma->quark, &task_flags,
tempkm, tempNRDn, ib, T.nb,
A (k, N ), ldak,
A (k, N+RD), ldak,
T2(k, N+RD), T.mb);
/* Presumably QUARK_CORE_zttmlq on the rows below -- confirm. */
for (m = k+1; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m );
plasma->quark, &task_flags,
tempmm, A.nb, tempmm, tempNRDn, tempkm, ib, T.nb,
A (m, N ), ldam,
A (m, N+RD), ldam,
A (k, N+RD), ldak,
T2(k, N+RD), T.mb);
}
}
}
}
}

Here is the call graph for this function:

void plasma_pzgemm ( plasma_context_t *plasma)

Parallel tile matrix-matrix multiplication - static scheduling

Definition at line 24 of file pzgemm.c.

References A, B, BLKLDD, C, CORE_zgemm(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_9, PlasmaNoTrans, and plasma_sequence_t::status.

{
/*
 * Static-scheduling tile matrix multiply.  Each pass of the outer loop
 * handles one tile C(m, n): it accumulates over k, using beta for the first
 * k and zone for the following ones, in one of four branches selected by
 * transA / transB.  The next (m, n) is found by advancing m by PLASMA_SIZE
 * and wrapping into the following tile column.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  alpha, beta, zbeta, zone, A, B and C
 * have no visible declaration; m is read before any visible assignment (the
 * References list cites PLASMA_RANK, so an initialisation from it was
 * presumably dropped); and the four five-line argument lists have no callee
 * (presumably CORE_zgemm, the only kernel in the References list).  Confirm
 * against pzgemm.c.
 */
PLASMA_enum transA;
PLASMA_enum transB;
PLASMA_sequence *sequence;
PLASMA_request *request;
int K, X, Y;
int k, m, n;
int next_m;
int next_n;
int ldam, ldak, ldbn, ldbk, ldcm;
plasma_unpack_args_9(transA, transB, alpha, A, B, beta, C, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* Fold a starting m that lies beyond the last tile row into later tile columns. */
while (m >= C.mt && n < C.nt) {
n++;
m = m-C.mt;
}
while (n < C.nt) {
/* Compute the tile this worker handles after the current one. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= C.mt && next_n < C.nt) {
next_n++;
next_m = next_m - C.mt;
}
/* X-by-Y is the size of tile C(m, n); edge tiles hold the remainder. */
X = m == C.mt-1 ? C.m - m*C.mb : C.mb;
Y = n == C.nt-1 ? C.n - n*C.nb : C.nb;
ldcm = BLKLDD(C, m);
/*
* A: PlasmaNoTrans / B: PlasmaNoTrans
*/
if (transA == PlasmaNoTrans) {
ldam = BLKLDD(A, m);
if (transB == PlasmaNoTrans) {
for (k = 0; k < A.nt; k++) {
K = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldbk = BLKLDD(B, k);
/* beta scales C only on the first update; later updates accumulate with zone. */
zbeta = k == 0 ? beta : zone;
transA, transB,
X, Y, K,
alpha, A(m, k), ldam,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
}
/*
* A: PlasmaNoTrans / B: Plasma[Conj]Trans
*/
else {
ldbn = BLKLDD(B, n);
for (k = 0; k < A.nt; k++) {
K = k == A.nt-1 ? A.n-k*A.nb : A.nb;
zbeta = k == 0 ? beta : zone;
transA, transB,
X, Y, K,
alpha, A(m, k), ldam,
B(n, k), ldbn,
zbeta, C(m, n), ldcm);
}
}
}
/*
* A: Plasma[Conj]Trans / B: PlasmaNoTrans
*/
else {
if (transB == PlasmaNoTrans) {
/* With A transposed the inner dimension runs over A's tile rows. */
for (k = 0; k < A.mt; k++) {
K = k == A.mt-1 ? A.m-k*A.mb : A.mb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
zbeta = k == 0 ? beta : zone;
transA, transB,
X, Y, K,
alpha, A(k, m), ldak,
B(k, n), ldbk,
zbeta, C(m, n), ldcm);
}
}
/*
* A: Plasma[Conj]Trans / B: Plasma[Conj]Trans
*/
else {
ldbn = BLKLDD(B, n);
for (k = 0; k < A.mt; k++) {
K = k == A.mt-1 ? A.m-k*A.mb : A.mb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
transA, transB,
X, Y, K,
alpha, A(k, m), ldak,
B(n, k), ldbn,
zbeta, C(m, n), ldcm);
}
}
}
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgemm_quark ( PLASMA_enum  transA,
PLASMA_enum  transB,
PLASMA_Complex64_t  alpha,
PLASMA_desc  A,
PLASMA_desc  B,
PLASMA_Complex64_t  beta,
PLASMA_desc  C,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile matrix-matrix multiplication - dynamic scheduling

Definition at line 149 of file pzgemm.c.

References B, BLKLDD, C, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaNoTrans, plasma_context_struct::quark, QUARK_CORE_zgemm(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Dynamic-scheduling tile matrix multiply: for every tile C(m, n) one task
 * per k is submitted, using beta for the first k and zone afterwards.  The
 * branch taken depends on transA / transB.  Nothing is submitted when
 * sequence->status is not PLASMA_SUCCESS.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  `plasma`, `task_flags`, `zbeta` and
 * `zone` have no visible declaration and the four argument lists have no
 * callee (presumably QUARK_CORE_zgemm, per the References list).  Confirm
 * against pzgemm.c.
 */
int m, n, k;
int ldam, ldak, ldbn, ldbk, ldcm;
int tempmm, tempnn, tempkn, tempkm;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < C.mt; m++) {
/* Rows in tile row m of C; the last tile row holds the remainder. */
tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
ldcm = BLKLDD(C, m);
for (n = 0; n < C.nt; n++) {
/* Columns in tile column n of C; the last tile column holds the remainder. */
tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
/*
* A: PlasmaNoTrans / B: PlasmaNoTrans
*/
if (transA == PlasmaNoTrans) {
ldam = BLKLDD(A, m);
if (transB == PlasmaNoTrans) {
for (k = 0; k < A.nt; k++) {
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldbk = BLKLDD(B, k);
/* beta scales C only on the first update; later updates accumulate with zone. */
zbeta = k == 0 ? beta : zone;
plasma->quark, &task_flags,
transA, transB,
tempmm, tempnn, tempkn, A.mb,
alpha, A(m, k), ldam, /* lda * Z */
B(k, n), ldbk, /* ldb * Y */
zbeta, C(m, n), ldcm); /* ldc * Y */
}
}
/*
* A: PlasmaNoTrans / B: Plasma[Conj]Trans
*/
else {
ldbn = BLKLDD(B, n);
for (k = 0; k < A.nt; k++) {
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
zbeta = k == 0 ? beta : zone;
plasma->quark, &task_flags,
transA, transB,
tempmm, tempnn, tempkn, A.mb,
alpha, A(m, k), ldam, /* lda * Z */
B(n, k), ldbn, /* ldb * Z */
zbeta, C(m, n), ldcm); /* ldc * Y */
}
}
}
/*
* A: Plasma[Conj]Trans / B: PlasmaNoTrans
*/
else {
if (transB == PlasmaNoTrans) {
/* With A transposed the inner dimension runs over A's tile rows. */
for (k = 0; k < A.mt; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
zbeta = k == 0 ? beta : zone;
plasma->quark, &task_flags,
transA, transB,
tempmm, tempnn, tempkm, A.mb,
alpha, A(k, m), ldak, /* lda * X */
B(k, n), ldbk, /* ldb * Y */
zbeta, C(m, n), ldcm); /* ldc * Y */
}
}
/*
* A: Plasma[Conj]Trans / B: Plasma[Conj]Trans
*/
else {
ldbn = BLKLDD(B, n);
for (k = 0; k < A.mt; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
ldak = BLKLDD(A, k);
zbeta = k == 0 ? beta : zone;
plasma->quark, &task_flags,
transA, transB,
tempmm, tempnn, tempkm, A.mb,
alpha, A(k, m), ldak, /* lda * X */
B(n, k), ldbn, /* ldb * Z */
zbeta, C(m, n), ldcm); /* ldc * Y */
}
}
}
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgeqrf ( plasma_context_t *plasma)

Parallel tile QR factorization - static scheduling

Definition at line 24 of file pzgeqrf.c.

References A, BLKLDD, CORE_zgeqrt(), CORE_ztsmqr(), CORE_ztsqrt(), CORE_zunmqr(), plasma_desc_t::dtyp, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_IB, plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_4, PlasmaConjTrans, PlasmaLeft, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, plasma_sequence_t::status, and T.

{
/*
 * Static-scheduling tile QR factorization.  Work items are (k, n, m)
 * triples: m walks down tile column n from row k; when it reaches A.mt, n
 * advances by PLASMA_SIZE and wraps into the next step k.  Ordering between
 * items is enforced through ss_cond_wait / ss_cond_set on (row, column,
 * step) entries.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  A and T have no visible declaration;
 * n is read before any visible assignment (the References list cites
 * PLASMA_RANK, so an initialisation from it was presumably dropped); tau is
 * freed but its allocation is not visible; ss_finalize is in the References
 * list but not in the body; and the four argument lists have no callee
 * (References: CORE_zgeqrt, CORE_ztsqrt, CORE_zunmqr, CORE_ztsmqr).  Confirm
 * against pzgeqrf.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int k, m, n;
int next_k;
int next_m;
int next_n;
int ldak, ldam;
int tempkm, tempkn, tempnn, tempmm;
int ib = PLASMA_IB;
PLASMA_Complex64_t *work, *tau;
plasma_unpack_args_4(A, T, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp);
ss_init(A.mt, A.nt, -1);
k = 0;
/* Fold a starting n that lies beyond the last tile column into later steps k. */
while (n >= A.nt) {
k++;
n = n-A.nt+k;
}
m = k;
while (k < min(A.mt, A.nt) && n < A.nt) {
/* Compute the work item that follows the current (k, n, m). */
next_n = n;
next_m = m;
next_k = k;
next_m++;
if (next_m == A.mt) {
next_n += PLASMA_SIZE;
while (next_n >= A.nt && next_k < min(A.mt, A.nt)) {
next_k++;
next_n = next_n-A.nt+next_k;
}
next_m = next_k;
}
/* Tile extents; the last tile row/column holds the remainder. */
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldak = BLKLDD(A, k);
ldam = BLKLDD(A, m);
/* n == k: this item belongs to the panel (tile column k). */
if (n == k) {
if (m == k) {
ss_cond_wait(k, k, k-1);
/* Diagonal tile; presumably CORE_zgeqrt -- confirm. */
tempkm, tempkn, ib,
A(k, k), ldak,
T(k, k), T.mb,
tau, work);
ss_cond_set(k, k, k);
}
else {
ss_cond_wait(m, k, k-1);
/* Tile A(m, k) below the diagonal; presumably CORE_ztsqrt -- confirm. */
tempmm, tempkn, ib,
A(k, k), ldak,
A(m, k), ldam,
T(m, k), T.mb,
tau, work);
ss_cond_set(m, k, k);
}
}
/* n != k: this item updates a tile to the right of the panel. */
else {
if (m == k) {
ss_cond_wait(k, k, k);
ss_cond_wait(k, n, k-1);
/* Presumably CORE_zunmqr; no ss_cond_set follows in this listing. */
tempkm, tempnn, tempkm, ib,
A(k, k), ldak,
T(k, k), T.mb,
A(k, n), ldak,
work, T.nb);
}
else {
ss_cond_wait(m, k, k);
ss_cond_wait(m, n, k-1);
/* Presumably CORE_ztsmqr -- confirm. */
A.nb, tempnn, tempmm, tempnn, A.nb, ib,
A(k, n), ldak,
A(m, n), ldam,
A(m, k), ldam,
T(m, k), T.mb,
work, ib);
ss_cond_set(m, n, k);
}
}
n = next_n;
m = next_m;
k = next_k;
}
plasma_private_free(plasma, work);
plasma_private_free(plasma, tau);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgeqrf_quark ( PLASMA_desc  A,
PLASMA_desc  T,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile QR factorization - dynamic scheduling

Definition at line 137 of file pzgeqrf.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, plasma_context_struct::quark, QUARK_CORE_zgeqrt(), QUARK_CORE_ztsmqr(), QUARK_CORE_ztsqrt(), QUARK_CORE_zunmqr(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, T, and TASK_SEQUENCE.

{
/*
 * Submits the tile QR factorization of A as QUARK tasks, one step per
 * diagonal tile k < min(A.mt, A.nt).  Nothing is submitted when
 * sequence->status is not PLASMA_SUCCESS.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  `plasma` and `task_flags` have no
 * visible declaration and the four argument lists below have no callee.  The
 * References list names QUARK_CORE_zgeqrt, QUARK_CORE_zunmqr,
 * QUARK_CORE_ztsqrt and QUARK_CORE_ztsmqr (plus PlasmaLeft /
 * PlasmaConjTrans, which are not visible either) -- confirm against
 * pzgeqrf.c.
 */
int k, m, n;
int ldak, ldam;
int tempkm, tempkn, tempnn, tempmm;
int ib;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the caller's QUARK sequence to every task submitted below. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
ib = PLASMA_IB;
for (k = 0; k < min(A.mt, A.nt); k++) {
/* The last tile row/column holds the remainder; all others are full tiles. */
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
/* Diagonal tile A(k, k) with T(k, k); presumably QUARK_CORE_zgeqrt -- confirm. */
plasma->quark, &task_flags,
tempkm, tempkn, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb);
/* Tiles to the right of the diagonal in row k; presumably QUARK_CORE_zunmqr -- confirm. */
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
tempkm, tempnn, tempkm, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb,
A(k, n), ldak);
}
/* Tiles below the diagonal in column k, paired with A(k, k). */
for (m = k+1; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
/* Presumably QUARK_CORE_ztsqrt -- confirm. */
plasma->quark, &task_flags,
tempmm, tempkn, ib, T.nb,
A(k, k), ldak,
A(m, k), ldam,
T(m, k), T.mb);
/* Trailing tiles A(k, n) / A(m, n); presumably QUARK_CORE_ztsmqr -- confirm. */
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
A.mb, tempnn, tempmm, tempnn, A.nb, ib, T.nb,
A(k, n), ldak,
A(m, n), ldam,
A(m, k), ldam,
T(m, k), T.mb);
}
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgeqrfrh_quark ( PLASMA_desc  A,
PLASMA_desc  T,
int  BS,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile QR factorization (reduction Householder) - dynamic scheduling

Definition at line 25 of file pzgeqrfrh.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, plasma_context_struct::quark, QUARK_CORE_zgeqrt(), QUARK_CORE_ztsmqr(), QUARK_CORE_ztsqrt(), QUARK_CORE_zttmqr(), QUARK_CORE_zttqrt(), QUARK_CORE_zunmqr(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, T, T2, and TASK_SEQUENCE.

{
/*
 * Submits the "reduction Householder" variant of the tile QR factorization
 * as QUARK tasks.  For each step k the tile column is first processed in
 * domains of BS tile rows (loop over M), then the domain heads are combined
 * pairwise with a stride RD that doubles each round.
 * Nothing is submitted when sequence->status is not PLASMA_SUCCESS.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  `plasma` and `task_flags` have no
 * visible declaration and the six argument lists below have no callee.  The
 * References list names QUARK_CORE_zgeqrt, QUARK_CORE_zunmqr,
 * QUARK_CORE_ztsqrt, QUARK_CORE_ztsmqr, QUARK_CORE_zttqrt and
 * QUARK_CORE_zttmqr -- confirm the mapping against pzgeqrfrh.c.
 */
int k, m, n;
int M, RD;
int ldaM, ldam, ldaMRD;
int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
int ib;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
ib = PLASMA_IB;
for (k = 0; k < min(A.mt, A.nt); k++) {
/* Columns in tile column k: remainder for the last tile column, A.nb otherwise. */
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Phase 1: one domain per BS tile rows, headed by tile A(M, k). */
for (M = k; M < A.mt; M += BS) {
tempMm = M == A.mt-1 ? A.m-M*A.mb : A.mb;
tempkmin = min(tempMm, tempkn);
ldaM = BLKLDD(A, M);
/* Domain head A(M, k); presumably QUARK_CORE_zgeqrt -- confirm. */
plasma->quark, &task_flags,
tempMm, tempkn, ib, T.nb,
A(M, k), ldaM,
T(M, k), T.mb);
/* Tiles to the right in row M; presumably QUARK_CORE_zunmqr -- confirm. */
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
tempMm, tempnn, tempkmin, ib, T.nb,
A(M, k), ldaM,
T(M, k), T.mb,
A(M, n), ldaM);
}
/* Remaining tile rows of this domain, clipped at A.mt. */
for (m = M+1; m < min(M+BS, A.mt); m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
/* Presumably QUARK_CORE_ztsqrt -- confirm. */
plasma->quark, &task_flags,
tempmm, tempkn, ib, T.nb,
A(M, k), ldaM,
A(m, k), ldam,
T(m, k), T.mb);
/* Presumably QUARK_CORE_ztsmqr on the columns to the right -- confirm. */
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
A.nb, tempnn, tempmm, tempnn, A.nb, ib, T.nb,
A(M, n), ldaM,
A(m, n), ldam,
A(m, k), ldam,
T(m, k), T.mb);
}
}
}
/* Phase 2: combine tile rows M and M+RD; RD doubles each round. */
for (RD = BS; RD < A.mt-k; RD *= 2) {
for (M = k; M+RD < A.mt; M += 2*RD) {
tempMRDm = M+RD == A.mt-1 ? A.m-(M+RD)*A.mb : A.mb;
ldaM = BLKLDD(A, M );
ldaMRD = BLKLDD(A, M+RD);
/* Uses T2 rather than T; presumably QUARK_CORE_zttqrt -- confirm. */
plasma->quark, &task_flags,
tempMRDm, tempkn, ib, T.nb,
A (M , k), ldaM,
A (M+RD, k), ldaMRD,
T2(M+RD, k), T.mb);
/* Presumably QUARK_CORE_zttmqr on the columns to the right -- confirm. */
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
A.nb, tempnn, tempMRDm, tempnn, A.nb, ib, T.nb,
A (M, n), ldaM,
A (M+RD, n), ldaMRD,
A (M+RD, k), ldaMRD,
T2(M+RD, k), T.mb);
}
}
}
}
}

Here is the call graph for this function:

void plasma_pzgerbb ( plasma_context_t *plasma)

Parallel tile BAND Bidiagonal Reduction - static scheduling. Could be optimized by using the algorithms from Trefethen's book.

WARNING: never call this function, because unmqr and unmlq do not implement all the cases required by static scheduling.

Definition at line 26 of file pzgerbb.c.

References A, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_desc_submatrix(), plasma_pzgelqf(), plasma_pzgeqrf(), plasma_pzunmlq(), plasma_pzunmqr(), plasma_static_call_4, plasma_static_call_7, PLASMA_SUCCESS, plasma_unpack_args_4, PlasmaConjTrans, PlasmaLeft, PlasmaRight, plasma_sequence_t::status, and T.

{
/*
 * Static-scheduling reduction of A to band bidiagonal form.  For each step
 * k, four argument groups are built from sub-matrices of A and T.  When
 * A.m >= A.n the first two groups address the column panel starting at
 * (k, k) and the columns to its right, and the last two the row panel
 * starting at (k, k+1) and the rows below it.  When A.m < A.n the row panel
 * at (k, k) comes first, followed by the column panel at (k+1, k).
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  A and T have no visible declaration
 * and none of the argument groups has a callee.  The References list names
 * plasma_static_call_4 / plasma_static_call_7 together with plasma_pzgeqrf,
 * plasma_pzunmqr, plasma_pzgelqf and plasma_pzunmlq (and PlasmaLeft /
 * PlasmaRight / PlasmaConjTrans, which are not visible) -- confirm against
 * pzgerbb.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int k;
int tempkm, tempkn;
plasma_unpack_args_4(A, T, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
if (A.m >= A.n){
for (k = 0; k < A.nt; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Column panel from (k, k) to the bottom; presumably plasma_pzgeqrf -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
PLASMA_desc, plasma_desc_submatrix(T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Same panel plus the columns to its right; presumably plasma_pzunmqr -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
PLASMA_desc, plasma_desc_submatrix(A, k*A.mb, (k+1)*A.nb, A.m-k*A.mb, A.n-(k+1)*A.nb),
PLASMA_desc, plasma_desc_submatrix(T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
if (k+1 < A.nt){
tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
/* Row panel from (k, k+1) to the right edge; presumably plasma_pzgelqf -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
PLASMA_desc, plasma_desc_submatrix(T, k*T.mb, (k+1)*T.nb, tempkm, T.n-(k+1)*T.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Same row panel plus the rows below it; presumably plasma_pzunmlq -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
PLASMA_desc, plasma_desc_submatrix(A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
PLASMA_desc, plasma_desc_submatrix(T, k*T.mb, (k+1)*T.nb, tempkm, T.n-(k+1)*T.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}
}
else{
for (k = 0; k < A.mt; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Row panel from (k, k) to the right edge; presumably plasma_pzgelqf -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
PLASMA_desc, plasma_desc_submatrix(T, k*T.mb, k*T.nb, tempkm, T.n-k*T.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Same row panel plus the rows below it; presumably plasma_pzunmlq -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
PLASMA_desc, plasma_desc_submatrix(A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, A.n-k*A.nb),
PLASMA_desc, plasma_desc_submatrix(T, k*T.mb, k*T.nb, tempkm, T.n-k*T.nb),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
if (k+1 < A.mt){
tempkm = k+1 == A.mt-1 ? A.m-(k+1)*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Column panel from (k+1, k) to the bottom; presumably plasma_pzgeqrf -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
PLASMA_desc, plasma_desc_submatrix(T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
/* Same column panel plus the columns to its right; presumably plasma_pzunmqr -- confirm. */
PLASMA_desc, plasma_desc_submatrix(A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
PLASMA_desc, plasma_desc_submatrix(A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
PLASMA_desc, plasma_desc_submatrix(T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgerbb_quark ( PLASMA_desc  A,
PLASMA_desc  T,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile BAND Bidiagonal Reduction - dynamic scheduler. Could be optimized by using the algorithms from Trefethen's book.

Definition at line 127 of file pzgerbb.c.

References plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_desc_submatrix(), plasma_pzgelqf_quark(), plasma_pzgeqrf_quark(), plasma_pzunmlq_quark(), plasma_pzunmqr_quark(), PlasmaConjTrans, PlasmaLeft, and PlasmaRight.

{
/*
 * Dynamic-scheduling reduction of A to band bidiagonal form.  For each step
 * k, four argument groups are built from sub-matrices of A and T.  When
 * A.m >= A.n the first two groups address the column panel starting at
 * (k, k) and the columns to its right, and the last two the row panel
 * starting at (k, k+1) and the rows below it.  When A.m < A.n the row panel
 * at (k, k) comes first, followed by the column panel at (k+1, k).
 * Unlike the neighbouring routines, no sequence->status check is visible.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier, so none of the argument groups has a
 * callee.  The References list names plasma_pzgeqrf_quark,
 * plasma_pzunmqr_quark, plasma_pzgelqf_quark and plasma_pzunmlq_quark (and
 * PlasmaLeft / PlasmaRight / PlasmaConjTrans, which are not visible) --
 * confirm against pzgerbb.c.
 */
int k;
int tempkm, tempkn;
if (A.m >= A.n){
for (k = 0; k < A.nt; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Column panel from (k, k) to the bottom; presumably plasma_pzgeqrf_quark -- confirm. */
plasma_desc_submatrix(A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
plasma_desc_submatrix(T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
sequence, request);
/* Same panel plus the columns to its right; presumably plasma_pzunmqr_quark -- confirm. */
plasma_desc_submatrix(A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
plasma_desc_submatrix(A, k*A.mb, (k+1)*A.nb, A.m-k*A.mb, A.n-(k+1)*A.nb),
plasma_desc_submatrix(T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
sequence, request);
if (k+1 < A.nt){
tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
/* Row panel from (k, k+1) to the right edge; presumably plasma_pzgelqf_quark -- confirm. */
plasma_desc_submatrix(A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
plasma_desc_submatrix(T, k*T.mb, (k+1)*T.nb, tempkm, T.n-(k+1)*T.nb),
sequence, request);
/* Same row panel plus the rows below it; presumably plasma_pzunmlq_quark -- confirm. */
plasma_desc_submatrix(A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
plasma_desc_submatrix(A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
plasma_desc_submatrix(T, k*T.mb, (k+1)*T.nb, tempkm, T.n-(k+1)*T.nb),
sequence, request);
}
}
}
else{
for (k = 0; k < A.mt; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Row panel from (k, k) to the right edge; presumably plasma_pzgelqf_quark -- confirm. */
plasma_desc_submatrix(A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
plasma_desc_submatrix(T, k*T.mb, k*T.nb, tempkm, T.n-k*T.nb),
sequence, request);
/* Same row panel plus the rows below it; presumably plasma_pzunmlq_quark -- confirm. */
plasma_desc_submatrix(A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
plasma_desc_submatrix(A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, A.n-k*A.nb),
plasma_desc_submatrix(T, k*T.mb, k*T.nb, tempkm, T.n-k*T.nb),
sequence, request);
if (k+1 < A.mt){
tempkm = k+1 == A.mt-1 ? A.m-(k+1)*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* Column panel from (k+1, k) to the bottom; presumably plasma_pzgeqrf_quark -- confirm. */
plasma_desc_submatrix(A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
plasma_desc_submatrix(T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
sequence, request);
/* Same column panel plus the columns to its right; presumably plasma_pzunmqr_quark -- confirm. */
plasma_desc_submatrix(A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
plasma_desc_submatrix(A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
plasma_desc_submatrix(T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
sequence, request);
}
}
}
}

Here is the call graph for this function:

void plasma_pzgerbbrh_quark ( PLASMA_desc  A,
PLASMA_desc  T,
PLASMA_sequence sequence,
PLASMA_request request 
)
void plasma_pzgerbh_quark ( PLASMA_desc  A,
PLASMA_desc  T,
PLASMA_sequence sequence,
PLASMA_request request 
)
void plasma_pzgetmi2 ( plasma_context_t *plasma)

plasma_pzgetmi2 - performs nprob independent transpositions. Each subproblem is a tile of mb-by-nb elements. This function uses an extra workspace of PLASMA_SIZE*(mb*nb).

Parameters:
[in] plasma: Plasma context to which this call belongs.
See also:
plasma_pzgetmi2_quark

Definition at line 40 of file pzgetmi2.c.

References A, CORE_zgetrip(), plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_10, PlasmaComplexDouble, plasma_sequence_t::status, and storev.

{
/*
 * Static-scheduling in-place transposition of nprob = (m/mb)*(n/nb)
 * consecutive blocks of mb*nb elements.  Blocks i = PLASMA_RANK,
 * PLASMA_RANK + PLASMA_SIZE, ... are each passed to CORE_zgetrip together
 * with the `work` buffer.
 *
 * NOTE(review): `work` is passed to CORE_zgetrip and freed, but no
 * assignment to it is visible.  The References list cites
 * plasma_private_alloc and PlasmaComplexDouble, so the allocation line was
 * presumably dropped from this listing -- confirm against pzgetmi2.c.
 * idep, odep and storev are unpacked but not used in the visible code.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
PLASMA_Complex64_t *A, *Al, *work;
PLASMA_enum storev, idep, odep;
int i, m, n, mb, nb, nprob;
int size, bsiz;
plasma_unpack_args_10(idep, odep, storev, m, n, mb, nb, A, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
/* quick return */
if( (mb < 2) || (nb < 2) ) {
return ;
}
size = PLASMA_SIZE;
/* Number of elements in one block. */
bsiz = mb*nb;
/* Integer division: any partial block in m or n is not counted. */
nprob = ( m / mb ) * ( n / nb );
for (i=PLASMA_RANK; i<nprob; i+=size) {
Al = &(A[ i * bsiz]);
CORE_zgetrip(mb, nb, Al, work);
}
plasma_private_free(plasma, work);
}

Here is the call graph for this function:

void plasma_pzgetmi2_quark ( PLASMA_enum  idep,
PLASMA_enum  odep,
PLASMA_enum  storev,
int  m,
int  n,
int  mb,
int  nb,
PLASMA_Complex64_t *A,
PLASMA_sequence *sequence,
PLASMA_request *request
)

plasma_pzgetmi2_quark - performs nprob independent transpositions. Each subproblem is a tile of mb-by-nb elements. This function uses an extra workspace of PLASMA_SIZE*(mb*nb). This is a maximum in the case of dynamic scheduling.

Parameters:
[in] idep: PlasmaIPT_NoDep: No fake dependencies are added. PlasmaIPT_Panel: A gatherv is added on each panel and the panel size is m*nb. PlasmaIPT_All: A gatherv is added on the whole matrix.
[in] odep: PlasmaIPT_NoDep: No fake dependencies are added. PlasmaIPT_Panel: A gatherv is added on each panel and the panel size is m*nb. PlasmaIPT_All: A gatherv is added on the whole matrix.
[in] storev: PlasmaColumnwise: Data stored in column major. PlasmaRowwise: Data stored in row major.
[in] m: Number of rows of A if tiles are sorted in column major format, number of columns otherwise.
[in] n: Number of columns of A if tiles are sorted in column major format, number of rows otherwise.
[in] mb: Number of rows in each individual subproblem if storev == PlasmaColumnwise, number of columns otherwise. m%mb must be 0.
[in] nb: Number of columns in each individual subproblem if storev == PlasmaColumnwise, number of rows otherwise. n%nb must be 0.
[in,out] A: Matrix of size m*n.
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
plasma_pzgetmi2

Definition at line 128 of file pzgetmi2.c.

References GATHERV, INOUT, INPUT, plasma_context_self(), PLASMA_SUCCESS, PlasmaColumnwise, PlasmaIPT_All, PlasmaIPT_NoDep, PlasmaIPT_Panel, plasma_context_struct::quark, QUARK_CORE_zgetrip(), QUARK_CORE_zgetrip_f1(), QUARK_CORE_zgetrip_f2(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Dynamic-scheduling in-place transposition of mt*nt blocks of mb*nb
 * elements.  The outer switch on idep and the inner switches on odep select
 * which extra regions accompany each task: the enclosing panel Ap (psiz
 * elements), the whole matrix A (size elements), or nothing.
 * Nothing is submitted when sequence->status is not PLASMA_SUCCESS or when
 * mb < 2 or nb < 2.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  `plasma`, `task_flags`, Ap and Al
 * have no visible declaration; the `} else {` below has no visible `if`
 * (presumably a test of storev against PlasmaColumnwise); the `case` labels
 * of all four switches are missing (presumably PlasmaIPT_Panel and
 * PlasmaIPT_All); and several argument lists have no callee (References:
 * QUARK_CORE_zgetrip, QUARK_CORE_zgetrip_f1, QUARK_CORE_zgetrip_f2).
 * Confirm against pzgetmi2.c before relying on the branch descriptions.
 */
int i, j, nprob, mt, nt;
int bsiz, psiz, size;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
/* quick return */
if( (mb < 2) || (nb < 2) ) {
return ;
}
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
/* Number of elements in one block. */
bsiz = mb*nb;
/* First layout: a panel holds m*nb elements and there are n/nb of them. */
psiz = m*nb;
mt = ( m / mb );
nt = ( n / nb );
} else {
/* Other layout: a panel holds n*mb elements and there are m/mb of them. */
psiz = n*mb;
mt = ( n / nb );
nt = ( m / mb );
}
/* Number of elements in the whole matrix. */
size = m*n;
switch ( idep ) {
/*
* Dependencies on each panel as input
*/
switch ( odep ) {
/* Panel in, panel out (INOUT|GATHERV on Ap); callee not visible. */
for (j=0; j<nt; j++) {
Ap = A + (psiz*j);
for (i=0; i<mt; i++) {
Al = Ap + i*bsiz;
plasma->quark, &task_flags,
mb, nb, Al, bsiz,
Ap, psiz, INOUT|GATHERV);
}
}
break;
/* NOTE(review): Ap is passed with length `size` here, not `psiz` as in the sibling branches -- confirm intent. */
for (j=0; j<nt; j++) {
Ap = A + (psiz*j);
for (i=0; i<mt; i++) {
Al = Ap + i*bsiz;
QUARK_CORE_zgetrip_f2(plasma->quark, &task_flags,
mb, nb, Al, bsiz,
Ap, size, INPUT,
A, size, INOUT|GATHERV);
}
}
break;
default:
/* Panel as INPUT only; callee not visible. */
for (j=0; j<nt; j++) {
Ap = A + (psiz*j);
for (i=0; i<mt; i++) {
Al = Ap + i*bsiz;
plasma->quark, &task_flags,
mb, nb, Al, bsiz,
Ap, psiz, INPUT);
}
}
}
break;
/*
* Dependency on all the matrix as input
*/
switch ( odep ) {
/* Whole matrix as INPUT, panel as INOUT|GATHERV; callee not visible. */
for (j=0; j<nt; j++) {
Ap = A + (psiz*j);
for (i=0; i<mt; i++) {
Al = Ap + i*bsiz;
plasma->quark, &task_flags,
mb, nb, Al, bsiz,
A, size, INPUT,
Ap, psiz, INOUT|GATHERV);
}
}
break;
/* Whole matrix as INOUT|GATHERV; blocks addressed by flat index i. */
nprob = mt*nt;
for (i=0; i<nprob; i++) {
QUARK_CORE_zgetrip_f1(plasma->quark, &task_flags,
mb, nb, &(A[ i*bsiz ]), bsiz,
A, size, INOUT|GATHERV);
}
break;
default:
/* Whole matrix as INPUT only. */
nprob = mt*nt;
for (i=0; i<nprob; i++) {
QUARK_CORE_zgetrip_f1(plasma->quark, &task_flags,
mb, nb, &(A[ i*bsiz ]), bsiz,
A, size, INPUT);
}
}
break;
/*
* No Dependencies as input
*/
default:
switch ( odep ) {
/* Panel as INOUT|GATHERV; callee not visible. */
for (j=0; j<nt; j++) {
Ap = A + (psiz*j);
for (i=0; i<mt; i++) {
Al = Ap + i*bsiz;
plasma->quark, &task_flags,
mb, nb, Al, bsiz,
Ap, psiz, INOUT|GATHERV);
}
}
break;
/* Whole matrix as INOUT|GATHERV. */
nprob = mt*nt;
for (i=0; i<nprob; i++) {
QUARK_CORE_zgetrip_f1(plasma->quark, &task_flags,
mb, nb, &(A[ i*bsiz ]), bsiz,
A, size, INOUT|GATHERV);
}
break;
default:
/* No extra region: one plain task per block. */
nprob = mt*nt;
for (i=0; i<nprob; i++) {
QUARK_CORE_zgetrip(plasma->quark, &task_flags,
mb, nb, &(A[ i*bsiz ]), bsiz);
}
}
}
}

Here is the call graph for this function:

void plasma_pzgetrf_incpiv ( plasma_context_t *plasma)

Parallel tile LU factorization - static scheduling

Definition at line 25 of file pzgetrf_incpiv.c.

References A, BLKLDD, CORE_zgessm(), CORE_zgetrf_incpiv(), CORE_zssssm(), CORE_ztstrf(), plasma_desc_t::dtyp, IPIV, L, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_IB, plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, plasma_request_fail(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, ss_abort, ss_aborted, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, and plasma_sequence_t::status.

{
/*
 * Static-scheduling tile LU factorization with incremental pivoting.  Work
 * items are (k, n, m) triples: m walks down tile column n from row k; when
 * it reaches A.mt, n advances by PLASMA_SIZE and wraps into the next step
 * k.  Ordering between items is enforced through ss_cond_wait /
 * ss_cond_set, and the loop also stops once ss_aborted() is true.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  A, L and `work` have no visible
 * declaration; n is read before any visible assignment (the References list
 * cites PLASMA_RANK, so an initialisation from it was presumably dropped);
 * ss_abort and ss_finalize are in the References list but not in the body;
 * and the four argument lists have no callee (References:
 * CORE_zgetrf_incpiv, CORE_ztstrf, CORE_zgessm, CORE_zssssm).  Confirm
 * against pzgetrf_incpiv.c.
 */
int *IPIV;
PLASMA_sequence *sequence;
PLASMA_request *request;
int k, m, n;
int next_k;
int next_m;
int next_n;
int ldak, ldam;
int info;
int tempkn, tempkm, tempmm, tempnn;
int ib = PLASMA_IB;
plasma_unpack_args_5(A, L, IPIV, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*L.nb, L.dtyp);
ss_init(A.mt, A.nt, -1);
k = 0;
/* Fold a starting n that lies beyond the last tile column into later steps k. */
while (n >= A.nt) {
k++;
n = n-A.nt+k;
}
m = k;
while (k < min(A.mt, A.nt) && n < A.nt && !ss_aborted()) {
/* Compute the work item that follows the current (k, n, m). */
next_n = n;
next_m = m;
next_k = k;
next_m++;
if (next_m == A.mt) {
next_n += PLASMA_SIZE;
while (next_n >= A.nt && next_k < min(A.mt, A.nt)) {
next_k++;
next_n = next_n-A.nt+next_k;
}
next_m = next_k;
}
/* Tile extents; the last tile row/column holds the remainder. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldam = BLKLDD(A, m);
/* n == k: this item belongs to the panel (tile column k). */
if (n == k) {
if (m == k) {
ss_cond_wait(k, k, k-1);
/* Diagonal tile; presumably CORE_zgetrf_incpiv -- confirm. */
tempkm, tempkn, ib,
A(k, k), ldak,
IPIV(k, k), &info);
/* A non-zero info is reported only for the last tile row, offset by A.nb*k. */
if (info != 0 && m == A.mt-1) {
plasma_request_fail(sequence, request, info + A.nb*k);
}
ss_cond_set(k, k, k);
}
else {
ss_cond_wait(m, k, k-1);
/* Tile A(m, k) below the diagonal; presumably CORE_ztstrf -- confirm. */
tempmm, tempkn, ib, A.nb,
A(k, k), ldak,
A(m, k), ldam,
L(m, k), L.mb,
IPIV(m, k),
work, L.nb, &info);
if (info != 0 && m == A.mt-1) {
plasma_request_fail(sequence, request, info + A.nb*k);
}
ss_cond_set(m, k, k);
}
}
/* n != k: this item updates a tile to the right of the panel. */
else {
if (m == k) {
ss_cond_wait(k, k, k);
ss_cond_wait(k, n, k-1);
/* Presumably CORE_zgessm; no ss_cond_set follows in this listing. */
tempkm, tempnn, tempkm, ib,
IPIV(k, k),
A(k, k), ldak,
A(k, n), ldak);
}
else {
ss_cond_wait(m, k, k);
ss_cond_wait(m, n, k-1);
/* Presumably CORE_zssssm -- confirm. */
A.nb, tempnn, tempmm, tempnn, A.nb, ib,
A(k, n), ldak,
A(m, n), ldam,
L(m, k), L.mb,
A(m, k), ldam,
IPIV(m, k));
ss_cond_set(m, n, k);
}
}
n = next_n;
m = next_m;
k = next_k;
}
plasma_private_free(plasma, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzgetrf_incpiv_quark ( PLASMA_desc  A,
PLASMA_desc  L,
int *  IPIV,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile LU factorization - dynamic scheduling

Definition at line 143 of file pzgetrf_incpiv.c.

References A, BLKLDD, IPIV, L, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zgessm(), QUARK_CORE_zgetrf_incpiv(), QUARK_CORE_zssssm(), QUARK_CORE_ztstrf(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Submits the tile LU factorization with incremental pivoting as QUARK
 * tasks, one step per diagonal tile k < min(A.mt, A.nt).  Nothing is
 * submitted when sequence->status is not PLASMA_SUCCESS.
 *
 * NOTE(review): this listing appears to have lost every line that started
 * with a cross-referenced identifier.  `plasma` and `task_flags` have no
 * visible declaration and the four argument lists below have no callee.  The
 * References list names QUARK_CORE_zgetrf_incpiv, QUARK_CORE_zgessm,
 * QUARK_CORE_ztstrf and QUARK_CORE_zssssm -- confirm against
 * pzgetrf_incpiv.c.
 */
int k, m, n;
int ldak, ldam;
int tempkm, tempkn, tempmm, tempnn;
int ib;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the caller's QUARK sequence to every task submitted below. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
ib = PLASMA_IB;
for (k = 0; k < min(A.mt, A.nt); k++) {
/* The last tile row/column holds the remainder; all others are full tiles. */
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
/*
 * Diagonal tile; presumably QUARK_CORE_zgetrf_incpiv -- confirm.  The last
 * two arguments mirror the static version's check (last tile row) and
 * offset (A.nb*k) used when reporting a non-zero info.
 */
plasma->quark, &task_flags,
tempkm, tempkn, ib, L.nb,
A(k, k), ldak, IPIV(k, k),
sequence, request,
k == A.mt-1, A.nb*k);
/* Tiles to the right of the diagonal in row k; presumably QUARK_CORE_zgessm -- confirm. */
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
tempkm, tempnn, tempkm, ib, L.nb,
IPIV(k, k),
A(k, k), ldak,
A(k, n), ldak);
}
/* Tiles below the diagonal in column k, paired with A(k, k). */
for (m = k+1; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
/* Presumably QUARK_CORE_ztstrf -- confirm. */
plasma->quark, &task_flags,
tempmm, tempkn, ib, L.nb,
A(k, k), ldak,
A(m, k), ldam,
L(m, k), L.mb,
IPIV(m, k),
sequence, request,
m == A.mt-1, A.nb*k);
/* Trailing tiles A(k, n) / A(m, n); presumably QUARK_CORE_zssssm -- confirm. */
for (n = k+1; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
A.nb, tempnn, tempmm, tempnn, A.nb, ib, L.nb,
A(k, n), ldak,
A(m, n), ldam,
L(m, k), L.mb,
A(m, k), ldam,
IPIV(m, k));
}
}
}
}

Here is the call graph for this function:

void plasma_pzgetrf_reclap_quark ( PLASMA_desc  A,
int *  IPIV,
PLASMA_sequence sequence,
PLASMA_request request 
)
void plasma_pzgetrf_rectil_quark ( PLASMA_desc  A,
int *  IPIV,
PLASMA_sequence sequence,
PLASMA_request request 
)
void plasma_pzhbrdt_quark ( PLASMA_enum  uplo,
PLASMA_desc  A,
double *  D,
double *  E,
PLASMA_desc  T,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel Reduction from BAND tridiagonal to the final condensed form - dynamic scheduler

Definition at line 28 of file pzhbrdt.c.

References A, C, cabs(), creal(), DEP, plasma_desc_t::dtyp, lapack_const, plasma_desc_t::lm, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_context_self(), plasma_element_size(), plasma_sequence_flush(), plasma_shared_alloc(), plasma_shared_free(), PLASMA_SUCCESS, PlasmaComplexDouble, PlasmaInteger, PlasmaLower, plasma_context_struct::quark, QUARK_Barrier(), QUARK_CORE_ztrdalg(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Reduces the band matrix described by A to its final condensed form and
 * stores the resulting diagonal in D (N entries) and off-diagonal in E
 * (N-1 entries), where N = A.m and NB = A.mb.
 *
 * uplo selects which off-diagonal is read back: A(i+1,i) for PlasmaLower,
 * A(i,i+1) otherwise.
 *
 * Returns immediately if the sequence is already in error or if N == 0.
 * NB == 0 and NB == 1 are handled as special cases that only copy values
 * out of A; the general case schedules the bulge-chasing tasks, waits on a
 * barrier and then copies D and E.
 *
 * NOTE(review): this listing appears to have lost some lines, e.g. the
 * declarations of plasma, task_flags, ztmp, zone, C and S, the name of the
 * task-insertion call inside the innermost loop, and the
 * "if (uplo == PlasmaLower) {" that the "} else { PlasmaUpper" branches of
 * the COMPLEX sections close. Confirm against pzhbrdt.c before relying on
 * the text below.
 */
#ifdef COMPLEX
static double dzero = (double) 0.0;
double absztmp;
#endif
int N, NB, INgrsiz, INthgrsiz, BAND;
int myid, grsiz, shift=3, stt, st, ed, stind, edind;
int blklastind, colpt, PCOL, ACOL, MCOL;
int stepercol, mylastid, grnb, grid;
int *DEP,*MAXID;
int i, j, m;
int thgrsiz, thgrnb, thgrid, thed;
size_t eltsize = plasma_element_size(A.dtyp);
plasma = plasma_context_self();
/* Do nothing if an earlier task of this sequence already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
N = A.m;
NB = A.mb;
/* Quick return */
if (N == 0){
return;
}
/* NB == 0: only the diagonal is read; E is left zeroed. */
if (NB == 0) {
memset(D, 0, N*sizeof(double));
memset(E, 0, (N-1)*sizeof(double));
#ifdef COMPLEX
for (i=0; i<N; i++)
D[i] = cabs(*A(i,i));
#else
for (i=0; i<N; i++)
D[i] = *A(i,i);
#endif
return;
}
/*
* A barrier is used because the bulge chasing has to wait until
* the reduction to band has finished.
* Otherwise, this BARRIER can be removed once the function
* dependencies are linked inside the reduction to
* band. Keep in mind the case when NB=1, where there is no bulge-chasing.
*/
/***************************************************************/
QUARK_Barrier(plasma->quark);
/***************************************************************/
/*
* Case NB=1 ==> matrix is already Bidiagonal. no need to bulge.
* Make diagonal and superdiagonal elements real, storing them in
* D and E. if PlasmaLower, first transform lower bidiagonal form
* to upper bidiagonal by applying plane rotations/ Householder
* from the left, overwriting superdiagonal elements then make
* elements real of the resulting upper Bidiagonal. if PlasmaUpper
* then make its elements real. For Q, PT: ZSCAL should be done
* in case of WANTQ.
*/
if (NB == 1){
memset(D, 0, N *sizeof(double));
memset(E, 0, (N-1)*sizeof(double));
#ifdef COMPLEX
for (i=0; i<N; i++)
{
D[i] = creal( *A(i, i) ); /* diag value */
if( i < (N-1)) { /* lower off-diag value */
/* Replace the off-diagonal entry by its modulus and push its
* phase factor into the next off-diagonal entry. */
ztmp = *A((i+1),i);
absztmp = cabs(ztmp);
*A((i+1),i) = absztmp;
E[i] = absztmp;
if(absztmp != dzero)
ztmp = (PLASMA_Complex64_t) (ztmp / absztmp);
else
ztmp = zone;
if(i<(N-2)) *A((i+2),(i+1)) = *A((i+2),(i+1)) * ztmp;
/* for Q: ZSCAL should be done in case of WANTQ */
}
}
} else { /* PlasmaUpper */
for (i=0; i<N; i++)
{
D[i] = creal( *A(i,i) ); /* diag value*/
if(i<(N-1)) { /* upper off-diag value */
ztmp = *A(i, (i+1));
absztmp = cabs(ztmp);
*A(i,(i+1)) = absztmp;
E[i] = absztmp;
if(absztmp != dzero)
ztmp = (PLASMA_Complex64_t) (ztmp / absztmp);
else
ztmp = zone;
if(i<(N-2)) *A((i+1),(i+2)) = *A((i+1),(i+2)) * ztmp;
/* for Q: ZSCAL should be done in case of WANTQ. HERE NEED THE multiply by CONJ(T) */
}
}
} /* end PlasmaUpper*/
#else
if( uplo == PlasmaLower ){
for (i=0; i < N-1; i++) {
D[i] = *A(i, i);
E[i] = *A(i+1, i);
}
/* i == N-1 here: last diagonal entry. */
D[i] = *A(i, i);
} else {
for (i=0; i < N-1; i++) {
D[i] = *A(i, i );
E[i] = *A(i, i+1);
}
/* i == N-1 here: last diagonal entry. */
D[i] = *A(i, i);
}
#endif
return;
}
/* Case N<NB ==> matrix is very small and better to call lapack XHETRD. */
/*
 * NOTE(review): N == 0 already returned above, so this branch can only be
 * taken for negative N. In the visible code work and TTau are passed to
 * LAPACKE_zhetrd_work and freed without being allocated - confirm whether
 * the allocations were lost from this listing or are really missing.
 */
if( N <= 0 ) /* this will be removed we don t need it. */
{
PLASMA_Complex64_t *work, *TTau;
int info, ldwork = N*N;
info = LAPACKE_zhetrd_work(LAPACK_COL_MAJOR, lapack_const(uplo), N,
A(0,0), A.lm, D, E, TTau, work, ldwork);
plasma_shared_free(plasma, (void*) work);
plasma_shared_free(plasma, (void*) TTau);
if( info == 0 )
sequence->status = PLASMA_SUCCESS;
else
plasma_sequence_flush(plasma->quark, sequence, request, info);
return;
}
/* General case NB > 1 && N > NB */
/* DEP and MAXID each hold N+1 integers; only MAXID is zeroed here. */
DEP = (int *) plasma_shared_alloc(plasma, N+1, PlasmaInteger );
MAXID = (int *) plasma_shared_alloc(plasma, N+1, PlasmaInteger );
memset(MAXID,0,(N+1)*sizeof(int));
/***************************************************************************
* START BULGE CHASING CODE
**************************************************************************/
/*
* Initialisation of local parameters. These parameters should be
* input or tuned parameters.
*/
/* Group size heuristic: 2 for NB > 160; for 100 < NB <= 160, 2 when
* N < 5000 and 4 otherwise; 6 for NB <= 100. */
INgrsiz = 1;
if( NB > 160 ) {
INgrsiz = 2;
}
else if( NB > 100 ) {
if( N < 5000 )
INgrsiz = 2;
else
INgrsiz = 4;
} else {
INgrsiz = 6;
}
INthgrsiz = N;
BAND = 0;
grsiz = INgrsiz;
thgrsiz = INthgrsiz;
if( grsiz == 0 ) grsiz = 6;
if( thgrsiz == 0 ) thgrsiz = N;
/* stepercol = ceil(shift / grsiz), thgrnb = ceil((N-2) / thgrsiz). */
i = shift/grsiz;
stepercol = i*grsiz == shift ? i:i+1;
i = (N-2)/thgrsiz;
thgrnb = i*thgrsiz == (N-2) ? i:i+1;
for (thgrid = 1; thgrid<=thgrnb; thgrid++){
stt = (thgrid-1)*thgrsiz+1;
thed = min( (stt + thgrsiz -1), (N-2));
for (i = stt; i <= N-2; i++){
ed=min(i,thed);
if(stt>ed)break;
for (m = 1; m <=stepercol; m++){
st=stt;
for (j = st; j <=ed; j++){
/* PCOL: dependency on the ID of the master of the group of the previous column. (Previous Column:PCOL). */
/* ACOL: dependency on the ID of the master of the previous group of my column. (Actual Column:ACOL). (it is 0(NULL) for myid=1) */
/* MCOL: OUTPUT dependency on the my ID, to be used by the next ID. (My Column: MCOL). I am the master of this group. */
myid = (i-j)*(stepercol*grsiz) +(m-1)*grsiz + 1;
mylastid = myid+grsiz-1;
PCOL = mylastid+shift-1; /* to know the dependent ID of the previous column. need to know the master of its group */
MAXID[j] = myid;
PCOL = min(PCOL,MAXID[j-1]); /* for the last columns, we might do only 1 or 2 kernel, so the PCOL will be wrong. this is to force it to the last ID of the previous col.*/
grnb = PCOL/grsiz;
grid = grnb*grsiz == PCOL ? grnb:grnb+1;
PCOL = (grid-1)*grsiz +1; /* give me the ID of the master of the group of the previous column. */
ACOL = myid-grsiz;
if(myid==1)ACOL=0;
MCOL = myid;
/* Argument list of the task-insertion call. NOTE(review): the callee name
* is not visible in this listing; the References list names
* QUARK_CORE_ztrdalg - confirm. */
plasma->quark, &task_flags,
uplo, N, NB,
&A, C, S, i, j, m, grsiz, BAND,
DEP(PCOL), DEP(ACOL), DEP(MCOL) );
/* Work out the last index touched by this group; once it reaches the
* end of the matrix the starting column stt is advanced. */
if(mylastid%2 ==0){
blklastind = (mylastid/2)*NB+1+j-1;
}else{
colpt = ((mylastid+1)/2)*NB + 1 +j -1 ;
stind = colpt-NB+1;
edind = min(colpt,N);
if( (stind>=edind-1) && (edind==N) )
blklastind=N;
else
blklastind=0;
}
if(blklastind >= (N-1)) stt=stt+1;
} /* END for j=st:ed */
} /* END for m=1:stepercol */
} /* END for i=1:MINMN-2 */
} /* END for thgrid=1:thgrnb */
/*
* Barrier used only for now, to be sure that everything
* is done before copying the D and E and free workspace.
* this will be removed later when D and E are directly filled
* during the bulge process.
*/
QUARK_Barrier(plasma->quark);
plasma_shared_free(plasma, (void*) DEP);
plasma_shared_free(plasma, (void*) MAXID);
/* NOTE(review): the allocations of C and S are not visible in this
* listing - confirm they exist in pzhbrdt.c. */
plasma_shared_free(plasma, (void*) C);
plasma_shared_free(plasma, (void*) S);
/*
* STORE THE RESULTING diagonal/off-diagonal in D AND E
*/
memset(D, 0, N *sizeof(double));
memset(E, 0, (N-1)*sizeof(double));
/* Make diagonal and superdiagonal elements real,
* storing them in D and E
*/
/* In the complex case, the off-diagonal elements are
* not necessarily real. We have to make the off-diagonal
* elements real and copy them to E.
* When using Householder elimination,
* ZLARFG gives us a real output, so all the
* diagonal/off-diagonal elements except the last one are already
* real and thus we only need to take the abs of the last
* one.
* */
#ifdef COMPLEX
for (i=0; i < N-1 ; i++)
{
D[i] = creal( *A(i,i) );
/*
* Alternative for Householder case, all off-diag
* are real except the last off-diag, where we
* have to take the abs
*/
if(i<(N-2))
E[i] = creal(*A(i+1, i));
else
E[i] = cabs( *A(i+1, i));
}
D[i] = creal( *A(i, i) );
} else { /* PlasmaUpper */
for (i=0; i<N-1; i++)
{
D[i] = creal( *A(i,i) );
/*
* Alternative for Householder case, all off-diag
* are real except the last off-diag, where we
* have to take the abs
*/
if( i < (N-2) )
E[i] = creal(*A(i, (i+1)));
else
E[i] = cabs(*A(i, (i+1)));
}
D[i] = creal( *A(i, i) );
} /* end PlasmaUpper */
#else
if( uplo == PlasmaLower ){
for (i=0; i < N-1; i++) {
D[i] = *A(i, i);
E[i] = *A(i+1, i);
}
D[i] = *A(i, i);
} else {
for (i=0; i < N-1; i++) {
D[i] = *A(i, i );
E[i] = *A(i, i+1);
}
D[i] = *A(i, i);
}
#endif
} /* END FUNCTION */

Here is the call graph for this function:

void plasma_pzhegst_quark ( PLASMA_enum  itype,
PLASMA_enum  uplo,
PLASMA_desc  A,
PLASMA_desc  B,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel Transformation to standard eigenvalue problem - dynamic scheduler

Definition at line 22 of file pzhegst.c.

References B, BLKLDD, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), plasma_desc_submatrix(), plasma_pzhemm_quark(), plasma_pzher2k_quark(), plasma_pztrmm_quark(), plasma_pztrsm_quark(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, plasma_context_struct::quark, QUARK_CORE_zhegst(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Tile algorithm for the transformation of a generalized eigenproblem to
 * standard form. The tile loop runs over k = 0 .. A.nt-1; tempkn is the
 * order of diagonal tile k (A.nb, or the remainder for the last tile).
 *
 * itype == 1: the diagonal-tile kernel is issued first, then the updates of
 *             the trailing part (below the diagonal tile for PlasmaLower,
 *             to the right of it otherwise), guarded by
 *             k*A.nb+tempkn < A.n so the last tile issues no update.
 * otherwise : the updates of the leading part (columns/rows 0 .. k*A.nb-1)
 *             are issued first, then the diagonal-tile kernel.
 *
 * Returns immediately if the sequence is already in error.
 *
 * NOTE(review): the names of the routines that the argument lists below
 * belong to are not visible in this listing (nor are the declarations of
 * plasma and task_flags). The References list names plasma_pztrsm_quark,
 * plasma_pztrmm_quark, plasma_pzhemm_quark, plasma_pzher2k_quark and
 * QUARK_CORE_zhegst - confirm the mapping against pzhegst.c.
 */
int k;
int ldak, ldbk;
int tempkn;
static double done = 1.0;
static PLASMA_Complex64_t zone = 1.0;
static PLASMA_Complex64_t mzone = -1.0;
static PLASMA_Complex64_t zhalf = 0.5;
static PLASMA_Complex64_t mzhalf = -0.5;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
if (itype == 1) {
if (uplo == PlasmaLower) {
for (k = 0; k < A.nt; k++){
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
/* Kernel on the diagonal tiles A(k,k), B(k,k). */
plasma->quark, &task_flags,
itype, uplo, tempkn,
A(k, k), ldak,
B(k, k), ldbk,
sequence, request, A.nb*k);
if (k*A.nb+tempkn < A.n) {
/* Panel below the diagonal tile, scaled by zone with diagonal tile B(k,k). */
zone,
plasma_desc_submatrix(B, k*B.nb, k*B.nb, tempkn, tempkn),
plasma_desc_submatrix(A, k*A.nb+tempkn, k*A.nb, A.n-k*A.nb-tempkn, tempkn),
sequence, request);
/* First half-update of the panel (factor -1/2). */
PlasmaRight, uplo, mzhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, tempkn, tempkn),
plasma_desc_submatrix(B, k*B.nb+tempkn, k*B.nb, B.n-k*B.nb-tempkn, tempkn),
zone,
plasma_desc_submatrix(A, k*A.nb+tempkn, k*A.nb, A.n-k*A.nb-tempkn, tempkn),
sequence, request);
/* Update of the trailing diagonal block with the A and B panels. */
mzone,
plasma_desc_submatrix(A, k*A.nb+tempkn, k*A.nb, A.n-k*A.nb-tempkn, tempkn),
plasma_desc_submatrix(B, k*B.nb+tempkn, k*B.nb, B.n-k*B.nb-tempkn, tempkn),
done,
plasma_desc_submatrix(A, k*A.nb+tempkn, k*A.nb+tempkn, A.n-k*A.nb-tempkn, A.n-k*A.nb-tempkn),
sequence, request);
/* Second half-update of the panel (factor -1/2). */
mzhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, tempkn, tempkn),
plasma_desc_submatrix(B, k*B.nb+tempkn, k*B.nb, B.n-k*B.nb-tempkn, tempkn),
zone,
plasma_desc_submatrix(A, k*A.nb+tempkn, k*A.nb, A.n-k*A.nb-tempkn, tempkn),
sequence, request);
/* NOTE(review): the B block below is tempkn x tempkn while the A operand
* has A.n-k*A.nb-tempkn rows; LAPACK ZHEGST uses the whole trailing block
* of B at this step - confirm the dimensions. */
zone,
plasma_desc_submatrix(B, k*B.nb+tempkn, k*B.nb+tempkn, tempkn, tempkn),
plasma_desc_submatrix(A, k*A.nb+tempkn, k*A.nb, A.n-k*A.nb-tempkn, tempkn),
sequence, request);
}
}
}
else {
for (k = 0; k < A.nt; k++){
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
/* Kernel on the diagonal tiles A(k,k), B(k,k). */
plasma->quark, &task_flags,
itype, uplo, tempkn,
A(k, k), ldak,
B(k, k), ldbk,
sequence, request, A.nb*k);
if (k*A.nb+tempkn < A.n) {
/* Panel to the right of the diagonal tile, with diagonal tile B(k,k). */
zone,
plasma_desc_submatrix(B, k*B.nb, k*B.nb, tempkn, tempkn),
plasma_desc_submatrix(A, k*A.nb, k*A.nb+tempkn, tempkn, A.n-k*A.nb-tempkn),
sequence, request);
/* First half-update of the panel (factor -1/2). */
PlasmaLeft, uplo, mzhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, tempkn, tempkn),
plasma_desc_submatrix(B, k*B.nb, k*B.nb+tempkn, tempkn, B.n-k*B.nb-tempkn),
zone,
plasma_desc_submatrix(A, k*A.nb, k*A.nb+tempkn, tempkn, A.n-k*A.nb-tempkn),
sequence, request);
/* Update of the trailing diagonal block with the A and B panels. */
mzone,
plasma_desc_submatrix(A, k*A.nb, k*A.nb+tempkn, tempkn, A.n-k*A.nb-tempkn),
plasma_desc_submatrix(B, k*B.nb, k*B.nb+tempkn, tempkn, B.n-k*B.nb-tempkn),
done,
plasma_desc_submatrix(A, k*A.nb+tempkn, k*A.nb+tempkn, A.n-k*A.nb-tempkn, A.n-k*A.nb-tempkn),
sequence, request);
/* Second half-update of the panel (factor -1/2). */
mzhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, tempkn, tempkn),
plasma_desc_submatrix(B, k*B.nb, k*B.nb+tempkn, tempkn, B.n-k*B.nb-tempkn),
zone,
plasma_desc_submatrix(A, k*A.nb, k*A.nb+tempkn, tempkn, A.n-k*A.nb-tempkn),
sequence, request);
/* NOTE(review): the B block below is tempkn x tempkn while the A operand
* has A.n-k*A.nb-tempkn columns - confirm the dimensions (see LAPACK
* ZHEGST). */
zone,
plasma_desc_submatrix(B, k*B.nb+tempkn, k*B.nb+tempkn, tempkn, tempkn),
plasma_desc_submatrix(A, k*A.nb, k*A.nb+tempkn, tempkn, A.n-k*A.nb-tempkn),
sequence, request);
}
}
}
}
else{
if (uplo == PlasmaLower) {
for (k = 0; k < A.nt; k++){
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
/* Row panel A(k, 0:k-1) against the leading block of B. */
zone,
plasma_desc_submatrix(B, 0, 0, k*B.nb, k*B.nb),
plasma_desc_submatrix(A, k*A.nb, 0, tempkn, k*A.nb),
sequence, request);
/* First half-update of the row panel (factor 1/2). */
PlasmaLeft, uplo, zhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, tempkn, tempkn),
plasma_desc_submatrix(B, k*B.nb, 0, tempkn, k*B.nb),
zone,
plasma_desc_submatrix(A, k*A.nb, 0, tempkn, k*A.nb),
sequence, request);
/* Update of the leading block of A with the A and B row panels. */
zone,
plasma_desc_submatrix(A, k*A.nb, 0, tempkn, k*A.nb),
plasma_desc_submatrix(B, k*B.nb, 0, tempkn, k*B.nb),
done,
plasma_desc_submatrix(A, 0, 0, k*A.nb, k*A.nb),
sequence, request);
/* Second half-update of the row panel (factor 1/2). */
PlasmaLeft, uplo, zhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, tempkn, tempkn),
plasma_desc_submatrix(B, k*B.nb, 0, tempkn, k*B.nb),
zone,
plasma_desc_submatrix(A, k*A.nb, 0, tempkn, k*A.nb),
sequence, request);
/* Row panel against the diagonal tile B(k,k). */
zone,
plasma_desc_submatrix(B, k*B.nb, k*B.nb, tempkn, tempkn),
plasma_desc_submatrix(A, k*A.nb, 0, tempkn, k*A.nb),
sequence, request);
/* Kernel on the diagonal tiles A(k,k), B(k,k). */
plasma->quark, &task_flags,
itype, uplo, tempkn,
A(k, k), ldak,
B(k, k), ldbk,
sequence, request, A.nb*k);
}
}
else {
for (k = 0; k < A.nt; k++){
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
/* Column panel A(0:k-1, k) against the leading block of B. */
zone,
plasma_desc_submatrix(B, 0, 0, k*B.nb, k*B.nb),
plasma_desc_submatrix(A, 0, k*A.nb, k*A.nb, tempkn),
sequence, request);
/* NOTE(review): the diagonal block of A is given dimensions
* k*A.nb x k*A.nb here, whereas the PlasmaLower branch above uses
* tempkn x tempkn for the same block - confirm. */
PlasmaRight, uplo, zhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, k*A.nb, k*A.nb),
plasma_desc_submatrix(B, 0, k*B.nb, k*B.nb, tempkn),
zone,
plasma_desc_submatrix(A, 0, k*A.nb, k*A.nb, tempkn),
sequence, request);
/* Update of the leading block of A with the A and B column panels. */
zone,
plasma_desc_submatrix(A, 0, k*A.nb, k*A.nb, tempkn),
plasma_desc_submatrix(B, 0, k*B.nb, k*B.nb, tempkn),
done,
plasma_desc_submatrix(A, 0, 0, k*A.nb, k*A.nb),
sequence, request);
/* NOTE(review): same k*A.nb x k*A.nb diagonal block as above - confirm. */
PlasmaRight, uplo, zhalf,
plasma_desc_submatrix(A, k*A.nb, k*A.nb, k*A.nb, k*A.nb),
plasma_desc_submatrix(B, 0, k*B.nb, k*B.nb, tempkn),
zone,
plasma_desc_submatrix(A, 0, k*A.nb, k*A.nb, tempkn),
sequence, request);
/* Column panel against the diagonal tile B(k,k). */
zone,
plasma_desc_submatrix(B, k*B.nb, k*B.nb, tempkn, tempkn),
plasma_desc_submatrix(A, 0, k*A.nb, k*A.nb, tempkn),
sequence, request);
/* Kernel on the diagonal tiles A(k,k), B(k,k). */
plasma->quark, &task_flags,
itype, uplo, tempkn,
A(k, k), ldak,
B(k, k), ldbk,
sequence, request, A.nb*k);
}
}
}
}

Here is the call graph for this function:

void plasma_pzherbt_quark ( PLASMA_enum  uplo,
PLASMA_desc  A,
PLASMA_desc  T,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel tile BAND Tridiagonal Reduction - dynamic scheduler

Definition at line 23 of file pzherbt.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaRight, PlasmaUpper, plasma_context_struct::quark, QUARK_CORE_zgelqt(), QUARK_CORE_zgeqrt(), QUARK_CORE_zherfb(), QUARK_CORE_ztslqt(), QUARK_CORE_ztsmlq(), QUARK_CORE_ztsmlq_corner(), QUARK_CORE_ztsmlq_hetra1(), QUARK_CORE_ztsmqr(), QUARK_CORE_ztsmqr_corner(), QUARK_CORE_ztsmqr_hetra1(), QUARK_CORE_ztsqrt(), QUARK_CORE_zunmlq(), QUARK_CORE_zunmqr(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, T, and TASK_SEQUENCE.

{
/*
 * Tile reduction of the matrix described by A to band form, with the
 * auxiliary tiles stored in T. One step k handles tile column k
 * (PlasmaLower) or tile row k (otherwise): the first off-diagonal tile is
 * factored, the factor is applied to the diagonal tile A(k+1,k+1) and to
 * the remaining tiles, then every further tile of the panel is coupled with
 * the first one and the trailing tiles are updated.
 *
 * Returns immediately if the sequence is already in error.
 *
 * NOTE(review): the names of the task-insertion routines that own the
 * argument lists below, and the declarations of plasma and task_flags, are
 * not visible in this listing. The References list names the
 * QUARK_CORE_zgeqrt / zherfb / zunmqr / ztsqrt / ztsmqr* kernels and their
 * LQ counterparts - confirm the mapping against pzherbt.c.
 */
int k, m, n, i, j;
int ldak, ldam, ldan, ldaj, ldai;
int tempkn, tempmm, tempnn, tempjj;
int ib;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
ib = PLASMA_IB;
if (uplo == PlasmaLower) {
for (k = 0; k < A.nt-1; k++){
/* Size and leading dimension of tile row k+1. */
tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
ldak = BLKLDD(A, k+1);
/* Factorization of the sub-diagonal tile A(k+1,k). */
plasma->quark, &task_flags,
tempkn, A.nb, ib, T.nb,
A(k+1, k), ldak,
T(k+1, k), T.mb);
/* LEFT and RIGHT on the symmetric diagonal block */
plasma->quark, &task_flags,
tempkn, tempkn, ib, T.nb,
A(k+1, k), ldak,
T(k+1, k), T.mb,
A(k+1, k+1), ldak);
/* RIGHT on the remaining tiles until the bottom */
for (m = k+2; m < A.mt ; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
plasma->quark, &task_flags,
tempmm, A.nb, tempkn, ib, T.nb,
A(k+1, k), ldak,
T(k+1, k), T.mb,
A(m , k+1), ldam);
}
for (m = k+2; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
/* Couple tile A(m,k) with A(k+1,k). */
plasma->quark, &task_flags,
tempmm, A.nb, ib, T.nb,
A(k+1, k), ldak,
A(m , k), ldam,
T(m , k), T.mb);
/* LEFT */
for (i = k+2; i < m; i++) {
ldai = BLKLDD(A, i);
plasma->quark, &task_flags,
A.mb, A.nb, tempmm, A.nb, A.nb, ib, T.nb,
A(i, k+1), ldai,
A(m, i), ldam,
A(m, k), ldam,
T(m, k), T.mb);
}
/* RIGHT */
for (j = m+1; j < A.mt ; j++) {
tempjj = j == A.mt-1 ? A.m-j*A.mb : A.mb;
ldaj = BLKLDD(A, j);
plasma->quark, &task_flags,
tempjj, A.nb, tempjj, tempmm, A.nb, ib, T.nb,
A(j, k+1), ldaj,
A(j, m), ldaj,
A(m, k), ldam,
T(m, k), T.mb);
}
/* LEFT->RIGHT */
plasma->quark, &task_flags,
A.nb, A.nb, tempmm, A.nb, tempmm, tempmm, A.nb, ib, T.nb,
A(k+1, k+1), ldak,
A(m , k+1), ldam,
A(m , m), ldam,
A(m , k), ldam,
T(m , k), T.mb);
}
}
}
else {
for (k = 0; k < A.nt-1; k++){
/* Size and leading dimension of tile row k+1; tiles of tile row k are
* passed with leading dimension A.nb below. */
tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
ldak = BLKLDD(A, k+1);
/* Factorization of the super-diagonal tile A(k,k+1). */
plasma->quark, &task_flags,
A.nb, tempkn, ib, T.nb,
A(k, k+1), A.nb,
T(k, k+1), T.mb);
/* RIGHT and LEFT on the symmetric diagonal block */
plasma->quark, &task_flags,
tempkn, tempkn, ib, T.nb,
A(k, k+1), A.nb,
T(k, k+1), T.mb,
A(k+1, k+1), ldak);
/* LEFT on the remaining tiles until the left side */
for (n = k+2; n < A.nt ; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
A.nb, tempnn, tempkn, ib, T.nb,
A(k, k+1), A.nb,
T(k, k+1), T.mb,
A(k+1, n), ldak);
}
for (n = k+2; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldan = BLKLDD(A, n);
/* Couple tile A(k,n) with A(k,k+1). */
plasma->quark, &task_flags,
A.nb, tempnn, ib, T.nb,
A(k, k+1), A.nb,
A(k, n), A.nb,
T(k, n), T.mb);
/* RIGHT */
for (i = k+2; i < n; i++) {
ldai = BLKLDD(A, i);
plasma->quark, &task_flags,
A.mb, A.nb, A.nb, tempnn, A.nb, ib, T.nb,
A(k+1, i), ldak,
A(i, n), ldai,
A(k, n), A.nb,
T(k, n), T.mb);
}
/* LEFT */
for (j = n+1; j < A.nt ; j++) {
tempjj = j == A.nt-1 ? A.n-j*A.nb : A.nb;
ldaj = BLKLDD(A, j);
plasma->quark, &task_flags,
A.nb, tempjj, tempnn, tempjj, A.nb, ib, T.nb,
A(k+1, j), ldak,
A(n, j), ldan,
A(k, n), A.nb,
T(k, n), T.mb);
}
/* RIGHT->LEFT */
plasma->quark, &task_flags,
A.nb, A.nb, A.nb, tempnn, tempnn, tempnn, A.nb, ib, T.nb,
A(k+1, k+1), ldak,
A(k+1, n), ldak,
A(n , n), ldan,
A(k , n), A.nb,
T(k , n), T.mb);
}
}
}
}

Here is the call graph for this function:

void plasma_pzlacpy ( plasma_context_t * plasma)

Definition at line 23 of file pzlacpy.c.

References A, B, BLKLDD, CORE_zlacpy(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaLower, PlasmaUpper, PlasmaUpperLower, plasma_sequence_t::status, and uplo.

{
/*
 * Statically scheduled tile copy of A into B. The arguments
 * (uplo, A, B, sequence, request) are unpacked from the context, and the
 * function returns immediately if the sequence is already in error.
 *
 * Each of the three sections below walks the tiles of one shape (upper
 * triangle, lower triangle, full matrix). After each tile the linear
 * position is advanced by PLASMA_SIZE and wrapped back into the tile grid;
 * X and Y are the row/column counts of tile (m, n).
 *
 * NOTE(review): this listing appears to have lost some lines - the
 * "case PlasmaUpper:" / "case PlasmaLower:" labels, the declarations of
 * uplo, A and B, the initial assignment of the running index (presumably
 * from PLASMA_RANK, which the References list names) and the name of the
 * copy kernel (presumably CORE_zlacpy). Confirm against pzlacpy.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int X, Y;
int m, n;
int next_m;
int next_n;
int ldam, ldbm;
plasma_unpack_args_5(uplo, A, B, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
switch (uplo) {
/*
* PlasmaUpper
*/
m = 0;
/* Wrap the starting column into the upper triangle: row m starts at
* column m. */
while (n >= A.nt) {
m++;
n = n - A.nt + m;
}
while (m < A.mt) {
/* Compute the tile this thread will handle after the current one. */
next_m = m;
next_n = n;
next_n += PLASMA_SIZE;
while (next_n >= A.nt && next_m < A.mt) {
next_m++;
next_n = next_n - A.nt + next_m;
}
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
/* Diagonal tiles are copied with uplo, off-diagonal tiles in full. */
m == n ? uplo : PlasmaUpperLower,
X, Y,
A(m, n), ldam,
B(m, n), ldbm);
n = next_n;
m = next_m;
}
break;
/*
* PlasmaLower
*/
n = 0;
/* Wrap the starting row into the lower triangle: column n starts at
* row n. */
while (m >= A.mt) {
n++;
m = m - A.mt + n;
}
while (n < A.nt) {
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m - A.mt + next_n;
}
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
/* Diagonal tiles are copied with uplo, off-diagonal tiles in full. */
m == n ? uplo : PlasmaUpperLower,
X, Y,
A(m, n), ldam,
B(m, n), ldbm);
n = next_n;
m = next_m;
}
break;
/*
* PlasmaUpperLower
*/
case PlasmaUpperLower:
default:
n = 0;
/* Full matrix: column-major walk over all A.mt x A.nt tiles. */
while (m >= A.mt) {
n++;
m = m - A.mt;
}
while (n < A.nt) {
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m - A.mt;
}
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
PlasmaUpperLower,
X, Y,
A(m, n), ldam,
B(m, n), ldbm);
n = next_n;
m = next_m;
}
break;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzlacpy_quark ( PLASMA_enum  uplo,
PLASMA_desc  A,
PLASMA_desc  B,
PLASMA_sequence sequence,
PLASMA_request request 
)

Definition at line 153 of file pzlacpy.c.

References B, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_zlacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Dynamically scheduled tile copy of A into B: one task is issued per tile
 * of the selected part of the matrix. X and Y are the row/column counts of
 * tile (m, n); ldam / ldbm are the leading dimensions of tile row m in A
 * and B.
 *
 * Returns immediately if the sequence is already in error.
 *
 * NOTE(review): this listing appears to have lost some lines - the
 * "case PlasmaUpper:" / "case PlasmaLower:" labels, the declarations of
 * plasma and task_flags, and the callee name plus first argument of every
 * task insertion (the References list names QUARK_CORE_zlacpy). Confirm
 * against pzlacpy.c.
 */
int X, Y;
int m, n;
int ldam, ldbm;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
switch (uplo) {
/*
* PlasmaUpper
*/
for (m = 0; m < A.mt; m++) {
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
/* Diagonal tile, only if row m has one (matrix may be tall). */
if (m < A.nt) {
Y = m == A.nt-1 ? A.n-m*A.nb : A.nb;
plasma->quark, &task_flags,
X, Y, A.mb,
A(m, m), ldam,
B(m, m), ldbm);
}
/* Tiles strictly to the right of the diagonal. */
for (n = m+1; n < A.nt; n++) {
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
X, Y, A.mb,
A(m, n), ldam,
B(m, n), ldbm);
}
}
break;
/*
* PlasmaLower
*/
for (m = 0; m < A.mt; m++) {
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
/* Diagonal tile, only if row m has one (matrix may be tall). */
if (m < A.nt) {
Y = m == A.nt-1 ? A.n-m*A.nb : A.nb;
plasma->quark, &task_flags,
X, Y, A.mb,
A(m, m), ldam,
B(m, m), ldbm);
}
/* Tiles strictly to the left of the diagonal. */
for (n = 0; n < min(m, A.nt); n++) {
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
X, Y, A.mb,
A(m, n), ldam,
B(m, n), ldbm);
}
}
break;
/*
* PlasmaUpperLower
*/
default:
/* Every tile of the A.mt x A.nt grid. */
for (m = 0; m < A.mt; m++) {
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(B, m);
for (n = 0; n < A.nt; n++) {
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
X, Y, A.mb,
A(m, n), ldam,
B(m, n), ldbm);
}
}
}
}

Here is the call graph for this function:

void plasma_pzlag2c ( plasma_context_t * plasma)

Definition at line 25 of file pzlag2c.c.

References A, BLKLDD, CORE_zlag2c(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, plasma_request_fail(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_4, SB, and plasma_sequence_t::status.

{
/*
 * Statically scheduled tile conversion of A into SB via CORE_zlag2c.
 * The arguments (A, SB, sequence, request) are unpacked from the context,
 * and the function returns immediately if the sequence is already in error.
 *
 * The tiles are walked in column-major order; after each tile the row index
 * is advanced by PLASMA_SIZE and wrapped into the next tile column(s).
 * A non-zero info from the kernel fails the request.
 *
 * NOTE(review): the initial assignment of m (presumably from PLASMA_RANK,
 * which the References list names) and the declarations of A and SB are not
 * visible in this listing - confirm against pzlag2c.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int X, Y;
int m, n;
int next_m;
int next_n;
int ldam, ldbm;
int info = PLASMA_SUCCESS;
plasma_unpack_args_4(A, SB, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* Wrap the starting row index into the tile grid. */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt;
}
while (n < A.nt) {
/* Compute the tile this thread will handle after the current one. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt;
}
/* X is a row count, so an interior tile has A.mb rows (the original used
* A.nb, which is only correct for square tiles). */
X = m == A.mt-1 ? A.m-A.mb*m : A.mb;
Y = n == A.nt-1 ? A.n-A.nb*n : A.nb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(SB, m);
CORE_zlag2c(X, Y, A(m, n), ldam, SB(m, n), ldbm, &info);
if (info != 0)
plasma_request_fail(sequence, request, info);
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

void plasma_pzlag2c_quark ( PLASMA_desc  A,
PLASMA_desc  SB,
PLASMA_sequence sequence,
PLASMA_request request 
)

Definition at line 77 of file pzlag2c.c.

References BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zlag2c(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, SB, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Dynamically scheduled tile conversion of A into SB: one task is issued
 * per tile of the A.mt x A.nt grid. X and Y are the row/column counts of
 * tile (m, n); ldam / ldbm are the leading dimensions of tile row m in A
 * and SB.
 *
 * Returns immediately if the sequence is already in error.
 *
 * NOTE(review): the declarations of plasma and task_flags and the callee
 * name of the task insertion (the References list names QUARK_CORE_zlag2c)
 * are not visible in this listing - confirm against pzlag2c.c.
 */
int X, Y;
int m, n;
int ldam, ldbm;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for(m = 0; m < A.mt; m++) {
/* Last tile row may be shorter than A.mb. */
X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
ldbm = BLKLDD(SB, m);
for(n = 0; n < A.nt; n++) {
/* Last tile column may be narrower than A.nb. */
Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
X, Y, A.mb,
A(m, n), ldam,
SB(m, n), ldbm,
sequence, request);
}
}
}

Here is the call graph for this function:

void plasma_pzlange ( plasma_context_t * plasma)

Definition at line 24 of file pzlange.c.

References A, BLKLDD, CORE_dlange(), CORE_dzasum(), CORE_zlange(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, max, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, norm, plasma_desc_t::nt, plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, plasma_unpack_args_6, PlasmaColumnwise, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, PlasmaOneNorm, PlasmaRealDouble, PlasmaRowwise, PlasmaUpperLower, ss_cond_set, ss_cond_wait, ss_finalize, and ss_init.

{
/*
 * Statically scheduled matrix norm. The arguments
 * (norm, A, work, result, sequence, request) are unpacked from the context.
 * Each thread stores its partial maximum in work[PLASMA_RANK]; the partial
 * results are then combined pairwise and thread 0 writes the final value to
 * *result. The Frobenius norm is not implemented (result stays 0.0).
 *
 * X1/X2 and Y1/Y2 are the first/one-past-last row and column used inside a
 * tile, so that a sub-matrix starting at offset (A.i, A.j) is handled.
 *
 * NOTE(review): this listing appears to have lost some lines - the
 * "case PlasmaMaxNorm:" / "case PlasmaOneNorm:" / "case PlasmaInfNorm:"
 * labels, the initial assignment and stepping of the per-thread index
 * (presumably PLASMA_RANK / PLASMA_SIZE), the callee name of the column and
 * row sum kernel (presumably CORE_dzasum) and the ss_init / ss_cond_set /
 * ss_finalize calls that the References list names. Confirm against
 * pzlange.c.
 */
double *work;
double *result;
PLASMA_sequence *sequence;
PLASMA_request *request;
int m, n;
int next_m;
int next_n;
int ldam;
int step, lrank;
int X, X1, X2, Y, Y1, Y2;
double* lwork;
double normtmp, normtmp2;
plasma_unpack_args_6(norm, A, work, result, sequence, request);
*result = 0.0;
/* Thread 0 clears the per-thread result slots. */
if (PLASMA_RANK == 0)
memset(work, 0, PLASMA_SIZE*sizeof(double));
switch (norm) {
/*
* PlasmaMaxNorm
*/
n = 0;
/* Wrap the starting row index into the tile grid. */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt;
}
while (n < A.nt) {
/* Compute the tile this thread will handle after the current one. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt;
}
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
ldam = BLKLDD(A, m);
CORE_zlange(PlasmaMaxNorm, X, Y, A(m, n, X1, Y1, ldam), ldam, NULL, &normtmp);
/* Keep the largest tile norm seen by this thread. */
if (normtmp > work[PLASMA_RANK])
work[PLASMA_RANK] = normtmp;
m = next_m;
n = next_n;
}
break;
/*
* PlasmaOneNorm
*/
normtmp2 = 0.0;
/* lwork accumulates the sums of one tile column (A.nb entries). */
lwork = (double*)plasma_private_alloc(plasma, A.nb, PlasmaRealDouble);
while (n < A.nt) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
memset(lwork, 0, A.nb*sizeof(double));
for (m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
X, Y,
A(m, n, X1, Y1, ldam), ldam,
lwork);
}
/* Largest of the Y accumulated sums of this tile column. */
CORE_dlange(PlasmaMaxNorm, Y, 1, lwork, 1, NULL, &normtmp);
if (normtmp > normtmp2)
normtmp2 = normtmp;
}
work[PLASMA_RANK] = normtmp2;
plasma_private_free(plasma, lwork);
break;
/*
* PlasmaInfNorm
*/
normtmp2 = 0.0;
/* lwork accumulates the sums of one tile row (A.mb entries). */
lwork = (double*)plasma_private_alloc(plasma, A.mb, PlasmaRealDouble);
while (m < A.mt) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
memset(lwork, 0, A.mb*sizeof(double));
for (n = 0; n < A.nt; n++) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
X, Y,
A(m, n, X1, Y1, ldam), ldam,
lwork);
}
/* Largest of the X accumulated sums of this tile row. */
CORE_dlange(PlasmaMaxNorm, X, 1, lwork, 1, NULL, &normtmp);
if (normtmp > normtmp2)
normtmp2 = normtmp;
}
work[PLASMA_RANK] = normtmp2;
plasma_private_free(plasma, lwork);
break;
/*
* PlasmaFrobeniusNorm - not implemented
*/
default:;
}
if (norm != PlasmaFrobeniusNorm) {
/* Pairwise max-reduction of the per-thread results: a thread whose
* current lrank is even waits for its partner at distance step and
* folds the partner's value into its own slot. */
step = 1;
lrank = PLASMA_RANK;
while ( (lrank%2 == 0) && (PLASMA_RANK+step < PLASMA_SIZE) ) {
ss_cond_wait(PLASMA_RANK+step, 0, step);
work[PLASMA_RANK] = max(work[PLASMA_RANK], work[PLASMA_RANK+step]);
lrank = lrank >> 1;
step = step << 1;
}
/* Non-zero ranks advance step until their lrank is odd.
* NOTE(review): no signalling call is visible here; presumably an
* ss_cond_set was lost from this listing - confirm. */
if (PLASMA_RANK > 0) {
while( lrank != 0 ) {
if (lrank%2 == 1) {
lrank = 0;
} else {
lrank = lrank >> 1;
step = step << 1;
}
}
}
if (PLASMA_RANK == 0)
*result = work[0];
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzlange_quark ( PLASMA_enum  norm,
PLASMA_desc  A,
double *  work,
double *  result,
PLASMA_sequence sequence,
PLASMA_request request 
)

Definition at line 202 of file pzlange.c.

References BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), plasma_shared_alloc(), PLASMA_SUCCESS, PlasmaColumnwise, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, PlasmaOneNorm, PlasmaRealDouble, PlasmaRowwise, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_dlange(), QUARK_CORE_dzasum_f1(), QUARK_CORE_free(), QUARK_CORE_zlange_f1(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Dynamically scheduled matrix norm. For each supported norm a shared
 * double workspace lwork is allocated and zeroed, one task per tile writes
 * its partial result into it, a final task reduces lwork into *result, and
 * a last task frees lwork. The Frobenius norm is not implemented (result
 * stays 0.0).
 *
 * Workspace sizes: A.mt*A.nt doubles for the max norm, A.n+1 for the one
 * norm and A.m+1 for the infinity norm. The size declared to
 * QUARK_CORE_free uses sizeof(double) so that it matches the allocation
 * (the original declared sizeof(PLASMA_Complex64_t), twice the real size).
 *
 * Returns immediately if the sequence is already in error.
 *
 * NOTE(review): this listing appears to have lost some lines - the
 * "case PlasmaMaxNorm:" / "case PlasmaOneNorm:" / "case PlasmaInfNorm:"
 * labels, the declarations of plasma and task_flags, and the callee names
 * plus leading arguments of the task insertions (the References list names
 * QUARK_CORE_zlange_f1, QUARK_CORE_dzasum_f1 and QUARK_CORE_dlange).
 * Confirm against pzlange.c.
 */
int X, X1, X2, Y, Y1, Y2;
int ldam;
int m, n;
int szeW;
double* lwork;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
*result = 0.0;
switch ( norm ) {
/*
* PlasmaMaxNorm
*/
/* One workspace entry per tile. */
szeW = A.mt*A.nt;
lwork = (double*)plasma_shared_alloc(plasma, szeW, PlasmaRealDouble);
memset(lwork, 0, szeW*sizeof(double));
for(m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
for(n = 0; n < A.nt; n++) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* Result of tile (m, n) goes to lwork[A.mt*n+m]. */
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
0, &(lwork[A.mt*n+m]),
lwork, szeW);
}
}
/* Reduction of the per-tile results into *result. */
plasma->quark, &task_flags,
lwork, A.mt, szeW,
0, result);
QUARK_CORE_free(plasma->quark, &task_flags, lwork, szeW*sizeof(double));
break;
/*
* PlasmaOneNorm
*/
lwork = (double*)plasma_shared_alloc(plasma, (A.n+1), PlasmaRealDouble);
memset(lwork, 0, (A.n+1)*sizeof(double));
for(m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
for(n = 0; n < A.nt; n++) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* Sums of tile column n accumulate at lwork[n*A.nb+1 ...]. */
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
&(lwork[n*A.nb+1]), A.nb,
lwork, A.n);
}
}
/* Maximum over the A.n+1 workspace entries. */
plasma->quark, &task_flags,
PlasmaMaxNorm, A.n+1, 1,
lwork, 1, A.n+1,
0, result);
QUARK_CORE_free(plasma->quark, &task_flags, lwork, (A.n+1)*sizeof(double));
break;
/*
* PlasmaInfNorm
*/
lwork = (double*)plasma_shared_alloc(plasma, (A.m+1), PlasmaRealDouble);
memset(lwork, 0, (A.m+1)*sizeof(double));
for(m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
for(n = 0; n < A.nt; n++) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* Sums of tile row m accumulate at lwork[m*A.mb+1 ...]. */
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
&(lwork[m*A.mb+1]), A.mb,
lwork, A.m);
}
}
/* Maximum over the A.m+1 workspace entries. */
plasma->quark, &task_flags,
PlasmaMaxNorm, A.m+1, 1,
lwork, 1, A.m+1,
0, result);
QUARK_CORE_free(plasma->quark, &task_flags, lwork, (A.m+1)*sizeof(double));
break;
/*
* PlasmaFrobeniusNorm - not implemented
*/
default:;
}
}

Here is the call graph for this function:

void plasma_pzlansy ( plasma_context_t plasma)

Definition at line 24 of file pzlansy.c.

References A, BLKLDD, CORE_dlange(), CORE_dzasum(), CORE_zlange(), CORE_zlansy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, max, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, norm, plasma_desc_t::nt, plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, plasma_unpack_args_7, PlasmaColumnwise, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, PlasmaRealDouble, PlasmaRowwise, PlasmaUpperLower, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, and uplo.

{
/*
 * Body of plasma_pzlansy (static scheduling). Each rank scans a share of the
 * tiles of A, stores a partial norm in work[PLASMA_RANK], and the partial
 * values are then combined with max() so that rank 0 writes *result.
 *
 * NOTE(review): this listing appears to have lost source lines (the `case`
 * labels of the switch, the initial assignment and the increment of m, the
 * declaration of A, and the ss_init/ss_cond_set/ss_finalize calls named in
 * the References above) - confirm every detail against pzlansy.c.
 */
double *work;
double *result;
PLASMA_sequence *sequence;
PLASMA_request *request;
int m, n;
int next_m;
int next_n;
int ldam, ldan;
int step, lrank;
int X, X1, X2, Y, Y1, Y2;
double* lwork;
double normtmp, normtmp2;
plasma_unpack_args_7(norm, uplo, A, work, result, sequence, request);
*result = 0.0;
/* Rank 0 zeroes the PLASMA_SIZE-entry array of per-rank partial norms. */
if (PLASMA_RANK == 0)
memset(work, 0, PLASMA_SIZE*sizeof(double));
switch (norm) {
/*
* PlasmaMaxNorm
*/
n = 0;
/* NOTE(review): m is read here without a visible assignment - presumably a dropped line. */
/* When m runs past the last tile row, move to the next tile column n and
 * re-enter it at its diagonal tile (m - A.mt + n equals n when m == A.mt). */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt+n;
}
while (n < A.nt) {
/* Pre-compute this rank's next tile: PLASMA_SIZE tiles further along the same walk. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt+next_n;
}
/* Diagonal tile: symmetric max-norm kernel on the X-by-X used part of the tile. */
if (m == n) {
/* X1 = row offset inside the first tile, X2 = end of the used rows inside the last tile. */
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
CORE_zlansy(PlasmaMaxNorm, uplo, X, A(m, n, X1, X1, ldam), ldam, NULL, &normtmp);
}
else {
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
ldam = BLKLDD(A, m);
CORE_zlange(PlasmaMaxNorm, X, Y, A(m, n, X1, Y1, ldam), ldam, NULL, &normtmp);
}
/*
* PlasmaUpper
*/
else {
/* Same walk as the lower case, but the tile read is the transposed position A(n, m). */
X1 = n == 0 ? A.i %A.mb : 0;
X2 = n == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
Y1 = m == 0 ? A.j %A.nb : 0;
Y2 = m == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
ldan = BLKLDD(A, n);
CORE_zlange(PlasmaMaxNorm, X, Y, A(n, m, X1, Y1, ldan), ldan, NULL, &normtmp);
}
}
/* Keep the largest tile norm seen by this rank. */
if (normtmp > work[PLASMA_RANK])
work[PLASMA_RANK] = normtmp;
m = next_m;
n = next_n;
}
break;
/*
* PlasmaOneNorm / PlasmaInfNorm
*/
normtmp2 = 0.0;
/* lwork holds A.mb accumulators; it is cleared at the top of every loop iteration below. */
lwork = (double*)plasma_private_alloc(plasma, A.mb, PlasmaRealDouble);
/* NOTE(review): no assignment or increment of m is visible for this loop - presumably dropped lines. */
while (m < A.mt) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
memset(lwork, 0, A.mb*sizeof(double));
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
/* Tiles left of the diagonal are read directly from tile row m. */
for (n = 0; n < m; n++) {
Y1 = n == 0 ? A.j%A.nb : 0;
Y = A.nb - Y1;
CORE_dzasum(PlasmaRowwise, PlasmaUpperLower, X, Y, A(m, n, X1, Y1, ldam), ldam, lwork);
}
CORE_dzasum(PlasmaRowwise, uplo, X, X, A(m, m, X1, X1, ldam), ldam, lwork);
/* Tiles right of the diagonal are not read; the mirrored tiles A(n, m) are summed columnwise instead. */
for (n = m+1; n < A.mt; n++) {
Y = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
ldan = BLKLDD(A, n);
CORE_dzasum(PlasmaColumnwise, PlasmaUpperLower, Y, X, A(n, m, 0, X1, ldan), ldan, lwork);
}
}
/*
* PlasmaUpper
*/
else {
/* Mirror image of the lower case: A(n, m) columnwise above the diagonal, A(m, n) rowwise to its right. */
for (n = 0; n < m; n++) {
Y1 = n == 0 ? A.j%A.nb : 0;
Y = A.nb - Y1;
CORE_dzasum(PlasmaColumnwise, PlasmaUpperLower, Y, X, A(n, m, Y1, X1, A.nb), A.nb, lwork);
}
CORE_dzasum(PlasmaRowwise, uplo, X, X, A(m, m, X1, X1, ldam), ldam, lwork);
for ( n =m+1; n < A.mt; n++) {
Y = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
CORE_dzasum(PlasmaRowwise, PlasmaUpperLower, X, Y, A(m, n, X1, 0, ldam), ldam, lwork);
}
}
/* Largest of the X accumulated sums for this tile row. */
CORE_dlange(PlasmaMaxNorm, X, 1, lwork, 1, NULL, &normtmp);
if (normtmp > normtmp2)
normtmp2 = normtmp;
}
work[PLASMA_RANK] = normtmp2;
plasma_private_free(plasma, lwork);
break;
/*
* PlasmaFrobeniusNorm
*/
default:;
}
/* Combine the per-rank partial results; skipped entirely for PlasmaFrobeniusNorm. */
if (norm != PlasmaFrobeniusNorm) {
step = 1;
lrank = PLASMA_RANK;
/* While lrank is even and a partner rank exists at distance step: wait on
 * that partner, fold its value in with max(), then halve lrank and double step. */
while ( (lrank%2 == 0) && (PLASMA_RANK+step < PLASMA_SIZE) ) {
ss_cond_wait(PLASMA_RANK+step, 0, step);
work[PLASMA_RANK] = max(work[PLASMA_RANK], work[PLASMA_RANK+step]);
lrank = lrank >> 1;
step = step << 1;
}
/* NOTE(review): as listed, this loop only changes the locals lrank/step; the
 * References name ss_cond_set, so a signalling call was presumably dropped here. */
if (PLASMA_RANK > 0) {
while( lrank != 0 ) {
if (lrank%2 == 1) {
lrank = 0;
} else {
lrank = lrank >> 1;
step = step << 1;
}
}
}
/* Rank 0 publishes the combined value. */
if (PLASMA_RANK == 0)
*result = work[0];
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzlansy_quark ( PLASMA_enum  norm,
PLASMA_enum  uplo,
PLASMA_desc  A,
double *  work,
double *  result,
PLASMA_sequence sequence,
PLASMA_request request 
)

Definition at line 219 of file pzlansy.c.

References BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), plasma_shared_alloc(), PLASMA_SUCCESS, PlasmaColumnwise, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaLower, PlasmaMaxNorm, PlasmaOneNorm, PlasmaRealDouble, PlasmaRowwise, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_dlange(), QUARK_CORE_dzasum_f1(), QUARK_CORE_free(), QUARK_CORE_zlange_f1(), QUARK_CORE_zlansy_f1(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzlansy_quark (dynamic scheduling). For each norm it
 * allocates a zeroed double workspace lwork, walks the tiles of one triangle
 * of A, and ends with a PlasmaMaxNorm pass over lwork into result followed
 * by QUARK_CORE_free of lwork.
 *
 * NOTE(review): in this listing every task-submission call has lost its first
 * line (the function name), the `case` labels are missing, and plasma and
 * task_flags have no visible declaration. The References above name
 * QUARK_CORE_zlansy_f1, QUARK_CORE_zlange_f1, QUARK_CORE_dzasum_f1 and
 * QUARK_CORE_dlange - confirm which call goes where against pzlansy.c.
 */
int X, X1, X2, Y, Y1;
int ldam;
int m, n;
int szeW, pos;
double* lwork;
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
*result = 0.0;
switch ( norm ) {
/*
* PlasmaMaxNorm
*/
/* One lwork slot per tile of a triangle including the diagonal; pos is the next free slot. */
szeW = A.mt*(A.mt+1)/2;
pos = 0;
lwork = (double*)plasma_shared_alloc(plasma, szeW, PlasmaRealDouble);
memset(lwork, 0, szeW*sizeof(double));
for(m = 0; m < A.mt; m++) {
/* X1 = row offset inside the first tile, X2 = end of the used rows inside the last tile. */
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
/* Diagonal tile A(m, m) -> lwork[pos]. (Call name missing from this listing.) */
plasma->quark, &task_flags,
A(m, m, X1, X1, ldam), ldam, ldam*X,
0, &(lwork[pos]),
lwork, szeW);
pos++;
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
/* Tiles of row m left of the diagonal, one lwork slot each. */
for(n=0; n<m; n++) {
Y1 = n == 0 ? A.j%A.nb : 0;
Y = A.nb - Y1;
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
0, &(lwork[pos]),
lwork, szeW);
pos++;
}
}
/*
* PlasmaUpper
*/
else {
/* Tiles of row m right of the diagonal, one lwork slot each. */
for(n = m+1; n < A.mt; n++) {
Y = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
plasma->quark, &task_flags,
A(m, n, X1, 0, ldam), ldam, ldam*Y,
0, &(lwork[pos]),
lwork, szeW);
pos++;
}
}
}
/* Final PlasmaMaxNorm pass over the szeW entries of lwork, written to result. */
plasma->quark, &task_flags,
PlasmaMaxNorm, szeW, 1,
lwork, 1, szeW,
0, result);
/* NOTE(review): lwork was allocated as szeW doubles but the size passed here
 * uses sizeof(PLASMA_Complex64_t) - confirm this is intended. */
QUARK_CORE_free(plasma->quark, &task_flags, lwork, szeW*sizeof(PLASMA_Complex64_t));
break;
/*
* PlasmaOneNorm / PlasmaInfNorm
*/
lwork = (double *)plasma_shared_alloc(plasma, A.m+1, PlasmaRealDouble);
memset(lwork, 0, (A.m+1)*sizeof(double));
for(m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
/* Diagonal tile contributes to the lwork segment that starts at m*A.mb+1. */
plasma->quark, &task_flags,
A(m, m, X1, X1, ldam), ldam, ldam*X,
&(lwork[m*A.mb+1]), A.mb,
lwork, A.m);
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
/* Each off-diagonal tile is submitted twice: once against the segment of
 * tile row m and once against the segment of tile index n. */
for(n = 0; n < m; n++) {
Y1 = n == 0 ? A.j%A.nb : 0;
Y = A.nb - Y1;
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
&(lwork[m*A.mb+1]), A.mb,
lwork, A.m);
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
&(lwork[n*A.mb+1]), A.mb,
lwork, A.m);
}
}
/*
* PlasmaUpper
*/
else {
/* Same double submission as the lower case, over the tiles right of the diagonal. */
for(n = m+1; n < A.mt; n++) {
Y = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
plasma->quark, &task_flags,
A(m, n, X1, 0, ldam), ldam, ldam*Y,
&(lwork[m*A.mb+1]), A.mb,
lwork, A.m);
plasma->quark, &task_flags,
A(m, n, X1, 0, ldam), ldam, ldam*Y,
&(lwork[n*A.mb+1]), A.mb,
lwork, A.m);
}
}
}
/* Final PlasmaMaxNorm pass over the A.m+1 entries of lwork, written to result. */
plasma->quark, &task_flags,
PlasmaMaxNorm, A.m+1, 1,
lwork, 1, A.m+1,
0, result);
/* NOTE(review): same double-vs-PLASMA_Complex64_t size mismatch as in the max-norm branch. */
QUARK_CORE_free(plasma->quark, &task_flags, lwork, (A.m+1)*sizeof(PLASMA_Complex64_t));
break;
/*
* PlasmaFrobeniusNorm - not implemented
*/
default:;
}
}

Here is the call graph for this function:

void plasma_pzlaset2_quark ( PLASMA_enum  uplo,
PLASMA_Complex64_t  alpha,
PLASMA_desc  A,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel initialization of a 2-D array A to ALPHA on the off-diagonals.

Definition at line 22 of file pzlaset2.c.

References BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_zlaset2(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzlaset2_quark: submits one task per tile of the part of A
 * selected by uplo, passing alpha to each.
 *
 * NOTE(review): the first line of every task submission (the function name,
 * presumably QUARK_CORE_zlaset2 per the References above) and the
 * declarations of plasma and task_flags are missing from this listing.
 */
int i, j;
int ldai, ldaj;
int tempim;
int tempjm, tempjn;
/* Number of diagonal tiles. */
int minmn = min(A.mt, A.nt);
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
if (uplo == PlasmaLower) {
for (j = 0; j < minmn; j++){
/* The last tile row/column may be partial; every other tile is full size. */
tempjm = j == A.mt-1 ? A.m-j*A.mb : A.mb;
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
ldaj = BLKLDD(A, j);
/* Diagonal tile: submitted with PlasmaLower. */
plasma->quark, &task_flags,
PlasmaLower, tempjm, tempjn, alpha,
A(j, j), ldaj);
/* Tiles below the diagonal in tile column j: submitted with PlasmaUpperLower. */
for (i = j+1; i < A.mt; i++){
tempim = i == A.mt-1 ? A.m-i*A.mb : A.mb;
ldai = BLKLDD(A, i);
plasma->quark, &task_flags,
PlasmaUpperLower, tempim, tempjn, alpha,
A(i, j), ldai);
}
}
}
else if (uplo == PlasmaUpper) {
/* Tiles strictly above the diagonal, column by column. */
for (j = 1; j < A.nt; j++){
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
for (i = 0; i < min(j, A.mt); i++){
tempim = i == A.mt-1 ? A.m-i*A.mb : A.mb;
ldai = BLKLDD(A, i);
plasma->quark, &task_flags,
PlasmaUpperLower, tempim, tempjn, alpha,
A(i, j), ldai);
}
}
/* Diagonal tiles: submitted with PlasmaUpper. */
for (j = 0; j < minmn; j++){
tempjm = j == A.mt-1 ? A.m-j*A.mb : A.mb;
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
ldaj = BLKLDD(A, j);
plasma->quark, &task_flags,
PlasmaUpper, tempjm, tempjn, alpha,
A(j, j), ldaj);
}
}
else {
/* Any other uplo value: every tile of A is submitted with PlasmaUpperLower. */
for (i = 0; i < A.mt; i++){
tempim = i == A.mt-1 ? A.m-i*A.mb : A.mb;
ldai = BLKLDD(A, i);
for (j = 0; j < A.nt; j++){
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
plasma->quark, &task_flags,
PlasmaUpperLower, tempim, tempjn, alpha,
A(i, j), ldai);
}
}
}
}

Here is the call graph for this function:

void plasma_pzlaset_quark ( PLASMA_enum  uplo,
PLASMA_Complex64_t  alpha,
PLASMA_Complex64_t  beta,
PLASMA_desc  A,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel initialization of a 2-D array A to BETA on the diagonal and ALPHA on the off-diagonals.

Definition at line 22 of file pzlaset.c.

References BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_zlaset(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzlaset_quark: submits one task per tile of the part of A
 * selected by uplo. Diagonal tiles receive (alpha, beta); all other tiles
 * receive (alpha, alpha).
 *
 * NOTE(review): the first line of every task submission (the function name,
 * presumably QUARK_CORE_zlaset per the References above) and the
 * declarations of plasma and task_flags are missing from this listing.
 */
int i, j;
int ldai, ldaj;
int tempim;
int tempjm, tempjn;
/* Number of diagonal tiles. */
int minmn = min(A.mt, A.nt);
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
if (uplo == PlasmaLower) {
for (j = 0; j < minmn; j++){
/* The last tile row/column may be partial; every other tile is full size. */
tempjm = j == A.mt-1 ? A.m-j*A.mb : A.mb;
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
ldaj = BLKLDD(A, j);
/* Diagonal tile: PlasmaLower with (alpha, beta). */
plasma->quark, &task_flags,
PlasmaLower, tempjm, tempjn, alpha, beta,
A(j, j), ldaj);
/* Tiles below the diagonal in tile column j: whole tile with (alpha, alpha). */
for (i = j+1; i < A.mt; i++){
tempim = i == A.mt-1 ? A.m-i*A.mb : A.mb;
ldai = BLKLDD(A, i);
plasma->quark, &task_flags,
PlasmaUpperLower, tempim, tempjn, alpha, alpha,
A(i, j), ldai);
}
}
}
else if (uplo == PlasmaUpper) {
/* Tiles strictly above the diagonal, column by column, with (alpha, alpha). */
for (j = 1; j < A.nt; j++){
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
for (i = 0; i < min(j, A.mt); i++){
tempim = i == A.mt-1 ? A.m-i*A.mb : A.mb;
ldai = BLKLDD(A, i);
plasma->quark, &task_flags,
PlasmaUpperLower, tempim, tempjn, alpha, alpha,
A(i, j), ldai);
}
}
/* Diagonal tiles: PlasmaUpper with (alpha, beta). */
for (j = 0; j < minmn; j++){
tempjm = j == A.mt-1 ? A.m-j*A.mb : A.mb;
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
ldaj = BLKLDD(A, j);
plasma->quark, &task_flags,
PlasmaUpper, tempjm, tempjn, alpha, beta,
A(j, j), ldaj);
}
}
else {
/* Any other uplo value: first every tile (diagonal ones included) with (alpha, alpha)... */
for (i = 0; i < A.mt; i++){
tempim = i == A.mt-1 ? A.m-i*A.mb : A.mb;
ldai = BLKLDD(A, i);
for (j = 0; j < A.nt; j++){
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
plasma->quark, &task_flags,
PlasmaUpperLower, tempim, tempjn, alpha, alpha,
A(i, j), ldai);
}
}
/* ...then the diagonal tiles are submitted a second time with (alpha, beta). */
for (j = 0; j < minmn; j++){
tempjm = j == A.mt-1 ? A.m-j*A.mb : A.mb;
tempjn = j == A.nt-1 ? A.n-j*A.nb : A.nb;
ldaj = BLKLDD(A, j);
plasma->quark, &task_flags,
PlasmaUpperLower, tempjm, tempjn, alpha, beta,
A(j, j), ldaj);
}
}
}

Here is the call graph for this function:

void plasma_pzlaswp_quark ( PLASMA_desc  B,
int *  IPIV,
int  inc,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel tile row interchanges - dynamic scheduling

Definition at line 23 of file pzlaswp.c.

References B, IPIV, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), plasma_desc_submatrix(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zlaswp_ontile(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzlaswp_quark: submits one task per tile of B, visiting the
 * tile rows top-to-bottom when inc > 0 and bottom-to-top otherwise.
 *
 * NOTE(review): the first line of every task submission (the function name,
 * presumably QUARK_CORE_zlaswp_ontile per the References above) and the
 * declarations of plasma and task_flags are missing from this listing.
 */
int m, n;
int tempi, tempm, tempmm, tempnn;
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
if ( inc > 0 )
{
for (m = 0; m < B.mt; m++) {
/* tempi = first row of tile row m; tempm = rows from there to the bottom of B;
 * tempmm = rows in tile row m itself (partial for the last tile row). */
tempi = m * B.mb;
tempm = B.m - tempi;
tempmm = m == B.mt-1 ? tempm : B.mb;
for (n = 0; n < B.nt; n++) {
tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb;
/* The submatrix passed spans all tempm remaining rows of tile column n;
 * the trailing argument is the last tile of that column. */
plasma->quark, &task_flags,
plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn),
B(m, n), 1, tempmm, IPIV(m), inc, B(B.mt-1, n) );
}
}
}
else
{
/* Same submissions in reverse tile-row order; the trailing argument is the first tile of the column. */
for (m = B.mt-1; m > -1; m--) {
tempi = m * B.mb;
tempm = B.m - tempi;
tempmm = m == B.mt-1 ? tempm : B.mb;
for (n = 0; n < B.nt; n++) {
tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb;
plasma->quark, &task_flags,
plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn),
B(m, n), 1, tempmm, IPIV(m), inc, B(0, n) );
}
}
}
}

Here is the call graph for this function:

void plasma_pzlaswpc_quark ( PLASMA_desc  B,
int *  IPIV,
int  inc,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel tile column interchanges - dynamic scheduling

Definition at line 23 of file pzlaswpc.c.

References B, IPIV, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), plasma_desc_submatrix(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zlaswpc_ontile(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzlaswpc_quark: submits one task per tile of B, visiting the
 * tile columns left-to-right when inc > 0 and right-to-left otherwise.
 *
 * NOTE(review): the first line of every task submission (the function name,
 * presumably QUARK_CORE_zlaswpc_ontile per the References above) and the
 * declarations of plasma and task_flags are missing from this listing.
 */
int m, n;
int tempj, tempn, tempmm, tempnn;
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
if ( inc > 0 )
{
for (n = 0; n < B.nt; n++) {
/* tempj = first column of tile column n; tempn = columns from there to the right edge of B;
 * tempnn = columns in tile column n itself (partial for the last tile column). */
tempj = n * B.nb;
tempn = B.n - tempj;
tempnn = n == B.nt-1 ? tempn : B.nb;
for (m = 0; m < B.mt; m++) {
tempmm = m == B.mt-1 ? B.m - m * B.mb : B.mb;
/* The submatrix passed spans all tempn remaining columns of tile row m;
 * the trailing argument is the last tile of that row. */
plasma->quark, &task_flags,
plasma_desc_submatrix(B, m*B.mb, tempj, tempmm, tempn),
B(m, n), 1, tempnn, IPIV(n), inc, B(m, B.nt-1) );
}
}
}
else
{
/* Same submissions in reverse tile-column order; the trailing argument is the first tile of the row. */
for (n = B.nt-1; n > -1; n--) {
tempj = n * B.nb;
tempn = B.n - tempj;
tempnn = n == B.nt-1 ? tempn : B.nb;
for (m = 0; m < B.mt; m++) {
tempmm = m == B.mt-1 ? B.m - m * B.mb : B.mb;
plasma->quark, &task_flags,
plasma_desc_submatrix(B, m*B.mb, tempj, tempmm, tempn),
B(m, n), 1, tempnn, IPIV(n), inc, B(m, 0) );
}
}
}
}

Here is the call graph for this function:

void plasma_pzlauum_quark ( PLASMA_enum  uplo,
PLASMA_desc  A,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel UU' or L'L operation - dynamic scheduling

Definition at line 23 of file pzlauum.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, plasma_context_struct::quark, QUARK_CORE_zgemm(), QUARK_CORE_zherk(), QUARK_CORE_zlauum(), QUARK_CORE_ztrmm(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzlauum_quark: for each tile row/column m it submits, in
 * order, updates of the tiles A(n, n) and A(k, n) (or A(n, k)) for n < k < m,
 * then a pass over the off-diagonal tiles of m against A(m, m), then a final
 * task on the diagonal tile A(m, m).
 *
 * NOTE(review): the first line of every task submission (the function name
 * and leading enum arguments) is missing from this listing, as are the
 * declarations of plasma, task_flags and zone. The References above name
 * QUARK_CORE_zherk, QUARK_CORE_zgemm, QUARK_CORE_ztrmm and QUARK_CORE_zlauum;
 * the per-call guesses below are based only on the argument shapes - confirm
 * against pzlauum.c.
 */
int k, m, n;
int ldam;
int tempkm, tempmm, tempnn;
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
for (m = 0; m < A.mt; m++) {
/* The last tile row may be partial; every other tile row has A.mb rows. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
for(n = 0; n < m; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
/* Diagonal tile A(n, n) updated from A(m, n) - presumably the zherk call. */
plasma->quark, &task_flags,
tempnn, tempmm, A.mb,
1.0, A(m, n), ldam,
1.0, A(n, n), A.mb);
/* Tiles A(k, n), n < k < m, updated from A(m, k) and A(m, n) - presumably the zgemm call. */
for(k = n+1; k < m; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
plasma->quark, &task_flags,
tempkm, tempnn, tempmm, A.mb,
zone, A(m, k), ldam,
A(m, n), ldam,
zone, A(k, n), A.mb);
}
}
/* Row m tiles A(m, n) combined with the diagonal tile A(m, m) - presumably the ztrmm call. */
for (n = 0; n < m; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
tempmm, tempnn, A.mb,
zone, A(m, m), ldam,
A(m, n), ldam);
}
/* Diagonal tile A(m, m) on its own - presumably the zlauum call. */
plasma->quark, &task_flags,
tempmm,
A.mb, A(m, m), ldam);
}
}
/*
* PlasmaUpper
*/
else {
/* Mirror of the lower case with tile indices transposed: A(n, m), A(k, m), A(n, k). */
for (m = 0; m < A.mt; m++) {
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
for (n = 0; n < m; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
tempnn, tempmm, A.mb,
1.0, A(n, m), A.mb,
1.0, A(n, n), A.mb);
for (k = n+1; k < m; k++){
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
plasma->quark, &task_flags,
tempnn, tempkm, tempmm, A.mb,
zone, A(n, m), A.mb,
A(k, m), A.mb,
zone, A(n, k), A.mb);
}
}
for (n = 0; n < m; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
tempnn, tempmm, A.mb,
zone, A(m, m), ldam,
A(n, m), A.mb);
}
plasma->quark, &task_flags,
tempmm,
A.mb, A(m, m), ldam);
}
}
}

Here is the call graph for this function:

void plasma_pzpack ( plasma_context_t plasma)

plasma_pzpack packs all extra elements at the end of the matrix

 +---------------+
 |               |
 |               |
 |     A11       |
 |               |
 |               |
 +---------------+
 |     A21       |
 +---------------+

This matrix is initially stored as follows (the example is column-major; the row-major case is the same, applied to the transposed matrix): A11(:,0), A21(:,0), A11(:,1), A21(:,1), ...

On exit, it is stored as follows: A11(:,:), A21(:,:)

Parameters:
[in] plasma: Plasma context
[in] m: Number of rows in matrix A
[in] n: Number of columns in matrix A
[in,out] A: Matrix A to pack (see above for entry and exit format)
[in] m0: Number of rows of A21

Definition at line 65 of file pzpack.c.

References A, CORE_zlacpy(), min, plasma_barrier(), plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_6, PlasmaComplexDouble, PlasmaUpperLower, plasma_sequence_t::status, and W.

{
/*
 * Body of plasma_pzpack: moves the top m1 = m - m0 entries of each of the n
 * columns from stride m to stride m1, and stores the bottom m0 entries of
 * columns start..start+bs-1 (this rank's share) after the packed part.
 *
 * NOTE(review): A, W and Wl have no visible declaration or allocation in
 * this listing (the References above name plasma_private_alloc) - presumably
 * dropped lines; confirm against pzpack.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int m, n, m0;
int i, m1, size, rank, start, end, bs, mod;
plasma_unpack_args_6(m, n, A, m0, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
/* Quick return */
if ( n <= 1 )
return;
/* m1 = number of leading entries kept in place per column. */
m1 = m - m0;
size = PLASMA_SIZE;
rank = PLASMA_RANK;
/* Split n-1 columns over the ranks: bs each, with the first `mod` ranks taking one more;
 * start is this rank's first column in that split. */
mod = (n-1) % size;
bs = (n-1) / size;
start = rank * bs;
if ( rank < mod ) {
bs++;
}
start += min( mod, rank );
/* Save leftover pieces that are otherwise going to be overwritten */
/* Copies the m0 x bs block that starts at row m1 of column `start` into W. */
CORE_zlacpy( PlasmaUpperLower, m0, bs, &(A[(int64_t)start*m+m1]), m, W, m0 );
/* Pack A */
/* Columns 1..end-1 are handled in rounds of `size` columns, one column per rank per round. */
end = ((n-1) / size) * size + 1;
for(i=rank+1; i<end; i+=size) {
/* Read column i at the old stride into Wl, synchronise, then write it at the new stride.
 * Presumably the barrier ensures every rank has read before any rank writes. */
/* NOTE(review): i*m and i*m1 are int products, unlike the (int64_t) casts used in the CORE_zlacpy calls. */
memcpy( Wl, &(A[i*m]), m1*sizeof(PLASMA_Complex64_t));
plasma_barrier(plasma);
memcpy( &(A[i*m1]), Wl, m1*sizeof(PLASMA_Complex64_t));
}
/* Remaining columns end..n-1: one per rank for the lowest ranks; the other
 * ranks still take part in the barrier. */
if ( rank < (n - end)) {
i = end + rank;
memcpy( Wl, &(A[i*m]), m1*sizeof(PLASMA_Complex64_t));
plasma_barrier(plasma);
memcpy( &(A[i*m1]), Wl, m1*sizeof(PLASMA_Complex64_t));
}
else
plasma_barrier(plasma);
/* Restore leftover pieces */
/* The saved block goes after the m1*n packed entries, at column offset `start` with leading dimension m0. */
CORE_zlacpy( PlasmaUpperLower, m0, bs, W, m0, &(A[(int64_t)m1*n+start*m0]), m0 );
plasma_private_free(plasma, W);
plasma_private_free(plasma, Wl);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzplghe ( plasma_context_t plasma)

Parallel tile generation of a random Hermitian matrix - static scheduling

Definition at line 21 of file pzplghe.c.

References A, BLKLDD, CORE_zplghe(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, and plasma_sequence_t::status.

{
/*
 * Body of plasma_pzplghe (static scheduling): each rank visits every
 * PLASMA_SIZE-th tile of A in column-by-column order and invokes a per-tile
 * kernel with bump, the tile dimensions, the tile's global row/column offsets
 * and seed.
 *
 * NOTE(review): the kernel call has lost its first line (presumably
 * CORE_zplghe per the References above), and neither the declaration of A
 * nor the initial assignment of m is visible - confirm against pzplghe.c.
 */
double bump;
unsigned long long int seed;
PLASMA_sequence *sequence;
PLASMA_request *request;
int m, n;
int next_m;
int next_n;
int ldam;
int tempmm, tempnn;
plasma_unpack_args_5(bump, A, seed, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* Convert the starting linear index m into a (m, n) tile position. */
while (m >= A.mt) {
n++;
m = m - A.mt;
}
while ( n < A.nt ) {
/* Pre-compute this rank's next tile: PLASMA_SIZE tiles further on, wrapping into later columns. */
next_n = n;
next_m = m;
next_m += PLASMA_SIZE;
while ( next_m >= A.mt && next_n < A.nt ) {
next_n++;
next_m = next_m - A.mt;
}
/* The last tile row/column may be partial; every other tile is full size. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldam = BLKLDD(A, m);
/* Kernel arguments for tile (m, n). (Call name missing from this listing.) */
bump, tempmm, tempnn, A(m, n), ldam,
A.m, m*A.mb, n*A.nb, seed );
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzplghe_quark ( double  bump,
PLASMA_desc  A,
unsigned long long int  seed,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel tile generation of a random Hermitian matrix - dynamic scheduling

Definition at line 72 of file pzplghe.c.

References BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zplghe(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzplghe_quark (dynamic scheduling): submits one task per
 * tile of A, passing bump, the tile dimensions, the tile's global row/column
 * offsets and seed.
 *
 * NOTE(review): the first line of the task submission (presumably
 * QUARK_CORE_zplghe per the References above) and the declarations of plasma
 * and task_flags are missing from this listing.
 */
int m, n;
int ldam;
int tempmm, tempnn;
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < A.mt; m++) {
/* The last tile row/column may be partial; every other tile is full size. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
for (n = 0; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
bump, tempmm, tempnn, A(m, n), ldam,
A.m, m*A.mb, n*A.nb, seed );
}
}
}

Here is the call graph for this function:

void plasma_pzplgsy ( plasma_context_t plasma)

Parallel tile generation of a random symmetric matrix - static scheduling

Definition at line 21 of file pzplgsy.c.

References A, BLKLDD, CORE_zplgsy(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, and plasma_sequence_t::status.

{
/*
 * Body of plasma_pzplgsy (static scheduling): each rank visits every
 * PLASMA_SIZE-th tile of A in column-by-column order and invokes a per-tile
 * kernel with bump, the tile dimensions, the tile's global row/column offsets
 * and seed.
 *
 * NOTE(review): the kernel call has lost its first line (presumably
 * CORE_zplgsy per the References above), and the declarations of bump and A
 * and the initial assignment of m are not visible - confirm against pzplgsy.c.
 */
unsigned long long int seed;
PLASMA_sequence *sequence;
PLASMA_request *request;
int m, n;
int next_m;
int next_n;
int ldam;
int tempmm, tempnn;
plasma_unpack_args_5(bump, A, seed, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* Convert the starting linear index m into a (m, n) tile position. */
while (m >= A.mt) {
n++;
m = m - A.mt;
}
while ( n < A.nt ) {
/* Pre-compute this rank's next tile: PLASMA_SIZE tiles further on, wrapping into later columns. */
next_n = n;
next_m = m;
next_m += PLASMA_SIZE;
while ( next_m >= A.mt && next_n < A.nt ) {
next_n++;
next_m = next_m - A.mt;
}
/* The last tile row/column may be partial; every other tile is full size. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldam = BLKLDD(A, m);
/* Kernel arguments for tile (m, n). (Call name missing from this listing.) */
bump, tempmm, tempnn, A(m, n), ldam,
A.m, m*A.mb, n*A.nb, seed );
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzplgsy_quark ( PLASMA_Complex64_t  bump,
PLASMA_desc  A,
unsigned long long int  seed,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel tile generation of a random symmetric matrix - dynamic scheduling

Definition at line 72 of file pzplgsy.c.

References BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zplgsy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzplgsy_quark (dynamic scheduling): submits one task per
 * tile of A, passing bump, the tile dimensions, the tile's global row/column
 * offsets and seed.
 *
 * NOTE(review): the first line of the task submission (presumably
 * QUARK_CORE_zplgsy per the References above) and the declarations of plasma
 * and task_flags are missing from this listing.
 */
int m, n;
int ldam;
int tempmm, tempnn;
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < A.mt; m++) {
/* The last tile row/column may be partial; every other tile is full size. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
for (n = 0; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
bump, tempmm, tempnn, A(m, n), ldam,
A.m, m*A.mb, n*A.nb, seed );
}
}
}

Here is the call graph for this function:

void plasma_pzplrnt ( plasma_context_t plasma)

Parallel tile generation of a random matrix - static scheduling

Definition at line 21 of file pzplrnt.c.

References A, BLKLDD, CORE_zplrnt(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_4, and plasma_sequence_t::status.

{
/*
 * Body of plasma_pzplrnt (static scheduling): each rank visits every
 * PLASMA_SIZE-th tile of A in column-by-column order and invokes a per-tile
 * kernel with the tile dimensions, the tile's global row/column offsets and
 * seed.
 *
 * NOTE(review): the kernel call has lost its first line (presumably
 * CORE_zplrnt per the References above), and neither the declaration of A
 * nor the initial assignment of m is visible - confirm against pzplrnt.c.
 */
unsigned long long int seed;
PLASMA_sequence *sequence;
PLASMA_request *request;
int m, n;
int next_m;
int next_n;
int ldam;
int tempmm, tempnn;
plasma_unpack_args_4(A, seed, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* Convert the starting linear index m into a (m, n) tile position. */
while (m >= A.mt) {
n++;
m = m - A.mt;
}
while ( n < A.nt ) {
/* Pre-compute this rank's next tile: PLASMA_SIZE tiles further on, wrapping into later columns. */
next_n = n;
next_m = m;
next_m += PLASMA_SIZE;
while ( next_m >= A.mt && next_n < A.nt ) {
next_n++;
next_m = next_m - A.mt;
}
/* The last tile row/column may be partial; every other tile is full size. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
ldam = BLKLDD(A, m);
/* Kernel arguments for tile (m, n). (Call name missing from this listing.) */
tempmm, tempnn, A(m, n), ldam,
A.m, m*A.mb, n*A.nb, seed );
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzplrnt_quark ( PLASMA_desc  A,
unsigned long long int  seed,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel tile generation of a random matrix - dynamic scheduling

Definition at line 71 of file pzplrnt.c.

References BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_CORE_zplrnt(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Body of plasma_pzplrnt_quark (dynamic scheduling): submits one task per
 * tile of A, passing the tile dimensions, the tile's global row/column
 * offsets and seed.
 *
 * NOTE(review): the first line of the task submission (presumably
 * QUARK_CORE_zplrnt per the References above) and the declarations of plasma
 * and task_flags are missing from this listing.
 */
int m, n;
int ldam;
int tempmm, tempnn;
plasma = plasma_context_self();
/* Nothing is submitted if the sequence has already failed. */
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < A.mt; m++) {
/* The last tile row/column may be partial; every other tile is full size. */
tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
ldam = BLKLDD(A, m);
for (n = 0; n < A.nt; n++) {
tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
plasma->quark, &task_flags,
tempmm, tempnn, A(m, n), ldam,
A.m, m*A.mb, n*A.nb, seed );
}
}
}

Here is the call graph for this function:

void plasma_pzpotrf ( plasma_context_t plasma)

Parallel tile Cholesky factorization - static scheduling

Definition at line 23 of file pzpotrf.c.

References A, BLKLDD, CORE_zgemm(), CORE_zherk(), CORE_zpotrf(), CORE_ztrsm(), plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, plasma_request_fail(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_4, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaUpper, ss_abort, ss_aborted, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, plasma_sequence_t::status, and uplo.

{
/*
 * Body of plasma_pzpotrf (static scheduling). Work items are triples
 * (k, m, n): for a given (k, m) pair n runs 0..k, after which m advances by
 * PLASMA_SIZE (wrapping into the next k). Items with n < k wait on earlier
 * results and update a tile; items with n == k process tile (k, k) or
 * (m, k)/(k, m) and then signal completion with ss_cond_set.
 *
 * NOTE(review): every kernel call has lost its first line (the function name
 * and leading enum arguments), and the declarations of A, zone and mzone,
 * the initial assignment of m, and the ss_finalize call named in the
 * References above are not visible. The References list CORE_zpotrf,
 * CORE_ztrsm, CORE_zherk and CORE_zgemm; the per-call guesses below are
 * based only on the argument shapes - confirm against pzpotrf.c.
 */
PLASMA_sequence *sequence;
PLASMA_request *request;
int k, m, n;
int next_k;
int next_m;
int next_n;
int ldak, ldam, ldan;
int info;
int tempkn, tempmn;
plasma_unpack_args_4(uplo, A, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
/* Presumably sets up an A.nt x A.nt progress table initialised to 0 - TODO confirm ss_init semantics. */
ss_init(A.nt, A.nt, 0);
k = 0;
/* Convert the starting linear index m into a (k, m) position with m >= k. */
while (m >= A.nt) {
k++;
m = m-A.nt+k;
}
n = 0;
while (k < A.nt && m < A.nt && !ss_aborted()) {
/* Pre-compute the next work item: next n for the same (k, m), or, once n
 * has passed k, the (k, m) pair PLASMA_SIZE positions further on with n = 0. */
next_n = n;
next_m = m;
next_k = k;
next_n++;
if (next_n > next_k) {
next_m += PLASMA_SIZE;
while (next_m >= A.nt && next_k < A.nt) {
next_k++;
next_m = next_m-A.nt+next_k;
}
next_n = 0;
}
/* The last tile may be partial; every other tile has A.nb rows/columns. */
tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
tempmn = m == A.nt-1 ? A.n-m*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldan = BLKLDD(A, n);
ldam = BLKLDD(A, m);
if (m == k) {
if (n == k) {
/* Diagonal tile A(k, k) on its own - presumably the zpotrf call. */
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
tempkn,
A(k, k), ldak,
&info);
}
/*
* PlasmaUpper
*/
else {
tempkn,
A(k, k), ldak,
&info);
}
/* A non-zero info is reported on the request, offset by the tile's first global index. */
if (info != 0) {
plasma_request_fail(sequence, request, info + A.nb*k);
}
/* Mark tile (k, k) as done. */
ss_cond_set(k, k, 1);
}
else {
/* Wait for tile (k, n), then update A(k, k) from it - presumably the zherk call. */
ss_cond_wait(k, n, 1);
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
tempkn, A.nb,
-1.0, A(k, n), ldak,
1.0, A(k, k), ldak);
}
/*
* PlasmaUpper
*/
else {
tempkn, A.nb,
-1.0, A(n, k), ldan,
1.0, A(k, k), ldak);
}
}
}
else {
if (n == k) {
/* Wait for the diagonal tile, then process A(m, k) (lower) or A(k, m)
 * (upper) against it - presumably the ztrsm call. */
ss_cond_wait(k, k, 1);
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
tempmn, A.nb,
zone, A(k, k), ldak,
A(m, k), ldam);
}
/*
* PlasmaUpper
*/
else {
A.nb, tempmn,
zone, A(k, k), ldak,
A(k, m), ldak);
}
/* Mark tile (m, k) as done. */
ss_cond_set(m, k, 1);
}
else {
/* Wait for tiles (k, n) and (m, n), then update A(m, k) (lower) or
 * A(k, m) (upper) from them - presumably the zgemm call. */
ss_cond_wait(k, n, 1);
ss_cond_wait(m, n, 1);
/*
* PlasmaLower
*/
if (uplo == PlasmaLower) {
tempmn, A.nb, A.nb,
mzone, A(m, n), ldam,
A(k, n), ldak,
zone, A(m, k), ldam);
}
/*
* PlasmaUpper
*/
else {
A.nb, tempmn, A.nb,
mzone, A(n, k), ldan,
A(n, m), ldan,
zone, A(k, m), ldak);
}
}
}
n = next_n;
m = next_m;
k = next_k;
}
}

Here is the call graph for this function:

Here is the caller graph for this function: