PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dblas.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define REAL

Functions

int CORE_dlarfx2 (int side, int N, double V, double TAU, double *C1, int LDC1, double *C2, int LDC2)
int CORE_dlarfx2c (int uplo, double V, double TAU, double *C1, double *C2, double *C3)
int CORE_dlarfx2ce (int uplo, double *V, double *TAU, double *C1, double *C2, double *C3)
int CORE_dhbelr (int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize)
int CORE_dhbrce (int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize)
int CORE_dhblrx (int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize)
int CORE_dgbelr (int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize)
int CORE_dgbrce (int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize)
int CORE_dgblrx (int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize)
void CORE_dasum (int storev, int uplo, int M, int N, double *A, int lda, double *work)
void CORE_dgeadd (int M, int N, double alpha, double *A, int LDA, double *B, int LDB)
void CORE_dbrdalg (PLASMA_enum uplo, int N, int NB, PLASMA_desc *pA, double *C, double *S, int i, int j, int m, int grsiz)
int CORE_dgelqt (int M, int N, int IB, double *A, int LDA, double *T, int LDT, double *TAU, double *WORK)
void CORE_dgemm (int transA, int transB, int M, int N, int K, double alpha, double *A, int LDA, double *B, int LDB, double beta, double *C, int LDC)
int CORE_dgeqrt (int M, int N, int IB, double *A, int LDA, double *T, int LDT, double *TAU, double *WORK)
int CORE_dgessm (int M, int N, int K, int IB, int *IPIV, double *L, int LDL, double *A, int LDA)
int CORE_dgetrf (int M, int N, double *A, int LDA, int *IPIV, int *INFO)
int CORE_dgetrf_incpiv (int M, int N, int IB, double *A, int LDA, int *IPIV, int *INFO)
int CORE_dgetrf_reclap (const int M, const int N, double *A, const int LDA, int *IPIV, int *info)
int CORE_dgetrf_rectil (const PLASMA_desc A, int *IPIV, int *info)
void CORE_dgetrip (int m, int n, double *A, double *work)
void CORE_dlacpy (PLASMA_enum uplo, int M, int N, double *A, int LDA, double *B, int LDB)
void CORE_dlange (int norm, int M, int N, double *A, int LDA, double *work, double *normA)
void CORE_dlansy (int norm, int uplo, int N, double *A, int LDA, double *work, double *normA)
void CORE_dlaset (PLASMA_enum uplo, int n1, int n2, double alpha, double beta, double *tileA, int ldtilea)
void CORE_dlaset2 (PLASMA_enum uplo, int n1, int n2, double alpha, double *tileA, int ldtilea)
void CORE_dlaswp (int N, double *A, int LDA, int I1, int I2, int *IPIV, int INC)
int CORE_dlaswp_ontile (PLASMA_desc descA, int i1, int i2, int *ipiv, int inc)
int CORE_dlaswpc_ontile (PLASMA_desc descA, int i1, int i2, int *ipiv, int inc)
void CORE_dlauum (int uplo, int N, double *A, int LDA)
int CORE_dpamm (int op, int side, int storev, int M, int N, int K, int L, double *A1, int LDA1, double *A2, int LDA2, double *V, int LDV, double *W, int LDW)
int CORE_dparfb (int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, int L, double *A1, int LDA1, double *A2, int LDA2, double *V, int LDV, double *T, int LDT, double *WORK, int LDWORK)
int CORE_dpemv (int trans, int storev, int M, int N, int L, double ALPHA, double *A, int LDA, double *X, int INCX, double BETA, double *Y, int INCY, double *WORK)
void CORE_dplgsy (double bump, int m, int n, double *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void CORE_dplrnt (int m, int n, double *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void CORE_dpotrf (int uplo, int N, double *A, int LDA, int *INFO)
void CORE_dshift (int s, int m, int n, int L, double *A)
void CORE_dshiftw (int s, int cl, int m, int n, int L, double *A, double *W)
int CORE_dssssm (int M1, int N1, int M2, int N2, int K, int IB, double *A1, int LDA1, double *A2, int LDA2, double *L1, int LDL1, double *L2, int LDL2, int *IPIV)
void CORE_dsymm (int side, int uplo, int M, int N, double alpha, double *A, int LDA, double *B, int LDB, double beta, double *C, int LDC)
void CORE_dsyrk (int uplo, int trans, int N, int K, double alpha, double *A, int LDA, double beta, double *C, int LDC)
void CORE_dsyr2k (int uplo, int trans, int N, int K, double alpha, double *A, int LDA, double *B, int LDB, double beta, double *C, int LDC)
void CORE_dswpab (int i, int n1, int n2, double *A, double *work)
int CORE_dswptr_ontile (PLASMA_desc descA, int i1, int i2, int *ipiv, int inc, double *Akk, int ldak)
void CORE_dtrdalg (PLASMA_enum uplo, int N, int NB, PLASMA_desc *pA, double *C, double *S, int i, int j, int m, int grsiz)
void CORE_dtrmm (int side, int uplo, int transA, int diag, int M, int N, double alpha, double *A, int LDA, double *B, int LDB)
void CORE_dtrsm (int side, int uplo, int transA, int diag, int M, int N, double alpha, double *A, int LDA, double *B, int LDB)
void CORE_dtrtri (int uplo, int diag, int N, double *A, int LDA, int *info)
int CORE_dtslqt (int M, int N, int IB, double *A1, int LDA1, double *A2, int LDA2, double *T, int LDT, double *TAU, double *WORK)
int CORE_dtsmlq (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, double *A1, int LDA1, double *A2, int LDA2, double *V, int LDV, double *T, int LDT, double *WORK, int LDWORK)
int CORE_dtsmlq_corner (int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *A3, int lda3, double *V, int ldv, double *T, int ldt, double *WORK, int ldwork)
int CORE_dtsmlq_sytra1 (int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt, double *WORK, int ldwork)
int CORE_dtsmqr (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, double *A1, int LDA1, double *A2, int LDA2, double *V, int LDV, double *T, int LDT, double *WORK, int LDWORK)
int CORE_dtsmqr_corner (int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *A3, int lda3, double *V, int ldv, double *T, int ldt, double *WORK, int ldwork)
int CORE_dtsmqr_sytra1 (int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt, double *WORK, int ldwork)
int CORE_dtsqrt (int M, int N, int IB, double *A1, int LDA1, double *A2, int LDA2, double *T, int LDT, double *TAU, double *WORK)
int CORE_dtstrf (int M, int N, int IB, int NB, double *U, int LDU, double *A, int LDA, double *L, int LDL, int *IPIV, double *WORK, int LDWORK, int *INFO)
int CORE_dttmqr (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, double *A1, int LDA1, double *A2, int LDA2, double *V, int LDV, double *T, int LDT, double *WORK, int LDWORK)
int CORE_dttqrt (int M, int N, int IB, double *A1, int LDA1, double *A2, int LDA2, double *T, int LDT, double *TAU, double *WORK)
int CORE_dttmlq (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, double *A1, int LDA1, double *A2, int LDA2, double *V, int LDV, double *T, int LDT, double *WORK, int LDWORK)
int CORE_dttlqt (int M, int N, int IB, double *A1, int LDA1, double *A2, int LDA2, double *T, int LDT, double *TAU, double *WORK)
int CORE_dormlq (int side, int trans, int M, int N, int IB, int K, double *V, int LDV, double *T, int LDT, double *C, int LDC, double *WORK, int LDWORK)
int CORE_dormqr (int side, int trans, int M, int N, int K, int IB, double *V, int LDV, double *T, int LDT, double *C, int LDC, double *WORK, int LDWORK)
void QUARK_CORE_dasum (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum storev, PLASMA_enum uplo, int m, int n, double *A, int lda, int szeA, double *work, int szeW)
void QUARK_CORE_dasum_f1 (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum storev, PLASMA_enum uplo, int m, int n, double *A, int lda, int szeA, double *work, int szeW, double *fake, int szeF)
void QUARK_CORE_dgeadd (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int nb, double alpha, double *A, int lda, double *B, int ldb)
void QUARK_CORE_dbrdalg (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int N, int NB, PLASMA_desc *A, double *C, double *S, int i, int j, int m, int grsiz, int BAND, int *PCOL, int *ACOL, int *MCOL)
void QUARK_CORE_dgelqt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A, int lda, double *T, int ldt)
void QUARK_CORE_dgemm (Quark *quark, Quark_Task_Flags *task_flags, int transA, int transB, int m, int n, int k, int nb, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc)
void QUARK_CORE_dgemm2 (Quark *quark, Quark_Task_Flags *task_flags, int transA, int transB, int m, int n, int k, int nb, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc)
void QUARK_CORE_dgemm_f2 (Quark *quark, Quark_Task_Flags *task_flags, int transA, int transB, int m, int n, int k, int nb, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc, double *fake1, int szefake1, int flag1, double *fake2, int szefake2, int flag2)
void QUARK_CORE_dgemm_p2 (Quark *quark, Quark_Task_Flags *task_flags, int transA, int transB, int m, int n, int k, int nb, double alpha, double *A, int lda, double **B, int ldb, double beta, double *C, int ldc)
void QUARK_CORE_dgemm_p2f1 (Quark *quark, Quark_Task_Flags *task_flags, int transA, int transB, int m, int n, int k, int nb, double alpha, double *A, int lda, double **B, int ldb, double beta, double *C, int ldc, double *fake1, int szefake1, int flag1)
void QUARK_CORE_dgemm_p3 (Quark *quark, Quark_Task_Flags *task_flags, int transA, int transB, int m, int n, int k, int nb, double alpha, double *A, int lda, double *B, int ldb, double beta, double **C, int ldc)
void QUARK_CORE_dgeqrt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A, int lda, double *T, int ldt)
void QUARK_CORE_dgessm (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int k, int ib, int nb, int *IPIV, double *L, int ldl, double *A, int lda)
void QUARK_CORE_dgetrf (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int nb, double *A, int lda, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request, PLASMA_bool check_info, int iinfo)
void QUARK_CORE_dgetrf_incpiv (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A, int lda, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request, PLASMA_bool check_info, int iinfo)
void QUARK_CORE_dgetrf_reclap (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int nb, double *A, int lda, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request, PLASMA_bool check_info, int iinfo, int nbthread)
void QUARK_CORE_dgetrf_rectil (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_desc A, double *Amn, int size, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request, PLASMA_bool check_info, int iinfo, int nbthread)
void QUARK_CORE_dgetrip (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, double *A, int szeA)
void QUARK_CORE_dgetrip_f1 (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, double *A, int szeA, double *fake, int szeF, int paramF)
void QUARK_CORE_dgetrip_f2 (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, double *A, int szeA, double *fake1, int szeF1, int paramF1, double *fake2, int szeF2, int paramF2)
void QUARK_CORE_dsymm (Quark *quark, Quark_Task_Flags *task_flags, int side, int uplo, int m, int n, int nb, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc)
void QUARK_CORE_dsygst (Quark *quark, Quark_Task_Flags *task_flags, int itype, int uplo, int N, double *A, int LDA, double *B, int LDB, PLASMA_sequence *sequence, PLASMA_request *request, int iinfo)
void QUARK_CORE_dsyrk (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int trans, int n, int k, int nb, double alpha, double *A, int lda, double beta, double *C, int ldc)
void QUARK_CORE_dsyr2k (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int trans, int n, int k, int nb, double alpha, double *A, int lda, double *B, int LDB, double beta, double *C, int ldc)
void QUARK_CORE_dsyrfb (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int n, int k, int ib, int nb, double *A, int lda, double *T, int ldt, double *C, int ldc)
void QUARK_CORE_dlacpy (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum uplo, int m, int n, int mb, double *A, int lda, double *B, int ldb)
void QUARK_CORE_dlange (Quark *quark, Quark_Task_Flags *task_flags, int norm, int M, int N, double *A, int LDA, int szeA, int szeW, double *result)
void QUARK_CORE_dlange_f1 (Quark *quark, Quark_Task_Flags *task_flags, int norm, int M, int N, double *A, int LDA, int szeA, int szeW, double *result, double *fake, int szeF)
void QUARK_CORE_dlansy (Quark *quark, Quark_Task_Flags *task_flags, int norm, int uplo, int N, double *A, int LDA, int szeA, int szeW, double *result)
void QUARK_CORE_dlansy_f1 (Quark *quark, Quark_Task_Flags *task_flags, int norm, int uplo, int N, double *A, int LDA, int szeA, int szeW, double *result, double *fake, int szeF)
void QUARK_CORE_dlaset (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum uplo, int n1, int n2, double alpha, double beta, double *tileA, int ldtilea)
void QUARK_CORE_dlaset2 (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum uplo, int n1, int n2, double alpha, double *tileA, int ldtilea)
void QUARK_CORE_dlaswp (Quark *quark, Quark_Task_Flags *task_flags, int n, double *A, int lda, int i1, int i2, int *ipiv, int inc)
void QUARK_CORE_dlaswp_f2 (Quark *quark, Quark_Task_Flags *task_flags, int n, double *A, int lda, int i1, int i2, int *ipiv, int inc, double *fake1, int szefake1, int flag1, double *fake2, int szefake2, int flag2)
void QUARK_CORE_dlaswp_ontile (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_desc descA, double *A, int i1, int i2, int *ipiv, int inc, double *fakepanel)
void QUARK_CORE_dlaswp_ontile_f2 (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_desc descA, double *A, int i1, int i2, int *ipiv, int inc, double *fake1, int szefake1, int flag1, double *fake2, int szefake2, int flag2)
void QUARK_CORE_dlaswpc_ontile (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_desc descA, double *A, int i1, int i2, int *ipiv, int inc, double *fakepanel)
void QUARK_CORE_dlauum (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int n, int nb, double *A, int lda)
void QUARK_CORE_dplgsy (Quark *quark, Quark_Task_Flags *task_flags, double bump, int m, int n, double *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void QUARK_CORE_dplrnt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, double *A, int lda, int bigM, int m0, int n0, unsigned long long int seed)
void QUARK_CORE_dpotrf (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int n, int nb, double *A, int lda, PLASMA_sequence *sequence, PLASMA_request *request, int iinfo)
void QUARK_CORE_dshift (Quark *quark, Quark_Task_Flags *task_flags, int s, int m, int n, int L, double *A)
void QUARK_CORE_dshiftw (Quark *quark, Quark_Task_Flags *task_flags, int s, int cl, int m, int n, int L, double *A, double *W)
void QUARK_CORE_dssssm (Quark *quark, Quark_Task_Flags *task_flags, int m1, int n1, int m2, int n2, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *L1, int ldl1, double *L2, int ldl2, int *IPIV)
void QUARK_CORE_dswpab (Quark *quark, Quark_Task_Flags *task_flags, int i, int n1, int n2, double *A, int szeA)
void QUARK_CORE_dswptr_ontile (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_desc descA, double *Aij, int i1, int i2, int *ipiv, int inc, double *Akk, int ldak)
void QUARK_CORE_dtrdalg (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int N, int NB, PLASMA_desc *A, double *C, double *S, int i, int j, int m, int grsiz, int BAND, int *PCOL, int *ACOL, int *MCOL)
void QUARK_CORE_dtrmm (Quark *quark, Quark_Task_Flags *task_flags, int side, int uplo, int transA, int diag, int m, int n, int nb, double alpha, double *A, int lda, double *B, int ldb)
void QUARK_CORE_dtrmm_p2 (Quark *quark, Quark_Task_Flags *task_flags, int side, int uplo, int transA, int diag, int m, int n, int nb, double alpha, double *A, int lda, double **B, int ldb)
void QUARK_CORE_dtrsm (Quark *quark, Quark_Task_Flags *task_flags, int side, int uplo, int transA, int diag, int m, int n, int nb, double alpha, double *A, int lda, double *B, int ldb)
void QUARK_CORE_dtrtri (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int diag, int n, int nb, double *A, int lda, PLASMA_sequence *sequence, PLASMA_request *request, int iinfo)
void QUARK_CORE_dtslqt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *T, int ldt)
void QUARK_CORE_dtsmlq (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dtsmlq_sytra1 (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dtsmlq_corner (Quark *quark, Quark_Task_Flags *task_flags, int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *A3, int lda3, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dtsmqr (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dtsmqr_sytra1 (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dtsmqr_corner (Quark *quark, Quark_Task_Flags *task_flags, int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *A3, int lda3, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dtsqrt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *T, int ldt)
void QUARK_CORE_dtstrf (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *U, int ldu, double *A, int lda, double *L, int ldl, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request, PLASMA_bool check_info, int iinfo)
void QUARK_CORE_dttmqr (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dttqrt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *T, int ldt)
void QUARK_CORE_dttmlq (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *T, int ldt)
void QUARK_CORE_dttlqt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *T, int ldt)
void QUARK_CORE_dpamm (Quark *quark, Quark_Task_Flags *task_flags, int op, int side, int storev, int m, int n, int k, int l, double *A1, int lda1, double *A2, int lda2, double *V, int ldv, double *W, int ldw)
void QUARK_CORE_dormlq (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m, int n, int ib, int nb, int k, double *A, int lda, double *T, int ldt, double *C, int ldc)
void QUARK_CORE_dormqr (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m, int n, int k, int ib, int nb, double *A, int lda, double *T, int ldt, double *C, int ldc)
void CORE_dasum_quark (Quark *quark)
void CORE_dasum_f1_quark (Quark *quark)
void CORE_dgeadd_quark (Quark *quark)
void CORE_dbrdalg_quark (Quark *quark)
void CORE_dgelqt_quark (Quark *quark)
void CORE_dgemm_quark (Quark *quark)
void CORE_dgeqrt_quark (Quark *quark)
void CORE_dgessm_quark (Quark *quark)
void CORE_dgetrf_quark (Quark *quark)
void CORE_dgetrf_incpiv_quark (Quark *quark)
void CORE_dgetrf_reclap_quark (Quark *quark)
void CORE_dgetrf_rectil_quark (Quark *quark)
void CORE_dgetrip_quark (Quark *quark)
void CORE_dgetrip_f1_quark (Quark *quark)
void CORE_dgetrip_f2_quark (Quark *quark)
void CORE_dsygst_quark (Quark *quark)
void CORE_dsyrfb_quark (Quark *quark)
void CORE_dlacpy_quark (Quark *quark)
void CORE_dlange_quark (Quark *quark)
void CORE_dlange_f1_quark (Quark *quark)
void CORE_dlansy_quark (Quark *quark)
void CORE_dlansy_f1_quark (Quark *quark)
void CORE_dlaset_quark (Quark *quark)
void CORE_dlaset2_quark (Quark *quark)
void CORE_dlauum_quark (Quark *quark)
void CORE_dpamm_quark (Quark *quark)
void CORE_dplgsy_quark (Quark *quark)
void CORE_dplrnt_quark (Quark *quark)
void CORE_dpotrf_quark (Quark *quark)
void CORE_dshift_quark (Quark *quark)
void CORE_dshiftw_quark (Quark *quark)
void CORE_dssssm_quark (Quark *quark)
void CORE_dsymm_quark (Quark *quark)
void CORE_dsyrk_quark (Quark *quark)
void CORE_dsyr2k_quark (Quark *quark)
void CORE_dswpab_quark (Quark *quark)
void CORE_dswptr_ontile_quark (Quark *quark)
void CORE_dtrdalg_quark (Quark *quark)
void CORE_dtrmm_quark (Quark *quark)
void CORE_dtrsm_quark (Quark *quark)
void CORE_dtrtri_quark (Quark *quark)
void CORE_dtslqt_quark (Quark *quark)
void CORE_dtsmlq_quark (Quark *quark)
void CORE_dtsmlq_sytra1_quark (Quark *quark)
void CORE_dtsmlq_corner_quark (Quark *quark)
void CORE_dtsmqr_quark (Quark *quark)
void CORE_dtsmqr_sytra1_quark (Quark *quark)
void CORE_dtsmqr_corner_quark (Quark *quark)
void CORE_dtsqrt_quark (Quark *quark)
void CORE_dtstrf_quark (Quark *quark)
void CORE_dttmqr_quark (Quark *quark)
void CORE_dttqrt_quark (Quark *quark)
void CORE_dttmlq_quark (Quark *quark)
void CORE_dttlqt_quark (Quark *quark)
void CORE_dormlq_quark (Quark *quark)
void CORE_dormqr_quark (Quark *quark)
void CORE_dlaswp_quark (Quark *quark)
void CORE_dlaswp_f2_quark (Quark *quark)
void CORE_dlaswp_ontile_quark (Quark *quark)
void CORE_dlaswp_ontile_f2_quark (Quark *quark)
void CORE_dlaswpc_ontile_quark (Quark *quark)
void CORE_dtrmm_p2_quark (Quark *quark)
void CORE_dgemm_f2_quark (Quark *quark)
void CORE_dgemm_p2_quark (Quark *quark)
void CORE_dgemm_p2f1_quark (Quark *quark)
void CORE_dgemm_p3_quark (Quark *quark)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Jakub Kurzak
Hatem Ltaief
Mathieu Faverge
Azzam Haidar
Date:
2010-11-15 d Tue Nov 22 14:35:11 2011

Definition in file core_dblas.h.


Macro Definition Documentation

#define REAL

Definition at line 21 of file core_dblas.h.


Function Documentation

void CORE_dasum ( int  storev,
int  uplo,
int  M,
int  N,
double *  A,
int  lda,
double *  work 
)

Definition at line 28 of file core_dasum.c.

References PlasmaColumnwise, PlasmaLower, PlasmaUpper, PlasmaUpperLower, and sum().

{
    /*
     * Accumulates absolute values of entries of A into work[]. work[] is
     * only ever added to here; it is not cleared by this routine.
     *
     *  - PlasmaUpper / PlasmaLower: only the selected triangle of A is read.
     *    Each off-diagonal entry contributes to both work[i] and work[j];
     *    the diagonal entry contributes once to work[j].
     *  - otherwise: the full M-by-N tile is read. With
     *    storev == PlasmaColumnwise column j is summed into work[j];
     *    else row i is summed into work[i].
     *
     * NOTE(review): the case labels and the storev test were missing from
     * the rendered listing (loops were unreachable and the `else` had no
     * `if`). They are restored from the identifiers listed under
     * "References" and from the shape of each loop - confirm against
     * core_dasum.c.
     */
    double *tmpA;
    double *tmpW, sum, absval;
    int i, j;

    switch (uplo) {
    case PlasmaUpper:
        for (j = 0; j < N; j++) {
            tmpA = A+(j*lda);
            sum = 0.0;
            /* strictly-upper part of column j */
            for (i = 0; i < j; i++) {
                absval = fabs(*tmpA);
                sum += absval;
                work[i] += absval;
                tmpA++;
            }
            /* tmpA now points at the diagonal entry A(j,j) */
            work[j] += sum + fabs(*tmpA);
        }
        break;
    case PlasmaLower:
        for (j = 0; j < N; j++) {
            /* start on the diagonal entry A(j,j) */
            tmpA = A+(j*lda)+j;
            sum = 0.0;
            work[j] += fabs(*tmpA);
            tmpA++;
            /* strictly-lower part of column j */
            for (i = j+1; i < M; i++) {
                absval = fabs(*tmpA);
                sum += absval;
                work[i] += absval;
                tmpA++;
            }
            work[j] += sum;
        }
        break;
    case PlasmaUpperLower:
    default:
        if (storev == PlasmaColumnwise) {
            for (j = 0; j < N; j++) {
                /* work[j] += cblas_dasum(M, &(A[j*lda]), 1); */
                tmpA = A+(j*lda);
                for (i = 0; i < M; i++) {
                    work[j] += fabs(*tmpA);
                    tmpA++;
                }
            }
        }
        else {
            for (j = 0; j < N; j++) {
                tmpA = A+(j*lda);
                tmpW = work;
                for (i = 0; i < M; i++) {
                    /* work[i] += fabs( A[j*lda+i] );*/
                    *tmpW += fabs( *tmpA );
                    tmpA++; tmpW++;
                }
            }
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dasum_f1_quark ( Quark * quark)

Definition at line 162 of file core_dasum.c.

References A, CORE_dasum(), quark_unpack_args_8, storev, and uplo.

{
    /*
     * QUARK task body: the locals below are handed to quark_unpack_args_8
     * and the first seven are then forwarded to CORE_dasum.
     */
    double *A, *work;
    double *fake;   /* unpacked like the others, but not passed to CORE_dasum */
    int storev, uplo;
    int M, N;
    int lda;

    quark_unpack_args_8(quark, storev, uplo, M, N, A, lda, work, fake);

    CORE_dasum(storev, uplo, M, N, A, lda, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dasum_quark ( Quark * quark)

Declarations of QUARK wrappers (called by QUARK) - alphabetical order

Definition at line 119 of file core_dasum.c.

References A, CORE_dasum(), quark_unpack_args_7, storev, and uplo.

{
    /*
     * QUARK task body: the locals below are handed to quark_unpack_args_7
     * and then forwarded unchanged to CORE_dasum.
     */
    double *A, *work;
    int storev, uplo;
    int M, N;
    int lda;

    quark_unpack_args_7(quark, storev, uplo, M, N, A, lda, work);

    CORE_dasum(storev, uplo, M, N, A, lda, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dbrdalg ( PLASMA_enum  uplo,
int  N,
int  NB,
PLASMA_desc *  pA,
double *  V,
double *  TAU,
int  i,
int  j,
int  m,
int  grsiz 
)

CORE_dbrdalg is a part of the bidiagonal reduction algorithm (bulge chasing). It corresponds to a local driver of the kernels that should be executed on a single core.

Parameters:
[in] uplo
  • PlasmaLower:
  • PlasmaUpper:
[in] N: The order of the matrix A. N >= 0.
[in] NB: The bandwidth of the matrix A, which corresponds to the tile size. NB >= 0.
[in] pA: A pointer to the descriptor of the matrix A.
[out] V: double array, dimension (N). The scalar elementary reflectors are written in this array, so it is used as a workspace for V at each step of the bulge chasing algorithm.
[out] TAU: double array, dimension (N). The scalar factors of the elementary reflectors are written in this array, so it is used as a workspace for TAU at each step of the bulge chasing algorithm.
[in] i: Integer that refers to the current sweep (outer loop).
[in] j: Integer that refers to the sweep to chase (inner loop).
[in] m: Integer that refers to a sweep step, to ensure order dependencies.
[in] grsiz: Integer that refers to the size of a group. A group is the number of kernels that should be executed sequentially on the same core. The group size is a trade-off between locality (cache reuse) and parallelism: a small group size increases parallelism, while a large group size increases cache reuse.
Returns:
Return values:
PLASMA_SUCCESS: successful exit
<0: if -i, the i-th argument had an illegal value

Definition at line 83 of file core_dbrdalg.c.

References A, CORE_dgbelr(), CORE_dgblrx(), CORE_dgbrce(), plasma_desc_t::dtyp, min, and plasma_element_size().

{
    /*
     * Runs up to grsiz consecutive kernels for sweep (i, j) at step m.
     * Each kernel is identified by myid; its parity (and the special value
     * 1) selects which of CORE_dgbelr / CORE_dgbrce / CORE_dgblrx is
     * called on the index range [stind, edind].
     */
    int shift = 3;
    int quot, stepercol;
    int task, myid, half;
    int colpt, stind, edind, blklastind;
    size_t eltsize;
    PLASMA_desc A = *pA;

    eltsize = plasma_element_size(A.dtyp);

    /* stepercol = shift / grsiz, plus one when the division is not exact */
    quot = shift / grsiz;
    stepercol = quot;
    if (quot*grsiz != shift)
        stepercol = quot + 1;

    for (task = 0; task < grsiz; task++) {
        myid = (i-j)*(stepercol*grsiz) + (m-1)*grsiz + task+1;

        /* Column pointer and index window; only the block count differs
         * between even and odd ids. */
        half  = (myid%2 == 0) ? (myid/2) : ((myid+1)/2);
        colpt = half * NB + 1 + j - 1;
        stind = colpt - NB + 1;
        edind = min(colpt, N);

        /* blklastind drives the early exit at the bottom of the loop */
        if (myid%2 == 0)
            blklastind = colpt;
        else if ((stind >= edind-1) && (edind == N))
            blklastind = N;
        else
            blklastind = 0;

        /* Kernel selection: even ids -> dgbrce, id 1 -> dgbelr,
         * remaining odd ids -> dgblrx. */
        if (myid%2 == 0)
            CORE_dgbrce(uplo, N, &A, V, TAU, stind, edind, eltsize);
        else if (myid == 1)
            CORE_dgbelr(uplo, N, &A, V, TAU, stind, edind, eltsize);
        else
            CORE_dgblrx(uplo, N, &A, V, TAU, stind, edind, eltsize);

        if (blklastind >= (N-1))
            break;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dbrdalg_quark ( Quark * quark)

Definition at line 161 of file core_dbrdalg.c.

References CORE_dbrdalg(), quark_unpack_args_10, TAU, uplo, and V.

{
    /*
     * QUARK task body: unpacks the ten task arguments and forwards them
     * unchanged to CORE_dbrdalg.
     */
    /* NOTE(review): pA is unpacked and forwarded below, but its declaration
     * was absent from the rendered listing. Restored with the type used by
     * the CORE_dbrdalg prototype - confirm against core_dbrdalg.c. */
    PLASMA_desc *pA;
    double *V;
    double *TAU;
    int uplo;
    int N, NB;
    int i, j, m, grsiz;

    quark_unpack_args_10(quark, uplo, N, NB, pA, V, TAU, i, j, m, grsiz);
    CORE_dbrdalg(uplo, N, NB, pA, V, TAU, i, j, m, grsiz);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgbelr ( int  uplo,
int  N,
PLASMA_desc *  A,
double *  V,
double *  TAU,
int  st,
int  ed,
int  eltsize 
)

Definition at line 78 of file core_dgbelr.c.

References A, CORE_dlarfx2(), CORE_dlarfx2ce(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * For i = ed down to st+1: generates a 2-element Householder reflector
     * (stored in V(i) / TAU(i)) that zeroes one band entry, applies it to
     * the neighbouring rows/columns via CORE_dlarfx2 and to the 2x2
     * diagonal block via CORE_dlarfx2ce, then applies the same reflectors
     * from the other side on the remaining elements.
     *
     * Returns -2 if N < 0, -6 if ed <= st, PLASMA_SUCCESS otherwise.
     */
    int NB, J1, J2;
    int len1, len2, t1ed, t2st;
    int i;
    static double zzero = 0.0;
    /* NOTE(review): vA is used by every ELTLDD(vA, ...) below but its
     * declaration was absent from the rendered listing. Restored as a
     * by-value copy of the descriptor - confirm against core_dgbelr.c. */
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* NB must be set on every non-trivial call: it is the divisor used to
     * split each update at a tile boundary below. */
    NB = A->mb;

    if( uplo == PlasmaLower ){
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band*/
            *V(i) = *A(i, (st-1));
            *A(i, (st-1)) = zzero;
            LAPACKE_dlarfg_work( 2, A((i-1),(st-1)), V(i), 1, TAU(i));
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1 = st;
            J2 = i-2;
            /* split [J1, J2] at the last multiple of NB so each call stays
             * on one side of that boundary */
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, (i-1)), A(i, J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)), A(i, t2st), ELTLDD(vA, i) );
            CORE_dlarfx2ce(PlasmaLower, V(i), TAU(i), A(i-1,i-1), A(i,i-1), A(i,i));
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1 = i+1;
            J2 = min(ed,N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band*/
            *V(i) = *A((st-1), i);
            *A((st-1), i) = zzero;
            LAPACKE_dlarfg_work( 2, A((st-1), (i-1)), V(i), 1, TAU(i));
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1 = st;
            J2 = i-2;
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
            CORE_dlarfx2ce(PlasmaUpper, V(i), TAU(i), A((i-1),(i-1)), A((i-1), i), A(i,i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1*/
        for (i = ed; i >= st+1 ; i--){
            J1 = i+1;
            J2 = min(ed,N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, (i-1)), A(i, J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)), A(i, t2st), ELTLDD(vA, i) );
        }
    } /* end of else for the upper case*/

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgblrx ( int  uplo,
int  N,
PLASMA_desc *  A,
double *  V,
double *  TAU,
int  st,
int  ed,
int  eltsize 
)

Definition at line 78 of file core_dgblrx.c.

References A, CORE_dlarfx2(), CORE_dlarfx2ce(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * Applies the reflectors stored in V(i)/TAU(i), for i = ed down to st+1,
     * to the band matrix described by A.  For each triangle (uplo) the work
     * is done in two sweeps: first one side plus the 2x2 diagonal block
     * (CORE_dlarfx2ce), then the opposite side on the remaining elements.
     *
     * Returns PLASMA_SUCCESS, -2 if N < 0, -6 if ed <= st.
     */
    int NB, J1, J2;
    int len1, len2, t1ed, t2st;
    int i;
    /* By-value copy of the descriptor: ELTLDD() below is applied to vA. */
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* NB must be set on every non-trivial path: it is the divisor used below. */
    NB = A->mb;

    if( uplo == PlasmaLower ){
        /* ========================
         * LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1 = st;
            J2 = i-2;
            /* Split [J1, J2] at t1ed = (J2/NB)*NB; each non-empty piece gets its own CORE_dlarfx2 call. */
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, (i-1)), A(i, J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)), A(i, t2st), ELTLDD(vA, i) );
            CORE_dlarfx2ce(PlasmaLower, V(i), TAU(i), A(i-1,i-1), A(i,i-1), A(i,i));
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1 = i+1;
            J2 = min(ed,N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
        }
    } else {
        /* ========================
         * UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1 = st;
            J2 = i-2;
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
            CORE_dlarfx2ce(PlasmaUpper, V(i), TAU(i), A(i-1,i-1), A(i-1, i), A(i,i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1 = i+1;
            J2 = min(ed,N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, (i-1)), A(i, J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)), A(i, t2st), ELTLDD(vA, i) );
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgbrce ( int  uplo,
int  N,
PLASMA_desc *  A,
double *  V,
double *  TAU,
int  st,
int  ed,
int  eltsize 
)

Definition at line 76 of file core_dgbrce.c.

References A, CORE_dlarfx2(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, TAU, and V.

{
    /*
     * For i = ed down to st+1, applies the reflector (V(i), TAU(i)) to the
     * part of the band beyond index ed.  When that creates an element
     * outside the band (J3 > J2), a new reflector is generated with
     * LAPACKE_dlarfg_work and stored in V(J3)/TAU(J3).  A second sweep then
     * applies those new reflectors from the opposite side.
     *
     * Returns PLASMA_SUCCESS, -2 if N < 0, -6 if ed <= st.
     */
    int NB, J1, J2, J3, KDM2, len, pt;
    int len1, len2, t1ed, t2st;
    int i;
    static double zzero = 0.0;
    /* By-value copy of the descriptor: ELTLDD() below is applied to vA. */
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* NB must be set on every non-trivial path: it is the divisor used below. */
    NB = A->mb;
    KDM2 = A->mb-2;

    if( uplo == PlasmaLower ){
        /* ========================
         * LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1 = ed+1;
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            len = J3-J1+1;
            if(J3>J2)*A(J3,(i-1))=zzero;/* could be removed because A is supposed to be band.*/
            /* Split [J1, J3] at t1ed = (J3/NB)*NB; each non-empty piece gets its own CORE_dlarfx2 call. */
            t1ed = (J3/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J3-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1, i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st));
            len = J3-J2;
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3) = *A(J3,(i-1));
                *A(J3,(i-1)) = 0.0;
                LAPACKE_dlarfg_work( 2, A(J2,(i-1)), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1 ; i--){
            /* Recompute J2/J3 exactly as in the first sweep to know whether a reflector was generated. */
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            len = J3-J2;
            if(len>0){
                pt = J2;
                J1 = i;
                J2 = min(ed,N);
                t1ed = (J2/NB)*NB;
                t2st = max(t1ed+1,J1);
                len1 = t1ed-J1+1;
                len2 = J2-t2st+1;
                if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(J3), (*TAU(J3)), A(pt, i ), ELTLDD(vA, pt), A((pt+1), i ), ELTLDD(vA, pt+1) );
                if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(J3), (*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt), A((pt+1), t2st), ELTLDD(vA, pt+1) );
            }
        }
    } else {
        /* ========================
         * UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1 = ed+1;
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            len = J3-J1+1;
            if(J3>J2)*A((i-1), J3)=zzero;
            t1ed = (J3/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1;
            len2 = J3-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, i-1), A(i, J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i) );
            /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it. */
            len = J3-J2;
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3) = *A(i-1, J3);
                *A(i-1, J3) = 0.0;
                LAPACKE_dlarfg_work( 2, A(i-1, J2), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1 ; i--){
            /* find if there was a nnz created. if yes apply right else nothing to be done. */
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            len = J3-J2;
            if(len>0){
                pt = J2;
                J1 = i;
                J2 = min(ed,N);
                t1ed = (J2/NB)*NB;
                t2st = max(t1ed+1,J1);
                len1 = t1ed-J1+1;
                len2 = J2-t2st+1;
                if(len1>0)CORE_dlarfx2(PlasmaRight, len1 , (*V(J3)), (*TAU(J3)), A(i , pt), ELTLDD(vA, i), A(i, pt+1), ELTLDD(vA, i) );
                if(len2>0)CORE_dlarfx2(PlasmaRight, len2 , (*V(J3)), (*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st), A(t2st, pt+1), ELTLDD(vA, t2st) );
            }
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgeadd ( int  M,
int  N,
double  alpha,
double *  A,
int  LDA,
double *  B,
int  LDB 
)

Definition at line 26 of file core_dgeadd.c.

References cblas_daxpy().

{
    /*
     * Accumulates alpha * A into B with cblas_daxpy (A is the x operand,
     * B the y operand).  One call covers the whole M*N block when both
     * leading dimensions equal M; otherwise one call is issued per column.
     */
    int col;

    if (M != LDA || M != LDB) {
        /* Strided storage: handle the N columns one at a time. */
        for (col = 0; col < N; col++)
            cblas_daxpy(M, (alpha), &A[col*LDA], 1, &B[col*LDB], 1);
    }
    else {
        /* Both operands are contiguous: treat them as vectors of length M*N. */
        cblas_daxpy(M*N, (alpha), A, 1, B, 1);
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgeadd_quark ( Quark quark)

Definition at line 67 of file core_dgeadd.c.

References A, B, cblas_daxpy(), and quark_unpack_args_7.

{
    /*
     * QUARK task body: unpacks the seven task arguments and performs the
     * same daxpy-based accumulation of alpha * A into B as CORE_dgeadd.
     */
    int M, N;
    double alpha;
    double *A;
    int LDA;
    double *B;
    int LDB;
    int col;

    quark_unpack_args_7(quark, M, N, alpha, A, LDA, B, LDB);

    if (M != LDA || M != LDB) {
        /* Strided storage: handle the N columns one at a time. */
        for (col = 0; col < N; col++)
            cblas_daxpy(M, (alpha), &A[col*LDA], 1, &B[col*LDB], 1);
    }
    else {
        /* Both operands are contiguous: treat them as vectors of length M*N. */
        cblas_daxpy(M*N, (alpha), A, 1, B, 1);
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgelqt ( int  M,
int  N,
int  IB,
double *  A,
int  LDA,
double *  T,
int  LDT,
double *  TAU,
double *  WORK 
)

CORE_dgelqt - computes an LQ factorization of a real M-by-N tile A: A = L * Q.

The tile Q is represented as a product of elementary reflectors

Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), and tau in TAU(i).

Parameters:
[in] M: The number of rows of the tile A. M >= 0.
[in] N: The number of columns of the tile A. N >= 0.
[in] IB: The inner-blocking size. IB >= 0.
[in,out] A: On entry, the M-by-N tile A. On exit, the elements on and below the diagonal of the array contain the M-by-min(M,N) lower trapezoidal tile L (L is lower triangular if M <= N); the elements above the diagonal, with the array TAU, represent the orthogonal tile Q as a product of elementary reflectors (see Further Details).
[in] LDA: The leading dimension of the array A. LDA >= max(1,M).
[out] T: The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in] LDT: The leading dimension of the array T. LDT >= IB.
[out] TAU: The scalar factors of the elementary reflectors (see Further Details).
[out] WORK
Returns:
Return values:
PLASMA_SUCCESS: successful exit
<0: if -i, the i-th argument had an illegal value

Definition at line 85 of file core_dgelqt.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaNoTrans, PlasmaRight, and PlasmaRowwise.

{
    /*
     * Blocked factorization of the tile A, IB rows at a time.  For each
     * panel: LAPACKE_dgelq2_work factors it, LAPACKE_dlarft_work builds the
     * corresponding block of T (forward, row-wise storage), and
     * LAPACKE_dlarfb_work applies the block reflector from the right to the
     * rows below the panel.
     *
     * Returns PLASMA_SUCCESS, or -k when the k-th argument is illegal.
     */
    int i, k, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(5, "Illegal value of LDA");
        return -5;
    }
    if ((LDT < max(1,IB)) && (IB > 0)) {
        coreblas_error(7, "Illegal value of LDT");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    k = min(M, N);

    for(i = 0; i < k; i += IB) {
        /* The last panel may be narrower than IB. */
        sb = min(IB, k-i);

        LAPACKE_dgelq2_work(LAPACK_COL_MAJOR, sb, N-i,
                            &A[LDA*i+i], LDA, &TAU[i], WORK);

        LAPACKE_dlarft_work(LAPACK_COL_MAJOR,
            lapack_const(PlasmaForward),
            lapack_const(PlasmaRowwise),
            N-i, sb,
            &A[LDA*i+i], LDA, &TAU[i],
            &T[LDT*i], LDT);

        if (M > i+sb) {
            /* Update the M-i-sb rows that lie below the current panel. */
            LAPACKE_dlarfb_work(
                LAPACK_COL_MAJOR,
                lapack_const(PlasmaRight),
                lapack_const(PlasmaNoTrans),
                lapack_const(PlasmaForward),
                lapack_const(PlasmaRowwise),
                M-i-sb, N-i, sb,
                &A[LDA*i+i], LDA,
                &T[LDT*i], LDT,
                &A[LDA*i+(i+sb)], LDA,
                WORK, M-i-sb);
        }
    }
    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_dgelqt_quark ( Quark quark)

Definition at line 180 of file core_dgelqt.c.

References A, CORE_dgelqt(), quark_unpack_args_9, T, and TAU.

{
    /* QUARK task body: unpack the nine task arguments and forward them to CORE_dgelqt. */
    int m, n, ib;
    double *A;
    int lda;
    double *T;
    int ldt;
    double *TAU, *WORK;

    quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK);
    /* The status returned by CORE_dgelqt is not inspected here. */
    CORE_dgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgemm ( int  transA,
int  transB,
int  M,
int  N,
int  K,
double  alpha,
double *  A,
int  LDA,
double *  B,
int  LDB,
double  beta,
double *  C,
int  LDC 
)

Definition at line 28 of file core_dgemm.c.

References cblas_dgemm(), and CblasColMajor.

{
    /*
     * Thin wrapper over cblas_dgemm in column-major order; the integer
     * transA/transB codes are cast directly to CBLAS_TRANSPOSE.
     */
    cblas_dgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        M, N, K,
        (alpha), A, LDA,
        B, LDB,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgemm_f2_quark ( Quark quark)

Definition at line 171 of file core_dgemm.c.

References A, B, C, cblas_dgemm(), CblasColMajor, and quark_unpack_args_15.

{
    /*
     * QUARK task body: unpacks 15 task arguments and calls cblas_dgemm in
     * column-major order.  fake1 and fake2 are unpacked but never used in
     * this body.
     */
    int transA;
    int transB;
    int M;
    int N;
    int K;
    double alpha;
    double *A;
    int LDA;
    double *B;
    int LDB;
    double beta;
    double *C;
    int LDC;
    void *fake1, *fake2;

    quark_unpack_args_15(quark, transA, transB, M, N, K, alpha,
                         A, LDA, B, LDB, beta, C, LDC, fake1, fake2);
    cblas_dgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        M, N, K,
        (alpha), A, LDA,
        B, LDB,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgemm_p2_quark ( Quark quark)

Definition at line 234 of file core_dgemm.c.

References A, B, C, cblas_dgemm(), CblasColMajor, and quark_unpack_args_13.

{
    /*
     * QUARK task body: unpacks 13 task arguments and calls cblas_dgemm in
     * column-major order.  B is unpacked as a pointer to a pointer and is
     * dereferenced only at the call.
     */
    int transA;
    int transB;
    int M;
    int N;
    int K;
    double alpha;
    double *A;
    int LDA;
    double **B;
    int LDB;
    double beta;
    double *C;
    int LDC;

    quark_unpack_args_13(quark, transA, transB, M, N, K, alpha,
                         A, LDA, B, LDB, beta, C, LDC);
    cblas_dgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        M, N, K,
        (alpha), A, LDA,
        *B, LDB,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgemm_p2f1_quark ( Quark quark)

Definition at line 360 of file core_dgemm.c.

References A, B, C, cblas_dgemm(), CblasColMajor, and quark_unpack_args_14.

{
    /*
     * QUARK task body: unpacks 14 task arguments and calls cblas_dgemm in
     * column-major order.  B is unpacked as a pointer to a pointer and is
     * dereferenced only at the call; fake1 is unpacked but never used in
     * this body.
     */
    int transA;
    int transB;
    int M;
    int N;
    int K;
    double alpha;
    double *A;
    int LDA;
    double **B;
    int LDB;
    double beta;
    double *C;
    int LDC;
    void *fake1;

    quark_unpack_args_14(quark, transA, transB, M, N, K, alpha,
                         A, LDA, B, LDB, beta, C, LDC, fake1);
    cblas_dgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        M, N, K,
        (alpha), A, LDA,
        *B, LDB,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgemm_p3_quark ( Quark quark)

Definition at line 296 of file core_dgemm.c.

References A, B, C, cblas_dgemm(), CblasColMajor, and quark_unpack_args_13.

{
    /*
     * QUARK task body: unpacks 13 task arguments and calls cblas_dgemm in
     * column-major order.  C is unpacked as a pointer to a pointer and is
     * dereferenced only at the call.
     */
    int transA;
    int transB;
    int M;
    int N;
    int K;
    double alpha;
    double *A;
    int LDA;
    double *B;
    int LDB;
    double beta;
    double **C;
    int LDC;

    quark_unpack_args_13(quark, transA, transB, M, N, K, alpha,
                         A, LDA, B, LDB, beta, C, LDC);
    cblas_dgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        M, N, K,
        (alpha), A, LDA,
        B, LDB,
        (beta), *C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgemm_quark ( Quark quark)

Definition at line 106 of file core_dgemm.c.

References A, B, C, cblas_dgemm(), CblasColMajor, and quark_unpack_args_13.

{
    /*
     * QUARK task body: unpacks 13 task arguments and calls cblas_dgemm in
     * column-major order.
     */
    int transA;
    int transB;
    int m;
    int n;
    int k;
    double alpha;
    double *A;
    int lda;
    double *B;
    int ldb;
    double beta;
    double *C;
    int ldc;

    quark_unpack_args_13(quark, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
    cblas_dgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        m, n, k,
        (alpha), A, lda,
        B, ldb,
        (beta), C, ldc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgeqrt ( int  M,
int  N,
int  IB,
double *  A,
int  LDA,
double *  T,
int  LDT,
double *  TAU,
double *  WORK 
)

CORE_dgeqrt computes a QR factorization of a real M-by-N tile A: A = Q * R.

The tile Q is represented as a product of elementary reflectors

Q = H(1) H(2) . . . H(k), where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i).

Parameters:
[in] M: The number of rows of the tile A. M >= 0.
[in] N: The number of columns of the tile A. N >= 0.
[in] IB: The inner-blocking size. IB >= 0.
[in,out] A: On entry, the M-by-N tile A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal tile R (R is upper triangular if M >= N); the elements below the diagonal, with the array TAU, represent the orthogonal tile Q as a product of elementary reflectors (see Further Details).
[in] LDA: The leading dimension of the array A. LDA >= max(1,M).
[out] T: The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in] LDT: The leading dimension of the array T. LDT >= IB.
[out] TAU: The scalar factors of the elementary reflectors (see Further Details).
[out] WORK
Returns:
Return values:
PLASMA_SUCCESS: successful exit
<0: if -i, the i-th argument had an illegal value

Definition at line 86 of file core_dgeqrt.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, and PlasmaTrans.

{
    /*
     * Blocked factorization of the tile A, IB columns at a time.  For each
     * panel: LAPACKE_dgeqr2_work factors it, LAPACKE_dlarft_work builds the
     * corresponding block of T (forward, column-wise storage), and
     * LAPACKE_dlarfb_work applies the transposed block reflector from the
     * left to the columns right of the panel.
     *
     * Returns PLASMA_SUCCESS, or -k when the k-th argument is illegal.
     */
    int i, k, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(5, "Illegal value of LDA");
        return -5;
    }
    if ((LDT < max(1,IB)) && (IB > 0)) {
        coreblas_error(7, "Illegal value of LDT");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    k = min(M, N);

    for(i = 0; i < k; i += IB) {
        /* The last panel may be narrower than IB. */
        sb = min(IB, k-i);

        LAPACKE_dgeqr2_work(LAPACK_COL_MAJOR, M-i, sb,
                            &A[LDA*i+i], LDA, &TAU[i], WORK);

        LAPACKE_dlarft_work(LAPACK_COL_MAJOR,
            lapack_const(PlasmaForward),
            lapack_const(PlasmaColumnwise),
            M-i, sb,
            &A[LDA*i+i], LDA, &TAU[i],
            &T[LDT*i], LDT);

        if (N > i+sb) {
            /* Update the N-i-sb columns that lie right of the current panel. */
            LAPACKE_dlarfb_work(
                LAPACK_COL_MAJOR,
                lapack_const(PlasmaLeft),
                lapack_const(PlasmaTrans),
                lapack_const(PlasmaForward),
                lapack_const(PlasmaColumnwise),
                M-i, N-i-sb, sb,
                &A[LDA*i+i], LDA,
                &T[LDT*i], LDT,
                &A[LDA*(i+sb)+i], LDA,
                WORK, N-i-sb);
        }
    }
    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_dgeqrt_quark ( Quark quark)

Definition at line 181 of file core_dgeqrt.c.

References A, CORE_dgeqrt(), quark_unpack_args_9, T, and TAU.

{
    /* QUARK task body: unpack the nine task arguments and forward them to CORE_dgeqrt. */
    int m, n, ib;
    double *A;
    int lda;
    double *T;
    int ldt;
    double *TAU, *WORK;

    quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK);
    /* The status returned by CORE_dgeqrt is not inspected here. */
    CORE_dgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgessm ( int  M,
int  N,
int  K,
int  IB,
int *  IPIV,
double *  L,
int  LDL,
double *  A,
int  LDA 
)

CORE_dgessm applies the factor L computed by CORE_dgetrf_incpiv to a real M-by-N tile A.

Parameters:
[in] M: The number of rows of the tile A. M >= 0.
[in] N: The number of columns of the tile A. N >= 0.
[in] K
[in] IB: The inner-blocking size. IB >= 0.
[in] IPIV: As returned by CORE_dgetrf_incpiv.
[in] L: The NB-by-NB lower triangular tile.
[in] LDL: The leading dimension of the array L. LDL >= max(1,NB).
[in,out] A: On entry, the M-by-N tile A. On exit, updated by the application of L.
[in] LDA: The leading dimension of the array A. LDA >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESS: successful exit
<0: if INFO = -k, the k-th argument had an illegal value

Definition at line 68 of file core_dgessm.c.

References cblas_dgemm(), cblas_dtrsm(), CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * Processes A in blocks of IB rows: applies the row interchanges in
     * IPIV (LAPACKE_dlaswp_work), solves with the unit lower-triangular
     * diagonal block of L (cblas_dtrsm), then updates the rows below the
     * block (cblas_dgemm).
     *
     * Returns PLASMA_SUCCESS, or -k when the k-th argument is illegal.
     */
    static double zone = 1.0;
    static double mzone = -1.0;
    static int ione = 1;
    int i, sb;
    int tmp, tmp2;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (K < 0) {
        coreblas_error(3, "Illegal value of K");
        return -3;
    }
    if (IB < 0) {
        coreblas_error(4, "Illegal value of IB");
        return -4;
    }
    if ((LDL < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDL");
        return -7;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(9, "Illegal value of LDA");
        return -9;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(i = 0; i < K; i += IB) {
        /* The last block may be shorter than IB. */
        sb = min(IB, K-i);
        /*
         * Apply interchanges to columns I*IB+1:IB*( I+1 )+1.
         */
        tmp = i+1;   /* first pivot index passed to dlaswp (1-based) */
        tmp2 = i+sb; /* last pivot index passed to dlaswp */
        LAPACKE_dlaswp_work(LAPACK_COL_MAJOR, N, A, LDA, tmp, tmp2, IPIV, ione);
        /*
         * Compute block row of U.
         */
        cblas_dtrsm(
            CblasColMajor, CblasLeft, CblasLower,
            CblasNoTrans, CblasUnit,
            sb, N, (zone),
            &L[LDL*i+i], LDL,
            &A[i], LDA );

        if (i+sb < M) {
            /*
             * Update trailing submatrix.
             */
            cblas_dgemm(
                CblasColMajor, CblasNoTrans, CblasNoTrans,
                M-(i+sb), N, sb,
                (mzone), &L[LDL*i+(i+sb)], LDL,
                &A[i], LDA,
                (zone), &A[i+sb], LDA );
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgessm_quark ( Quark quark)

Definition at line 172 of file core_dgessm.c.

References A, CORE_dgessm(), IPIV, L, and quark_unpack_args_9.

{
    /* QUARK task body: unpack the nine task arguments and forward them to CORE_dgessm. */
    int m, n, k, ib;
    int *IPIV;
    double *L;
    int ldl;
    double *A;
    int lda;

    quark_unpack_args_9(quark, m, n, k, ib, IPIV, L, ldl, A, lda);
    /* The status returned by CORE_dgessm is not inspected here. */
    CORE_dgessm(m, n, k, ib, IPIV, L, ldl, A, lda);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgetrf ( int  M,
int  N,
double *  A,
int  LDA,
int *  IPIV,
int *  INFO 
)

Definition at line 22 of file core_dgetrf.c.

References PLASMA_SUCCESS.

{
    /*
     * Delegates to LAPACKE_dgetrf_work (column-major) and stores its status
     * in *info; the function's own return value is always PLASMA_SUCCESS.
     * NOTE(review): the body uses lower-case m, n, lda, info while the
     * prototype shown for this function lists M, N, LDA, INFO -- confirm
     * the parameter spelling against core_dgetrf.c.
     */
    *info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, m, n, A, lda, IPIV );
    return PLASMA_SUCCESS;
}
int CORE_dgetrf_incpiv ( int  M,
int  N,
int  IB,
double *  A,
int  LDA,
int *  IPIV,
int *  INFO 
)

CORE_dgetrf_incpiv computes an LU factorization of a general M-by-N tile A using partial pivoting with row interchanges.

The factorization has the form

A = P * L * U

where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n).

This is the right-looking Level 2.5 BLAS version of the algorithm.

Parameters:
[in] M: The number of rows of the tile A. M >= 0.
[in] N: The number of columns of the tile A. N >= 0.
[in] IB: The inner-blocking size. IB >= 0.
[in,out] A: On entry, the M-by-N tile to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in] LDA: The leading dimension of the array A. LDA >= max(1,M).
[out] IPIV: The pivot indices; for 1 <= i <= min(M,N), row i of the tile was interchanged with row IPIV(i).
[out] INFO: See returned value.
Returns:
Return values:
PLASMA_SUCCESS: successful exit
<0: if INFO = -k, the k-th argument had an illegal value
>0: if INFO = k, U(k,k) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

Definition at line 83 of file core_dgetrf_incpiv.c.

References CORE_dgessm(), coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * Blocked LU of the tile A, IB columns at a time.  Each panel is
     * factored with LAPACKE_dgetf2_work; the columns to its right are then
     * updated through CORE_dgessm, and the panel's pivot indices are
     * shifted by the panel offset i.
     *
     * *INFO receives iinfo + i for the first panel whose dgetf2 status is
     * positive, and stays 0 otherwise.  The return value is PLASMA_SUCCESS,
     * or -k when the k-th argument is illegal.
     */
    int i, j, k, sb;
    int iinfo;

    /* Check input arguments */
    *INFO = 0;
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(5, "Illegal value of LDA");
        return -5;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    k = min(M, N);

    for(i =0 ; i < k; i += IB) {
        /* The last panel may be narrower than IB. */
        sb = min(IB, k-i);
        /*
         * Factor diagonal and subdiagonal blocks and test for exact singularity.
         */
        iinfo = LAPACKE_dgetf2_work(LAPACK_COL_MAJOR, M-i, sb, &A[LDA*i+i], LDA, &IPIV[i]);
        /*
         * Adjust INFO and the pivot indices.
         */
        if((*INFO == 0) && (iinfo > 0))
            *INFO = iinfo + i;

        if (i+sb < N) {
            /* Called while IPIV[i..] is still panel-relative (shifted just below). */
            CORE_dgessm(
                M-i, N-(i+sb), sb, sb,
                &IPIV[i],
                &A[LDA*i+i], LDA,
                &A[LDA*(i+sb)+i], LDA);
        }

        for(j = i; j < i+sb; j++) {
            IPIV[j] = i + IPIV[j];
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgetrf_incpiv_quark ( Quark quark)

Definition at line 174 of file core_dgetrf_incpiv.c.

References A, CORE_dgetrf_incpiv(), IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, and quark_unpack_args_10.

{
    /*
     * QUARK task body: unpacks the ten task arguments, runs
     * CORE_dgetrf_incpiv, and flushes the sequence with iinfo+info when the
     * reported info is not PLASMA_SUCCESS and check_info is set.
     */
    int m, n, ib;
    double *A;
    int lda;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_bool check_info;
    int iinfo, info;

    quark_unpack_args_10(quark, m, n, ib, A, lda, IPIV, sequence, request, check_info, iinfo);

    /* Only the info out-parameter is examined; the return value is ignored. */
    CORE_dgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);

    if (check_info && info != PLASMA_SUCCESS)
        plasma_sequence_flush(quark, sequence, request, iinfo+info);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgetrf_quark ( Quark quark)

Definition at line 61 of file core_dgetrf.c.

References A, IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, and quark_unpack_args_9.

{
    /*
     * QUARK task body: unpacks the nine task arguments, calls
     * LAPACKE_dgetrf_work directly, and flushes the sequence with
     * iinfo+info when the status is not PLASMA_SUCCESS and check_info is set.
     */
    int m, n;
    double *A;
    int lda;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_bool check_info;
    int iinfo, info;

    quark_unpack_args_9(quark, m, n, A, lda, IPIV, sequence, request, check_info, iinfo);

    info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, m, n, A, lda, IPIV );

    if (check_info && info != PLASMA_SUCCESS)
        plasma_sequence_flush(quark, sequence, request, iinfo+info);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgetrf_reclap ( const int  M,
const int  N,
double *  A,
const int  LDA,
int *  IPIV,
int *  info 
)

Definition at line 307 of file core_dgetrf_reclap.c.

References coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * Entry point of the recursive LU kernel.  On input info[1] holds this
     * caller's thread index and info[2] an upper bound on the thread count;
     * info[0] receives the status of the factorization and is also the
     * return value.  Argument errors return -1, -2 or -5.
     */
    int thidx = info[1];
    int thcnt;
    int minMN = min(M, N);

    if( M < 0 ) {
        coreblas_error(1, "illegal value of M");
        return -1;
    }
    if( N < 0 ) {
        coreblas_error(2, "illegal value of N");
        return -2;
    }
    if( LDA < max(1, M) ) {
        coreblas_error(5, "illegal value of LDA");
        return -5;
    }
    /*
     * Quick return: the empty-tile test comes before M / N is evaluated so
     * that N == 0 can never be used as a divisor.
     */
    if ( (M == 0) || (N == 0) ) {
        return PLASMA_SUCCESS;
    }
    thcnt = min( info[2], M / N );
    /* Threads whose index is beyond the usable count have nothing to do. */
    if ( thidx >= thcnt ) {
        return PLASMA_SUCCESS;
    }

    *info = 0;
    CORE_dgetrf_reclap_rec( M, minMN, A, LDA, IPIV, info,
                            thidx, thcnt, 0 );
    if ( N > minMN ) {
        /* Columns beyond the leading minMN ones are handled by the update routine. */
        CORE_dgetrf_reclap_update(M, 0, minMN, N-minMN,
                                  A, LDA, IPIV,
                                  thidx, thcnt);
    }
    return info[0];
}

Here is the caller graph for this function:

void CORE_dgetrf_reclap_quark ( Quark quark)

Definition at line 381 of file core_dgetrf_reclap.c.

References A, CORE_dgetrf_reclap(), IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, QUARK_Get_RankInTask(), and quark_unpack_args_10.

{
    /*
     * QUARK task body: unpacks the ten task arguments, stores the caller's
     * rank in the task and maxthreads in info[1] / info[2], and runs
     * CORE_dgetrf_reclap.  Only rank 0 flushes the sequence, and only when
     * info[0] is not PLASMA_SUCCESS and check_info is set.
     */
    int M, N;
    double *A;
    int LDA;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_bool check_info;
    int iinfo;
    int info[3];
    int maxthreads;

    quark_unpack_args_10(quark, M, N, A, LDA, IPIV, sequence, request,
                         check_info, iinfo, maxthreads );

    info[1] = QUARK_Get_RankInTask(quark);
    info[2] = maxthreads;

    CORE_dgetrf_reclap( M, N, A, LDA, IPIV, info );

    if (info[1] == 0 && check_info && info[0] != PLASMA_SUCCESS)
        plasma_sequence_flush(quark, sequence, request, iinfo + info[0] );
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dgetrf_rectil ( const PLASMA_desc  A,
int *  IPIV,
int *  info 
)

Definition at line 653 of file core_dgetrf_rectil.c.

References coreblas_error, plasma_desc_t::m, min, plasma_desc_t::mt, plasma_desc_t::n, and plasma_desc_t::nt.

{
    /*
     * Entry point of the recursive tile LU kernel.  info[1] holds this
     * caller's thread index and info[2] an upper bound on the thread count;
     * info[0] receives the status and is also the return value.  Returns -1
     * when A has more than one tile column, and 0 when this thread's index
     * is not below the usable thread count.
     */
    int first, last;
    int thidx = info[1];
    int thcnt = min( info[2], A.mt );
    int minMN = min( A.m, A.n );
    double pivot;
    int base, extra;

    if ( A.nt > 1 ) {
        coreblas_error(1, "Illegal value of A.nt");
        return -1;
    }
    if ( thidx >= thcnt )
        return 0;

    /*
     * Share the A.mt tile rows among thcnt threads: the first `extra`
     * threads take base+1 rows each, the others take base rows.
     * [first, last) is the range passed on for this thread.
     */
    base  = A.mt / thcnt;
    extra = A.mt % thcnt;
    if (thidx >= extra) {
        first = extra * (base + 1) + (thidx - extra) * base;
        last  = first + base;
        last  = min( last, A.mt );
    } else {
        first = thidx * (base + 1);
        last  = first + (base + 1);
    }

    info[0] = 0;
    CORE_dgetrf_rectil_rec( A, IPIV, info, &pivot,
                            thidx, thcnt, 0, minMN, first, last);
    if ( A.n > minMN ) {
        /* Columns beyond the leading minMN ones are handled by the update routine. */
        CORE_dgetrf_rectil_update( A, IPIV,
                                   0, minMN, A.n-minMN,
                                   thidx, thcnt,
                                   first, last);
    }
    return info[0];
}

Here is the caller graph for this function:

void CORE_dgetrf_rectil_quark ( Quark quark)

Definition at line 726 of file core_dgetrf_rectil.c.

References A, CORE_dgetrf_rectil(), IPIV, plasma_sequence_flush(), PLASMA_SUCCESS, QUARK_Get_RankInTask(), and quark_unpack_args_8.

{
    /*
     * QUARK task body: unpacks the eight task arguments, stores the
     * caller's rank in the task and maxthreads in info[1] / info[2], and
     * runs CORE_dgetrf_rectil.  Only rank 0 flushes the sequence, and only
     * when info[0] is not PLASMA_SUCCESS and check_info is set.
     * Amn is unpacked but not used in this body.
     */
    PLASMA_desc A; /* descriptor, unpacked and passed by value to CORE_dgetrf_rectil */
    double *Amn;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_bool check_info;
    int iinfo;
    int info[3];
    int maxthreads;

    quark_unpack_args_8(quark, A, Amn, IPIV, sequence, request,
                        check_info, iinfo, maxthreads );
    info[1] = QUARK_Get_RankInTask(quark);
    info[2] = maxthreads;
    CORE_dgetrf_rectil( A, IPIV, info );
    if (info[1] == 0 && info[0] != PLASMA_SUCCESS && check_info)
        plasma_sequence_flush(quark, sequence, request, iinfo + info[0] );
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgetrip ( int  m,
int  n,
double *  A,
double *  W 
)

CORE_dgetrip transposes an m-by-n matrix in place using an extra workspace of size m-by-n. Note: for a square tile, the workspace is not used.

Parameters:
[in] m: Number of rows of tile A.
[in] n: Number of columns of tile A.
[in,out] A: Tile of size m-by-n. On exit, A = trans(A).
[out] W: Workspace of size n-by-m if n != m, NULL otherwise.

Definition at line 54 of file core_dgetrip.c.

{
    /*
     * In-place transposition of the m-by-n tile A.  A square tile is
     * transposed by swapping each pair of off-diagonal entries and never
     * touches W; a rectangular tile is transposed into W and copied back.
     */
    int row, col;
    double tmp;

    if( m == n ) {
        /* square case: swap entries pairwise, visiting each pair once (col > row) */
        for (row=0; row<m; row++) {
            for (col=row+1; col<n; col++) {
                double *lhs = &A[col+row*n];
                double *rhs = &A[row+col*m];
                tmp  = *lhs;
                *lhs = *rhs;
                *rhs = tmp;
            }
        }
        return;
    }

    /* rectangular case: build the transpose in W, then overwrite A with it */
    for (row=0; row<m; row++) {
        for (col=0; col<n; col++) {
            W[col+row*n] = A[row+col*m];
        }
    }
    memcpy(A, W, m*n*sizeof(double));
}

Here is the caller graph for this function:

void CORE_dgetrip_f1_quark ( Quark quark)

Definition at line 138 of file core_dgetrip.c.

References A, CORE_dgetrip(), quark_unpack_args_5, and W.

{
    /*
     * QUARK task body: unpacks five task arguments and forwards the first
     * four to CORE_dgetrip.  `fake` is unpacked but never used in this body.
     */
    int m, n;
    double *A, *W;
    double *fake;

    quark_unpack_args_5(quark, m, n, A, W, fake);
    CORE_dgetrip(m, n, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgetrip_f2_quark ( Quark quark)

Definition at line 178 of file core_dgetrip.c.

References A, CORE_dgetrip(), quark_unpack_args_6, and W.

{
    /*
     * QUARK task body: unpacks six task arguments and forwards the first
     * four to CORE_dgetrip.  fake1 and fake2 are unpacked but never used in
     * this body.
     */
    int m, n;
    double *A, *W;
    double *fake1, *fake2;

    quark_unpack_args_6(quark, m, n, A, W, fake1, fake2);
    CORE_dgetrip(m, n, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dgetrip_quark ( Quark quark)

Definition at line 101 of file core_dgetrip.c.

References A, CORE_dgetrip(), quark_unpack_args_4, and W.

{
    /* QUARK task body: unpack the four task arguments and forward them to CORE_dgetrip. */
    int m, n;
    double *A, *W;

    quark_unpack_args_4(quark, m, n, A, W);
    CORE_dgetrip(m, n, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dhbelr ( int  uplo,
int  N,
PLASMA_desc *  A,
double *  V,
double *  TAU,
int  st,
int  ed,
int  eltsize 
)

Definition at line 78 of file core_dhbelr.c.

References A, CORE_dlarfx2(), CORE_dlarfx2c(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * For i = ed down to st+1: generates a reflector of order 2 with
     * LAPACKE_dlarfg_work that zeroes the band element in row/column st-1
     * (stored in V(i)/TAU(i)), applies it on one side of the band and to
     * the 2x2 diagonal block (CORE_dlarfx2c).  A second sweep applies the
     * same reflectors on the opposite side to the remaining elements.
     *
     * Returns PLASMA_SUCCESS, -2 if N < 0, -23 if ed <= st.
     * NOTE(review): the sibling kernels report the st/ed error as 6, this
     * one as 23 -- left unchanged because callers may depend on the value.
     */
    int NB, J1, J2;
    int len1, len2, t1ed, t2st;
    int i;
    static double zzero = 0.0;
    /* By-value copy of the descriptor: ELTLDD() below is applied to vA. */
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(23, "Illegal value of st and ed (internal)");
        return -23;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* NB must be set on every non-trivial path: it is the divisor used below. */
    NB = A->mb;

    if( uplo == PlasmaLower ) {
        /* ========================
         * LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band */
            *V(i) = *A(i, (st-1));
            *A(i, (st-1)) = zzero;
            LAPACKE_dlarfg_work( 2, A((i-1),(st-1)), V(i), 1, TAU(i));
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1 = st;
            J2 = i-2;
            /* Split [J1, J2] at t1ed = (J2/NB)*NB; each non-empty piece gets its own CORE_dlarfx2 call. */
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1; /* can be negative */
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, i-1), A(i, J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i) );
            CORE_dlarfx2c(PlasmaLower, *V(i), *TAU(i), A(i-1, i-1), A(i, i-1), A(i, i));
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1 = i+1;
            J2 = min(ed,N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1; /* can be negative */
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, *V(i), *TAU(i), A(J1, i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, *V(i), *TAU(i), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
        }
    }else{
        /* ========================
         * UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band*/
            *V(i) = *A((st-1), i);
            *A((st-1), i) = zzero;
            LAPACKE_dlarfg_work( 2, A(st-1, i-1), V(i), 1, TAU(i));
            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1 = st;
            J2 = i-2;
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1; /* can be negative */
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1, i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
            CORE_dlarfx2c(PlasmaUpper, *V(i), *TAU(i), A(i-1, i-1), A(i-1, i), A(i,i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1 = i+1;
            J2 = min(ed,N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1,J1);
            len1 = t1ed-J1+1; /* can be negative */
            len2 = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , (*V(i)), *TAU(i), A(i-1, J1 ), ELTLDD(vA, i-1), A(i, J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , (*V(i)), *TAU(i), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i) );
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dhblrx ( int  uplo,
int  N,
PLASMA_desc *  A,
double *  V,
double *  TAU,
int  st,
int  ed,
int  eltsize 
)

Definition at line 76 of file core_dhblrx.c.

References A, CORE_dlarfx2(), CORE_dlarfx2c(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, PlasmaUpper, TAU, and V.

{
    /*
     * Sweeps i from ed down to st+1 and applies the 2-element reflector
     * (*V(i), *TAU(i)) to rows/columns i-1 and i of A.
     *
     * Every sweep range [J1, J2] is cut at t1ed = (J2/NB)*NB and handed to
     * CORE_dlarfx2 in two pieces: len1 elements from J1 up to t1ed, and
     * len2 elements from t2st up to J2. A non-positive length skips the
     * call. NOTE(review): the cut presumably follows tile boundaries of the
     * descriptor (NB = A->mb) -- confirm against the A()/ELTLDD macros.
     *
     * Returns -2 for an illegal N, -6 when ed <= st, PLASMA_SUCCESS otherwise.
     */
    int NB, J1, J2;
    int len1, len2, t1ed, t2st;
    int i;
    /* By-value descriptor used by ELTLDD(vA, ...).
     * NOTE(review): assumes ELTLDD takes the descriptor itself, not a
     * pointer -- confirm against the macro definition. */
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return: nothing to do for an empty matrix. */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* NB must be set on every non-trivial path: it is the divisor below. */
    NB = A->mb;

    if (uplo == PlasmaLower) {
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* apply reflector from the left (horizontal row) and from the
             * right for only the diagonal 2x2. */
            J1   = st;
            J2   = i-2;
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;   /* can be negative */
            len2 = J2-t2st+1;
            if (len1 > 0)
                CORE_dlarfx2(PlasmaLeft, len1, *V(i), (*TAU(i)),
                             A(i-1, J1), ELTLDD(vA, i-1),
                             A(i,   J1), ELTLDD(vA, i));
            if (len2 > 0)
                CORE_dlarfx2(PlasmaLeft, len2, *V(i), (*TAU(i)),
                             A(i-1, t2st), ELTLDD(vA, i-1),
                             A(i,   t2st), ELTLDD(vA, i));
            CORE_dlarfx2c(PlasmaLower, *V(i), *TAU(i),
                          A(i-1, i-1), A(i, i-1), A(i, i));
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1; i--) {
            J1   = i+1;
            J2   = min(ed, N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;   /* can be negative */
            len2 = J2-t2st+1;
            if (len1 > 0)
                CORE_dlarfx2(PlasmaRight, len1, *V(i), *TAU(i),
                             A(J1, i-1), ELTLDD(vA, J1),
                             A(J1, i),   ELTLDD(vA, J1));
            if (len2 > 0)
                CORE_dlarfx2(PlasmaRight, len2, *V(i), *TAU(i),
                             A(t2st, i-1), ELTLDD(vA, t2st),
                             A(t2st, i),   ELTLDD(vA, t2st));
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* apply reflector from the right (vertical column) and from the
             * left for only the diagonal 2x2. */
            J1   = st;
            J2   = i-2;
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;   /* can be negative */
            len2 = J2-t2st+1;
            if (len1 > 0)
                CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)),
                             A(J1, i-1), ELTLDD(vA, J1),
                             A(J1, i),   ELTLDD(vA, J1));
            if (len2 > 0)
                CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)),
                             A(t2st, i-1), ELTLDD(vA, t2st),
                             A(t2st, i),   ELTLDD(vA, t2st));
            CORE_dlarfx2c(PlasmaUpper, *V(i), *TAU(i),
                          A(i-1, i-1), A(i-1, i), A(i, i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1; i--) {
            J1   = i+1;
            J2   = min(ed, N);
            t1ed = (J2/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;   /* can be negative */
            len2 = J2-t2st+1;
            if (len1 > 0)
                CORE_dlarfx2(PlasmaLeft, len1, (*V(i)), *TAU(i),
                             A(i-1, J1), ELTLDD(vA, i-1),
                             A(i,   J1), ELTLDD(vA, i));
            if (len2 > 0)
                CORE_dlarfx2(PlasmaLeft, len2, (*V(i)), *TAU(i),
                             A(i-1, t2st), ELTLDD(vA, i-1),
                             A(i,   t2st), ELTLDD(vA, i));
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dhbrce ( int  uplo,
int  N,
PLASMA_desc *  A,
double *  V,
double *  TAU,
int  st,
int  ed,
int  eltsize 
)

Definition at line 76 of file core_dhbrce.c.

References A, CORE_dlarfx2(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, TAU, and V.

{
    /*
     * For i = ed down to st+1: applies the reflector (*V(i), *TAU(i)) to the
     * elements J1..J3 beyond the current block, which may create one new
     * nonzero just outside the band (when J3 > J2). That nonzero is then
     * annihilated with a fresh 2-element Householder reflector stored in
     * (V(J3), TAU(J3)). A second sweep applies those new reflectors from the
     * other side.
     *
     * Ranges are cut at t1ed = (Jx/NB)*NB and passed to CORE_dlarfx2 in two
     * pieces (len1, len2); a non-positive length skips the call.
     *
     * Returns -2 for an illegal N, -6 when ed <= st, PLASMA_SUCCESS otherwise.
     */
    int NB, J1, J2, J3, KDM2, len, pt;
    int len1, len2, t1ed, t2st;
    int i;
    static double zzero = 0.0;
    /* By-value descriptor used by ELTLDD(vA, ...).
     * NOTE(review): assumes ELTLDD takes the descriptor itself, not a
     * pointer -- confirm against the macro definition. */
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return: nothing to do for an empty matrix. */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Both values are needed on every non-trivial path. */
    NB   = A->mb;
    KDM2 = A->mb-2;

    if (uplo == PlasmaLower) {
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* apply Householder from the right, and create a new nonzero
             * outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len = J3-J1+1;
            if (J3 > J2)
                *A(J3, (i-1)) = zzero; /* could be removed because A is supposed to be band. */
            t1ed = (J3/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;   /* can be negative */
            len2 = J3-t2st+1;
            if (len1 > 0)
                CORE_dlarfx2(PlasmaRight, len1, *V(i), *TAU(i),
                             A(J1, i-1), ELTLDD(vA, J1),
                             A(J1, i),   ELTLDD(vA, J1));
            if (len2 > 0)
                CORE_dlarfx2(PlasmaRight, len2, *V(i), *TAU(i),
                             A(t2st, i-1), ELTLDD(vA, t2st),
                             A(t2st, i),   ELTLDD(vA, t2st));
            /* if nonzero element a(j+kd,j-1) has been created outside the
             * band (if index < N) then eliminate it. */
            len = J3-J2;        /* either 1 or 0 */
            if (len > 0) {
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)      = *A(J3, i-1);
                *A(J3, i-1) = 0.0;
                LAPACKE_dlarfg_work(2, A(J2, i-1), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1; i--) {
            /* find if a nonzero was created; if yes apply left, else nothing to do. */
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len = J3-J2;
            if (len > 0) {
                pt   = J2;
                J1   = i;
                J2   = min(ed, N);
                t1ed = (J2/NB)*NB;
                t2st = max(t1ed+1, J1);
                len1 = t1ed-J1+1;   /* can be negative */
                len2 = J2-t2st+1;
                if (len1 > 0)
                    CORE_dlarfx2(PlasmaLeft, len1, *V(J3), (*TAU(J3)),
                                 A(pt,     i), ELTLDD(vA, pt),
                                 A((pt+1), i), ELTLDD(vA, pt+1));
                if (len2 > 0)
                    CORE_dlarfx2(PlasmaLeft, len2, *V(J3), (*TAU(J3)),
                                 A(pt,     t2st), ELTLDD(vA, pt),
                                 A((pt+1), t2st), ELTLDD(vA, pt+1));
            }
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* apply Householder from the left, and create a new nonzero
             * outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len = J3-J1+1;
            if (J3 > J2)
                *A((i-1), J3) = zzero; /* could be removed because A is supposed to be band. */
            t1ed = (J3/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;   /* can be negative */
            len2 = J3-t2st+1;
            if (len1 > 0)
                CORE_dlarfx2(PlasmaLeft, len1, (*V(i)), *TAU(i),
                             A(i-1, J1), ELTLDD(vA, (i-1)),
                             A(i,   J1), ELTLDD(vA, i));
            if (len2 > 0)
                CORE_dlarfx2(PlasmaLeft, len2, (*V(i)), *TAU(i),
                             A(i-1, t2st), ELTLDD(vA, (i-1)),
                             A(i,   t2st), ELTLDD(vA, i));
            /* if nonzero element a(j+kd,j-1) has been created outside the
             * band (if index < N) then eliminate it. */
            len = J3-J2;        /* either 1 or 0 */
            if (len > 0) {
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)        = *A((i-1), J3);
                *A((i-1), J3) = 0.0;
                LAPACKE_dlarfg_work(2, A((i-1), J2), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1; i--) {
            /* find if a nonzero was created; if yes apply right, else nothing to do. */
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len = J3-J2;
            if (len > 0) {
                pt   = J2;
                J1   = i;
                J2   = min(ed, N);
                t1ed = (J2/NB)*NB;
                t2st = max(t1ed+1, J1);
                len1 = t1ed-J1+1;   /* can be negative */
                len2 = J2-t2st+1;
                if (len1 > 0)
                    CORE_dlarfx2(PlasmaRight, len1, (*V(J3)), (*TAU(J3)),
                                 A(i, pt),   ELTLDD(vA, i),
                                 A(i, pt+1), ELTLDD(vA, i));
                if (len2 > 0)
                    CORE_dlarfx2(PlasmaRight, len2, (*V(J3)), (*TAU(J3)),
                                 A(t2st, pt),   ELTLDD(vA, t2st),
                                 A(t2st, pt+1), ELTLDD(vA, t2st));
            }
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dlacpy ( PLASMA_enum  uplo,
int  M,
int  N,
double *  A,
int  LDA,
double *  B,
int  LDB 
)

Definition at line 29 of file core_dlacpy.c.

References lapack_const.

{
    /* Copies all or part (selected by uplo) of the M-by-N tile A into B.
     * The uplo selector must be passed to LAPACKE right after the layout;
     * lapack_const() translates the PLASMA constant for it. */
    LAPACKE_dlacpy_work(
        LAPACK_COL_MAJOR,
        lapack_const(uplo),
        M, N, A, LDA, B, LDB);
}

Here is the caller graph for this function:

void CORE_dlacpy_quark ( Quark quark)

Definition at line 66 of file core_dlacpy.c.

References A, B, lapack_const, quark_unpack_args_7, and uplo.

{
    /* QUARK task wrapper: unpacks the seven task arguments and performs the
     * same LAPACKE copy as CORE_dlacpy. */
    PLASMA_enum uplo;   /* unpacked below and passed to lapack_const() */
    int M;
    int N;
    double *A;
    int LDA;
    double *B;
    int LDB;

    quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB);
    LAPACKE_dlacpy_work(
        LAPACK_COL_MAJOR,
        lapack_const(uplo),
        M, N, A, LDA, B, LDB);
}

Here is the caller graph for this function:

void CORE_dlange ( int  norm,
int  M,
int  N,
double *  A,
int  LDA,
double *  work,
double *  normA 
)

Definition at line 29 of file core_dlange.c.

References lapack_const.

{
    /* Stores the requested norm of the M-by-N tile A through normA.
     * The norm selector is the second LAPACKE argument; lapack_const()
     * translates the PLASMA constant for it. */
    *normA = LAPACKE_dlange_work(
        LAPACK_COL_MAJOR,
        lapack_const(norm),
        M, N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_dlange_f1_quark ( Quark quark)

Definition at line 114 of file core_dlange.c.

References A, lapack_const, norm, and quark_unpack_args_8.

{
    /* QUARK task wrapper (variant with one extra dependency argument):
     * unpack the eight task arguments, then write the norm of A to *normA. */
    int norm, M, N, LDA;
    double *A;
    double *work;
    double *normA;
    double *fake;   /* unpacked only; not read by the computation below */

    quark_unpack_args_8(quark, norm, M, N, A, LDA, work, normA, fake);

    *normA = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(norm),
                                 M, N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_dlange_quark ( Quark quark)

Definition at line 67 of file core_dlange.c.

References A, lapack_const, norm, and quark_unpack_args_7.

{
    /* QUARK task wrapper: unpack the seven task arguments, then write the
     * requested norm of A to *normA. */
    int norm, M, N, LDA;
    double *A;
    double *work;
    double *normA;

    quark_unpack_args_7(quark, norm, M, N, A, LDA, work, normA);

    *normA = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(norm),
                                 M, N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_dlansy ( int  norm,
int  uplo,
int  N,
double *  A,
int  LDA,
double *  work,
double *  normA 
)

Definition at line 29 of file core_dlansy.c.

References lapack_const.

{
    /* Stores the requested norm of the N-by-N symmetric tile A through
     * normA. LAPACKE expects the norm and uplo selectors right after the
     * layout; lapack_const() translates the PLASMA constants. */
    *normA = LAPACKE_dlansy_work(
        LAPACK_COL_MAJOR,
        lapack_const(norm), lapack_const(uplo),
        N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_dlansy_f1_quark ( Quark quark)

Definition at line 114 of file core_dlansy.c.

References A, lapack_const, norm, quark_unpack_args_8, and uplo.

{
    /* QUARK task wrapper (variant with one extra dependency argument):
     * unpack the eight task arguments, then write the norm of the
     * symmetric tile A to *normA. */
    int norm, uplo, N, LDA;
    double *A;
    double *work;
    double *normA;
    double *fake;   /* unpacked only; not read by the computation below */

    quark_unpack_args_8(quark, norm, uplo, N, A, LDA, work, normA, fake);

    *normA = LAPACKE_dlansy_work(LAPACK_COL_MAJOR,
                                 lapack_const(norm), lapack_const(uplo),
                                 N, A, LDA, work);
}

Here is the caller graph for this function:

void CORE_dlansy_quark ( Quark quark)

Definition at line 67 of file core_dlansy.c.

References A, lapack_const, norm, quark_unpack_args_7, and uplo.

{
    /* QUARK task wrapper: unpack the seven task arguments, then write the
     * requested norm of the symmetric tile A to *normA. */
    double *normA;
    int norm;
    int uplo;
    int N;
    double *A;
    int LDA;
    double *work;

    /* normA is unpacked last, matching CORE_dlansy_f1_quark and
     * CORE_dlange_quark. NOTE(review): confirm against the argument order
     * used where the task is inserted. */
    quark_unpack_args_7(quark, norm, uplo, N, A, LDA, work, normA);
    *normA = LAPACKE_dlansy_work(
        LAPACK_COL_MAJOR,
        lapack_const(norm), lapack_const(uplo),
        N, A, LDA, work);
}

Here is the caller graph for this function:

int CORE_dlarfx2 ( PLASMA_enum  side,
int  N,
double  V,
double  TAU,
double *  C1,
int  LDC1,
double *  C2,
int  LDC2 
)

Declarations of serial kernels - alphabetical order

Purpose

CORE_dlarfx2 applies a real elementary reflector H to a real m by n matrix C, from either the left or the right. H is represented in the form

  H = I - tau * v * v'

where tau is a real scalar and v is a real vector.

If tau = 0, then H is taken to be the unit matrix

This version uses inline code if H has order < 11.

Arguments

Parameters:
[in]side
  • PlasmaLeft : form H * C
  • PlasmaRight: form C * H
[in]NThe number of columns of C1 and C2 if side = PlasmaLeft. The number of rows of C1 and C2 if side = PlasmaRight.
[in]VThe double complex V in the representation of H.
[in]TAUThe value tau in the representation of H.
[in,out]C1dimension (LDC1,N), if side = PlasmaLeft dimension (LDC1,1), if side = PlasmaRight On entry, the m by n matrix C1. On exit, C1 is overwritten by the matrix H * C1 if SIDE = PlasmaLeft, or C1 * H if SIDE = PlasmaRight.
[in]LDC1The leading dimension of the array C1. LDC1 >= max(1,N), if side == PlasmaRight. LDC1 >= 1, otherwise.
[in,out]C2dimension (LDC2,N), if side = PlasmaLeft dimension (LDC2,1), if side = PlasmaRight On entry, the m by n matrix C2. On exit, C2 is overwritten by the matrix H * C2 if SIDE = PlasmaLeft, or C2 * H if SIDE = PlasmaRight.
[in]LDC2The leading dimension of the array C2. LDC2 >= max(1,N), if side == PlasmaRight. LDC2 >= 1, otherwise.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 86 of file core_dlarfx_tbrd.c.

References PLASMA_SUCCESS, PlasmaLeft, T2, TAU, and V.

{
    /*
     * Applies the 2x2 reflector H = I - TAU * [1; V] * [1, V] to N pairs
     * (C1[k], C2[k]).
     *   side == PlasmaLeft : consecutive pairs are LDC1 / LDC2 elements
     *                        apart (one pair per column).
     *   otherwise          : consecutive pairs are adjacent in memory
     *                        (one pair per row).
     * Returns PLASMA_SUCCESS.
     */
    double V2, T2, SUM;
    int j;

    /* Quick return: TAU == 0 makes H the identity. */
    if (TAU == (double)0.0)
        return PLASMA_SUCCESS;

    /*
     * Special code for 2 x 2 Householder where V1 = I
     */
    if (side == PlasmaLeft) {
        V2 = (V);
        T2 = TAU*(V2);
        for (j = 0; j < N; j++, C1 += LDC1) {
            SUM = *C1 + V2 * (*C2);
            *C1 = *C1 - SUM*TAU;
            *C2 = *C2 - SUM*T2;
            C2 += LDC2;
        }
    }
    else {
        V2 = V;
        T2 = TAU*(V2);
        for (j = 0; j < N; j++, C1++) {
            SUM = *C1 + V2 * (*C2);
            *C1 = *C1 - SUM*TAU;
            *C2 = *C2 - SUM*T2;
            C2++;
        }
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

int CORE_dlarfx2c ( PLASMA_enum  uplo,
double  V,
double  TAU,
double *  C1,
double *  C2,
double *  C3 
)

Purpose

CORE_dlarfx2c applies a real elementary reflector H to a diagonal corner C=[C1, C2, C3], from both the left and the right side: C = H * C * H. It is used in the Hermitian (here: real symmetric) case. If PlasmaLower, a left apply is followed by a right apply. If PlasmaUpper, a right apply is followed by a left apply. H is represented in the form given below.

This routine is a special code for a corner C diagonal block C1 C2 C3

H = I - tau * v * v'

where tau is a complex scalar and v is a complex vector.

If tau = 0, then H is taken to be the unit matrix

This version uses inline code if H has order < 11.

Arguments

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in]VThe double complex V in the representation of H.
[in]TAUThe value tau in the representation of H.
[in,out]C1On entry, the element C1. On exit, C1 is overwritten by the result H * C * H.
[in,out]C2On entry, the element C2. On exit, C2 is overwritten by the result H * C * H.
[in,out]C3On entry, the element C3. On exit, C3 is overwritten by the result H * C * H.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 185 of file core_dlarfx_tbrd.c.

References PLASMA_SUCCESS, PlasmaLower, T2, TAU, and V.

{
    /*
     * Two-sided update C = H * C * H of a 2x2 diagonal corner with
     * H = I - TAU * [1; V] * [1, V]. Only three entries are stored:
     * C1 and C3 on the diagonal and C2 off the diagonal; TEMP stands in
     * for the mirrored off-diagonal entry, which is never written back.
     * Returns PLASMA_SUCCESS.
     */
    double T2, SUM, TEMP;

    /* Quick return: TAU == 0 makes H the identity. */
    if (TAU == (double)0.0)
        return PLASMA_SUCCESS;

    /*
     * Special code for a diagonal block  C1
     *                                    C2  C3
     */
    if (uplo == PlasmaLower) {
        /*
         * Do the corner Left then Right (used for the lower case
         * tridiag) L and R for the 2x2 corner
         *             C(N-1, N-1)  C(N-1,N)        C1  TEMP
         *             C(N  , N-1)  C(N  ,N)        C2  C3
         * For Left : use (TAU) and V.
         * For Right: nothing, keep TAU and V.
         * Left 1 ==> C1
         *            C2
         */
        TEMP = (*C2);           /* copy C2 here before modifying it. */
        T2   = (TAU) * V;
        SUM  = *C1 + (V) * (*C2);
        *C1  = *C1 - SUM * (TAU);
        *C2  = *C2 - SUM * T2;
        /* Left 2 ==> TEMP */
        /*            C3   */
        SUM  = TEMP + (V) * (*C3);
        TEMP = TEMP - SUM * (TAU);
        *C3  = *C3  - SUM * T2;
        /* Right 1 ==> C1 TEMP. NB: no need to compute corner (2,2)=TEMP */
        T2   = TAU * (V);
        SUM  = *C1 + V*TEMP;
        *C1  = *C1 - SUM*TAU;
        /* Right 2 ==> C2 C3 */
        SUM  = *C2 + V*(*C3);
        *C2  = *C2 - SUM*TAU;
        *C3  = *C3 - SUM*T2;
    }
    else {
        /*
         * Do the corner Right then Left (used for the upper case tridiag)
         *             C(N-1, N-1)  C(N-1,N)        C1    C2
         *             C(N  , N-1)  C(N  ,N)        TEMP  C3
         * For Left : use TAU and (V).
         * For Right: use (TAU) and (V).
         * Right 1 ==> C1 C2
         */
        TEMP = (*C2);           /* copy C2 here before modifying it. */
        T2   = (TAU) * (V);
        SUM  = *C1 + V * (*C2);
        *C1  = *C1 - SUM * (TAU);
        *C2  = *C2 - SUM * T2;
        /* Right 2 ==> TEMP C3 */
        SUM  = TEMP + V * (*C3);
        TEMP = TEMP - SUM * (TAU);
        *C3  = *C3  - SUM * T2;
        /* Left 1 ==> C1   */
        /*            TEMP. NB: no need to compute corner (2,1)=TEMP */
        T2   = TAU * V;
        SUM  = *C1 + (V) * TEMP;
        *C1  = *C1 - SUM * TAU;
        /* Left 2 ==> C2 */
        /*            C3 */
        SUM  = *C2 + (V) * (*C3);
        *C2  = *C2 - SUM * TAU;
        *C3  = *C3 - SUM * T2;
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

int CORE_dlarfx2ce ( PLASMA_enum  uplo,
double *  V,
double *  TAU,
double *  C1,
double *  C2,
double *  C3 
)

Purpose

CORE_dlarfx2ce applies a real elementary reflector H to a diagonal corner C=[C1, C2, C3], from both the left and the right side: C = H * C * H. It is used in the case of general matrices, where it creates a nonzero at the NEW_NNZ position, then eliminates it and updates the reflector V and TAU. If PlasmaLower, a left apply is followed by a right apply. If PlasmaUpper, a right apply is followed by a left apply. H is represented in the form given below.

This routine is a special code for a corner C diagonal block C1 NEW_NNZ C2 C3

H = I - tau * v * v'

where tau is a complex scalar and v is a complex vector.

If tau = 0, then H is taken to be the unit matrix

This version uses inline code if H has order < 11.

Arguments

Parameters:
[in]uplo= PlasmaUpper: Upper triangle of A is stored; = PlasmaLower: Lower triangle of A is stored.
[in,out]VOn entry, the double complex V in the representation of H. On exit, the double complex V in the representation of H, updated by the elimination of the NEW_NNZ created by the left apply in case of PlasmaLower or the right apply in case of PlasmaUpper.
[in,out]TAUOn entry, the value tau in the representation of H. On exit, the value tau in the representation of H, updated by the elimination of the NEW_NNZ created by the left apply in case of PlasmaLower or the right apply in case of PlasmaUpper.
[in,out]C1On entry, the element C1. On exit, C1 is overwritten by the result H * C * H.
[in,out]C2On entry, the element C2. On exit, C2 is overwritten by the result H * C * H.
[in,out]C3On entry, the element C3. On exit, C3 is overwritten by the result H * C * H.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 335 of file core_dlarfx_tbrd.c.

References PLASMA_SUCCESS, PlasmaLower, PlasmaUpper, T2, and V.

{
    /*
     * Applies the reflector (*V, *TAU) to one side of the 2x2 corner, which
     * creates a new nonzero (TEMP). A fresh reflector annihilating TEMP
     * against C1 is generated with LAPACKE_dlarfg_work and written back to
     * *V / *TAU, then applied from the other side to [C2, C3].
     * Nothing is done when uplo is neither PlasmaLower nor PlasmaUpper.
     * Returns PLASMA_SUCCESS.
     */
    double T2, SUM, TEMP, VIN, TAUIN;

    /* Quick return: *TAU == 0 makes H the identity. */
    if (*TAU == (double)0.0)
        return PLASMA_SUCCESS;

    /*
     * Special code for a diagonal block  C1
     *                                    C2  C3
     */
    if (uplo == PlasmaLower) {
        /*
         * Do the corner for the lower case BIDIAG ==> Left then will
         * create a new nnz. eliminate it and modify V TAU and then
         * Right L and R for the 2x2 corner
         *             C(N-1, N-1)  C(N-1,N)        C1  TEMP
         *             C(N  , N-1)  C(N  ,N)        C2  C3
         */
        VIN   = *V;
        TAUIN = (*TAU);
        /* Left 1 ==> C1 */
        /*            C2 */
        T2    = TAUIN * (VIN);
        SUM   = *C1 + VIN*(*C2);
        *C1   = *C1 - SUM*TAUIN;
        *C2   = *C2 - SUM*T2;
        /* new nnz at TEMP and update C3 */
        SUM   = VIN * (*C3);
        TEMP  = - SUM * TAUIN;
        *C3   = *C3 - SUM * T2;
        /* generate Householder to annihilate the nonzero created at TEMP */
        *V    = TEMP;
        LAPACKE_dlarfg_work(2, C1, V, 1, TAU);
        VIN   = (*V);
        TAUIN = (*TAU);
        /* Right 1 ==> C2 C3 */
        T2    = TAUIN * (VIN);
        SUM   = *C2 + VIN*(*C3);
        *C2   = *C2 - SUM*TAUIN;
        *C3   = *C3 - SUM*T2;
    } else if (uplo == PlasmaUpper) {
        /*
         * Do the corner for the upper case BIDIAG ==> Right then will
         * create a new nnz. eliminate it and modify V TAU and then
         * Left
         *             C(N-1, N-1)  C(N-1,N)        C1    C2
         *             C(N  , N-1)  C(N  ,N)        TEMP  C3
         * For Left : use (TAU) and V.
         * For Right: use (TAU) and (V) as input.
         */
        VIN   = (*V);
        TAUIN = (*TAU);
        /* Right 1 ==> C1 C2 */
        T2    = TAUIN*(VIN);
        SUM   = *C1 + VIN*(*C2);
        *C1   = *C1 - SUM*TAUIN;
        *C2   = *C2 - SUM*T2;
        /* new nnz at TEMP and update C3 */
        SUM   = VIN * (*C3);
        TEMP  = - SUM * TAUIN;
        *C3   = *C3 - SUM * T2;
        /* generate Householder to annihilate the nonzero created at TEMP */
        *V    = TEMP;
        LAPACKE_dlarfg_work(2, C1, V, 1, TAU);
        VIN   = *V;
        TAUIN = (*TAU);
        /* apply from the Left using the NEW V TAU to the remaining 2 elements [C2 C3] */
        /* Left 2 ==> C2 */
        /*            C3 */
        T2    = TAUIN*(VIN);
        SUM   = *C2 + VIN*(*C3);
        *C2   = *C2 - SUM*TAUIN;
        *C3   = *C3 - SUM*T2;
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_dlaset ( PLASMA_enum  uplo,
int  M,
int  N,
double  alpha,
double  beta,
double *  A,
int  LDA 
)

CORE_dlaset - Sets the elements of the matrix A on the diagonal to beta and on the off-diagonals to alpha

Parameters:
[in]uploSpecifies which elements of the matrix are to be set = PlasmaUpper: Upper part of A is set; = PlasmaLower: Lower part of A is set; = PlasmaUpperLower: ALL elements of A are set.
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in]alphaThe constant to which the off-diagonal elements are to be set.
[in]betaThe constant to which the diagonal elements are to be set.
[in,out]AOn entry, the M-by-N tile A. On exit, A has been set accordingly.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).

Definition at line 58 of file core_dlaset.c.

References lapack_const.

{
    /* Sets the off-diagonal entries of the selected part of A to alpha and
     * the diagonal entries to beta. The uplo selector is the second LAPACKE
     * argument; lapack_const() translates the PLASMA constant for it. */
    LAPACKE_dlaset_work(
        LAPACK_COL_MAJOR,
        lapack_const(uplo),
        M, N, alpha, beta, A, LDA);
}

Here is the caller graph for this function:

void CORE_dlaset2 ( PLASMA_enum  uplo,
int  M,
int  N,
double  alpha,
double *  A,
int  LDA 
)

CORE_dlaset2 - Sets the elements of the matrix A to alpha. Not LAPACK compliant! Read below.

Parameters:
[in]uploSpecifies which elements of the matrix are to be set = PlasmaUpper: STRICT Upper part of A is set to alpha; = PlasmaLower: STRICT Lower part of A is set to alpha; = PlasmaUpperLower: ALL elements of A are set to alpha. Not LAPACK Compliant.
[in]MThe number of rows of the matrix A. M >= 0.
[in]NThe number of columns of the matrix A. N >= 0.
[in]alphaThe constant to which the elements are to be set.
[in,out]AOn entry, the M-by-N tile A. On exit, A has been set to alpha accordingly.
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).

Definition at line 56 of file core_dlaset2.c.

References lapack_const, PlasmaLower, and PlasmaUpper.

{
    /*
     * Sets the strict upper part, the strict lower part, or all of A to
     * alpha. The strict parts are reached by shifting the start of the tile:
     * one column to the right (A+LDA, N-1 columns) for PlasmaUpper, one row
     * down (A+1, M-1 rows) for PlasmaLower. Each call passes alpha for both
     * the off-diagonal and the diagonal value.
     */
    if (uplo == PlasmaUpper) {
        LAPACKE_dlaset_work(
            LAPACK_COL_MAJOR,
            lapack_const(uplo),
            M, N-1, alpha, alpha, A+LDA, LDA);
    }
    else if (uplo == PlasmaLower) {
        LAPACKE_dlaset_work(
            LAPACK_COL_MAJOR,
            lapack_const(uplo),
            M-1, N, alpha, alpha, A+1, LDA);
    }
    else {
        LAPACKE_dlaset_work(
            LAPACK_COL_MAJOR,
            lapack_const(uplo),
            M, N, alpha, alpha, A, LDA);
    }
}

Here is the caller graph for this function:

void CORE_dlaset2_quark ( Quark quark)

Definition at line 103 of file core_dlaset2.c.

References A, CORE_dlaset2(), quark_unpack_args_6, and uplo.

{
    /* QUARK task wrapper: unpack the six task arguments and forward them
     * unchanged to CORE_dlaset2. */
    int uplo, M, N, LDA;
    double alpha;
    double *A;

    quark_unpack_args_6(quark, uplo, M, N, alpha, A, LDA);

    CORE_dlaset2(uplo, M, N, alpha, A, LDA);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dlaset_quark ( Quark quark)

Definition at line 95 of file core_dlaset.c.

References A, lapack_const, quark_unpack_args_7, and uplo.

{
    /* QUARK task wrapper: unpack the seven task arguments and set the
     * selected part of A (off-diagonal alpha, diagonal beta) via LAPACKE. */
    int uplo, M, N, LDA;
    double alpha, beta;
    double *A;

    quark_unpack_args_7(quark, uplo, M, N, alpha, beta, A, LDA);

    LAPACKE_dlaset_work(LAPACK_COL_MAJOR, lapack_const(uplo),
                        M, N, alpha, beta, A, LDA);
}

Here is the caller graph for this function:

void CORE_dlaswp ( int  N,
double *  A,
int  LDA,
int  I1,
int  I2,
int *  IPIV,
int  INC 
)

Definition at line 29 of file core_dlaswp.c.

{
    /* Thin wrapper: forwards every argument to LAPACKE in column-major layout. */
    LAPACKE_dlaswp_work(LAPACK_COL_MAJOR,
                        N, A, LDA,
                        I1, I2, IPIV, INC);
}
void CORE_dlaswp_f2_quark ( Quark quark)

Definition at line 102 of file core_dlaswp.c.

References A, and quark_unpack_args_9.

{
    /* QUARK task wrapper (variant with two extra dependency arguments):
     * unpack the nine task arguments and run the LAPACKE row swap. */
    int n;
    int lda;
    int i1;
    int i2;
    int inc;
    int *ipiv;
    double *A;
    void *fake1;    /* unpacked only; not read below */
    void *fake2;    /* unpacked only; not read below */

    quark_unpack_args_9(quark, n, A, lda, i1, i2, ipiv, inc, fake1, fake2);

    LAPACKE_dlaswp_work(LAPACK_COL_MAJOR,
                        n, A, lda,
                        i1, i2, ipiv, inc);
}

Here is the caller graph for this function:

int CORE_dlaswp_ontile ( PLASMA_desc  descA,
int  i1,
int  i2,
int *  ipiv,
int  inc 
)

CORE_dlaswp_ontile applies the dlaswp row interchanges to a matrix stored in tile layout.

Parameters:
[in,out]AThe descriptor of the matrix A to permute.
[in]i1The first element of IPIV for which a row interchange will be done.
[in]i2The last element of IPIV for which a row interchange will be done.
[in]ipivThe pivot indices; Only the element in position i1 to i2 are accessed. The pivot are offset by A.i.
[in]incThe increment between successive values of IPIV. If inc is negative, the pivots are applied in reverse order.

Definition at line 147 of file core_dlaswp.c.

References A, BLKLDD, cblas_dswap(), coreblas_error, plasma_desc_t::i, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nt, and PLASMA_SUCCESS.

{
    /*
     * Row interchanges on a single tile column (descA.nt must be <= 1).
     * For each pivot entry, row j is swapped with row ip = *ipiv - descA.i - 1
     * over descA.n columns using cblas_dswap; the pivot row is located by
     * tile index ip / descA.mb and offset ip % descA.mb.
     * inc > 0 walks j upward from i1; otherwise j walks downward from i2-1
     * and ipiv is repositioned first.
     *
     * Returns -1, -2 or -3 on an illegal descA.nt, i1 or i2 (including an
     * i1/i2 pair that is not within one block), PLASMA_SUCCESS otherwise.
     */
    int i, j, ip, it;
    double *A1;
    int lda1, lda2;

    /* Change i1 to C notation */
    i1--;

    if ( descA.nt > 1 ) {
        coreblas_error(1, "Illegal value of descA.nt");
        return -1;
    }
    if ( i1 < 0 ) {
        coreblas_error(2, "Illegal value of i1");
        return -2;
    }
    if ( (i2 < i1) || (i2 > descA.m) ) {
        coreblas_error(3, "Illegal value of i2");
        return -3;
    }
    /* NOTE(review): reported as argument 2 but returned as -3 -- confirm
     * which one callers rely on before harmonising. */
    if ( ! ( (i2 - i1 - i1%descA.mb -1) < descA.mb ) ) {
        coreblas_error(2, "Illegal value of i1,i2. They have to be part of the same block.");
        return -3;
    }

    it = i1 / descA.mb;
    if (inc > 0) {
        A1 = A(it, 0);
        lda1 = BLKLDD(descA, 0);

        for (j = i1; j < i2; ++j, ipiv+=inc) {
            ip = (*ipiv) - descA.i - 1;
            if ( ip != j )
            {
                it = ip / descA.mb;
                i  = ip % descA.mb;
                lda2 = BLKLDD(descA, it);
                cblas_dswap(descA.n, A1       + j, lda1,
                                     A(it, 0) + i, lda2 );
            }
        }
    }
    else
    {
        A1 = A(it, 0);
        lda1 = BLKLDD(descA, descA.mt-1);

        /* Lower bound becomes exclusive for the descending loop. */
        i1--;
        ipiv = &ipiv[(1-i2)*inc];
        for (j = i2-1; j > i1; --j, ipiv+=inc) {
            ip = (*ipiv) - descA.i - 1;
            if ( ip != j )
            {
                it = ip / descA.mb;
                i  = ip % descA.mb;
                lda2 = BLKLDD(descA, it);
                cblas_dswap(descA.n, A1       + j, lda1,
                                     A(it, 0) + i, lda2 );
            }
        }
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dlaswp_ontile_f2_quark ( Quark quark)

Definition at line 279 of file core_dlaswp.c.

References A, CORE_dlaswp_ontile(), and quark_unpack_args_8.

{
    /* QUARK task wrapper (variant with two extra dependency arguments):
     * unpack the task arguments and forward the descriptor and pivot
     * parameters to CORE_dlaswp_ontile. */
    PLASMA_desc descA;
    double *A;      /* unpacked only; the kernel works through descA */
    int *ipiv;
    int i1;
    int i2;
    int inc;
    void *fake1;    /* unpacked only; not read below */
    void *fake2;    /* unpacked only; not read below */

    quark_unpack_args_8(quark, descA, A, i1, i2, ipiv, inc, fake1, fake2);

    CORE_dlaswp_ontile(descA, i1, i2, ipiv, inc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dlaswp_ontile_quark ( Quark quark)

Definition at line 238 of file core_dlaswp.c.

References A, CORE_dlaswp_ontile(), and quark_unpack_args_7.

{
    /* QUARK task wrapper: unpack the seven task arguments and forward the
     * descriptor and pivot parameters to CORE_dlaswp_ontile. */
    PLASMA_desc descA;
    double *A;      /* unpacked only; the kernel works through descA */
    double *fake;   /* unpacked only; not read below */
    int *ipiv;
    int i1;
    int i2;
    int inc;

    quark_unpack_args_7(quark, descA, A, i1, i2, ipiv, inc, fake);

    CORE_dlaswp_ontile(descA, i1, i2, ipiv, inc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dlaswp_quark ( Quark quark)

Definition at line 61 of file core_dlaswp.c.

References A, and quark_unpack_args_7.

{
    /* QUARK task wrapper: unpack the seven task arguments and run the
     * LAPACKE row swap in column-major layout. */
    int n;
    int lda;
    int i1;
    int i2;
    int inc;
    int *ipiv;
    double *A;

    quark_unpack_args_7(quark, n, A, lda, i1, i2, ipiv, inc);

    LAPACKE_dlaswp_work(LAPACK_COL_MAJOR,
                        n, A, lda,
                        i1, i2, ipiv, inc);
}

Here is the caller graph for this function:

int CORE_dlaswpc_ontile ( PLASMA_desc  descA,
int  i1,
int  i2,
int *  ipiv,
int  inc 
)

CORE_dlaswpc_ontile applies the column-interchange variant of dlaswp to a matrix stored in tile layout.

Parameters:
[in,out]AThe descriptor of the matrix A to permute.
[in]i1The first element of IPIV for which a column interchange will be done.
[in]i2The last element of IPIV for which a column interchange will be done.
[in]ipivThe pivot indices; Only the element in position i1 to i2 are accessed. The pivot are offset by A.i.
[in]incThe increment between successive values of IPIV. If inc is negative, the pivots are applied in reverse order.

Definition at line 430 of file core_dlaswp.c.

References A, BLKLDD, cblas_dswap(), coreblas_error, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, and PLASMA_SUCCESS.

{
    /*
     * Column interchanges on a single tile row (descA.mt must be <= 1).
     * For each pivot entry, column j is swapped with column
     * ip = *ipiv - descA.j - 1 over descA.m rows using cblas_dswap; the
     * pivot column is located by tile index ip / descA.nb and offset
     * ip % descA.nb.
     * inc > 0 walks j upward from i1-1; otherwise j walks downward from
     * i2-1 and ipiv is repositioned first.
     *
     * Returns -1, -2 or -3 on an illegal descA.mt, i1 or i2 (including an
     * i1/i2 pair that is not within one block), PLASMA_SUCCESS otherwise.
     */
    int i, j, ip, it;
    double *A1;
    int lda;

    if ( descA.mt > 1 ) {
        coreblas_error(1, "Illegal value of descA.mt");
        return -1;
    }
    if ( i1 < 1 ) {
        coreblas_error(2, "Illegal value of i1");
        return -2;
    }
    if ( (i2 < i1) || (i2 > descA.n) ) {
        coreblas_error(3, "Illegal value of i2");
        return -3;
    }
    /* NOTE(review): reported as argument 2 but returned as -3 -- confirm
     * which one callers rely on before harmonising. */
    if ( ! ( (i2 - i1 - i1%descA.nb -1) < descA.nb ) ) {
        coreblas_error(2, "Illegal value of i1,i2. They have to be part of the same block.");
        return -3;
    }

    lda = BLKLDD(descA, 0);

    it = i1 / descA.nb;
    if (inc > 0) {
        A1 = A(0, it);

        for (j = i1-1; j < i2; ++j, ipiv+=inc) {
            ip = (*ipiv) - descA.j - 1;
            if ( ip != j )
            {
                it = ip / descA.nb;
                i  = ip % descA.nb;
                cblas_dswap(descA.m, A1       + j*lda, 1,
                                     A(0, it) + i*lda, 1 );
            }
        }
    }
    else
    {
        A1 = A(0, it);

        /* i1 is 1-based here: subtracting 2 yields an exclusive 0-based
         * lower bound for the descending loop. */
        i1 -= 2;
        ipiv = &ipiv[(1-i2)*inc];
        for (j = i2-1; j > i1; --j, ipiv+=inc) {
            ip = (*ipiv) - descA.j - 1;
            if ( ip != j )
            {
                it = ip / descA.nb;
                i  = ip % descA.nb;
                cblas_dswap(descA.m, A1       + j*lda, 1,
                                     A(0, it) + i*lda, 1 );
            }
        }
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dlaswpc_ontile_quark ( Quark quark)

Definition at line 516 of file core_dlaswp.c.

References A, CORE_dlaswpc_ontile(), and quark_unpack_args_7.

{
    /* QUARK task wrapper: unpack the seven task arguments and forward the
     * descriptor and pivot parameters to CORE_dlaswpc_ontile. */
    PLASMA_desc descA;
    double *A;      /* unpacked only; the kernel works through descA */
    double *fake;   /* unpacked only; not read below */
    int *ipiv;
    int i1;
    int i2;
    int inc;

    quark_unpack_args_7(quark, descA, A, i1, i2, ipiv, inc, fake);

    CORE_dlaswpc_ontile(descA, i1, i2, ipiv, inc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dlauum ( int  uplo,
int  N,
double *  A,
int  LDA 
)

Definition at line 29 of file core_dlauum.c.

References lapack_const.

{
    /* Thin wrapper: translates uplo and forwards to LAPACKE in column-major
     * layout. */
    LAPACKE_dlauum_work(LAPACK_COL_MAJOR,
                        lapack_const(uplo),
                        N, A, LDA);
}
void CORE_dlauum_quark ( Quark quark)

Definition at line 57 of file core_dlauum.c.

References A, lapack_const, quark_unpack_args_4, and uplo.

{
    /* QUARK task wrapper: unpack the four task arguments and make the same
     * LAPACKE call as CORE_dlauum. */
    int uplo, N, LDA;
    double *A;

    quark_unpack_args_4(quark, uplo, N, A, LDA);

    LAPACKE_dlauum_work(LAPACK_COL_MAJOR,
                        lapack_const(uplo),
                        N, A, LDA);
}

Here is the caller graph for this function:

int CORE_dormlq ( int  side,
int  trans,
int  M,
int  N,
int  K,
int  IB,
double *  A,
int  LDA,
double *  T,
int  LDT,
double *  C,
int  LDC,
double *  WORK,
int  LDWORK 
)

CORE_dormlq overwrites the general real M-by-N tile C with

                SIDE = 'L'     SIDE = 'R'
TRANS = 'N':      Q * C          C * Q
TRANS = 'T':      Q**T * C       C * Q**T

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(k) . . . H(2) H(1)

as returned by CORE_dgelqt. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'.

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**T from the Left;
  • PlasmaRight : apply Q or Q**T from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaTrans : Transpose, apply Q**T.
[in]MThe number of rows of the tile C. M >= 0.
[in]NThe number of columns of the tile C. N >= 0.
[in]KThe number of elementary reflectors whose product defines the matrix Q. If SIDE = PlasmaLeft, M >= K >= 0; if SIDE = PlasmaRight, N >= K >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in]ADimension: (LDA,M) if SIDE = PlasmaLeft, (LDA,N) if SIDE = PlasmaRight, The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_dgelqt in the first k rows of its array argument A.
[in]LDAThe leading dimension of the array A. LDA >= max(1,K).
[in]TThe IB-by-K triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[in,out]COn entry, the M-by-N tile C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
[in]LDCThe leading dimension of the array C. LDC >= max(1,M).
[in,out]WORKOn exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
[in]LDWORKThe dimension of the array WORK. If SIDE = PlasmaLeft, LDWORK >= max(1,N); if SIDE = PlasmaRight, LDWORK >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 108 of file core_dormlq.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, and PlasmaTrans.

{
    /*
     * Applies Q or Q**T (from CORE_dgelqt) to the tile C, IB reflectors at
     * a time, through LAPACKE_dlarfb_work.
     *
     * Returns -i when the i-th argument is illegal (side, trans, M, N, K,
     * IB, LDA, LDC, LDWORK are checked), PLASMA_SUCCESS otherwise.
     */
    int i, kb;
    int i1, i3;
    int nq, nw;
    int ic = 0;
    int jc = 0;
    int ni = N;
    int mi = M;

    /* Check input arguments */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    /*
     * NQ is the order of Q and NW is the minimum dimension of WORK
     */
    if (side == PlasmaLeft) {
        nq = M;
        nw = N;
    }
    else {
        nq = N;
        nw = M;
    }

    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M < 0) {
        coreblas_error(3, "Illegal value of M");
        return -3;
    }
    if (N < 0) {
        coreblas_error(4, "Illegal value of N");
        return -4;
    }
    if ((K < 0) || (K > nq)) {
        coreblas_error(5, "Illegal value of K");
        return -5;
    }
    if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
        coreblas_error(6, "Illegal value of IB");
        return -6;
    }
    if ((LDA < max(1,K)) && (K > 0)) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if ((LDC < max(1,M)) && (M > 0)) {
        coreblas_error(12, "Illegal value of LDC");
        return -12;
    }
    if ((LDWORK < max(1,nw)) && (nw > 0)) {
        coreblas_error(14, "Illegal value of LDWORK");
        return -14;
    }

    /* Quick return. This also guarantees IB > 0 below, since IB == 0 is
     * only accepted above when M or N is zero. */
    if ((M == 0) || (N == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /* Direction of the sweep over the reflector blocks. */
    if (((side == PlasmaLeft) && (trans == PlasmaNoTrans))
        || ((side == PlasmaRight) && (trans != PlasmaNoTrans))) {
        i1 = 0;
        i3 = IB;
    }
    else {
        i1 = ( ( K-1 ) / IB )*IB;
        i3 = -IB;
    }

    /* The block reflector is applied with the opposite trans flag, as in
     * LAPACK's DORMLQ (reflectors are stored row-wise for LQ). */
    if (trans == PlasmaNoTrans) {
        trans = PlasmaTrans;
    }
    else {
        trans = PlasmaNoTrans;
    }

    for (i = i1; (i > -1) && (i < K); i += i3) {
        kb = min(IB, K-i);

        if (side == PlasmaLeft) {
            /*
             * H or H' is applied to C(i:m,1:n)
             */
            mi = M - i;
            ic = i;
        }
        else {
            /*
             * H or H' is applied to C(1:m,i:n)
             */
            ni = N - i;
            jc = i;
        }
        /*
         * Apply H or H'
         */
        LAPACKE_dlarfb_work(LAPACK_COL_MAJOR,
            lapack_const(side),
            lapack_const(trans),
            lapack_const(PlasmaForward),
            lapack_const(PlasmaRowwise),
            mi, ni, kb,
            &A[LDA*i+i], LDA,
            &T[LDT*i], LDT,
            &C[LDC*jc+ic], LDC,
            WORK, LDWORK);
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_dormlq_quark ( Quark quark)

Definition at line 264 of file core_dormlq.c.

References A, C, CORE_dormlq(), quark_unpack_args_14, side, T, and trans.

{
    /* Selectors, tile dimensions and inner-blocking size. */
    int side, trans;
    int m, n, k, ib;
    /* Arrays handed to the kernel, followed by their leading dimensions. */
    double *A, *T, *C, *WORK;
    int lda, ldt, ldc, ldwork;

    /* Pull the 14 task arguments out of the QUARK task, in kernel order. */
    quark_unpack_args_14(quark, side, trans, m, n, k, ib,
                         A, lda, T, ldt, C, ldc, WORK, ldwork);

    /* Run the kernel; whatever it returns is discarded by this wrapper. */
    CORE_dormlq(side, trans, m, n, k, ib,
                A, lda, T, ldt, C, ldc, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dormqr ( int  side,
int  trans,
int  M,
int  N,
int  K,
int  IB,
double *  A,
int  LDA,
double *  T,
int  LDT,
double *  C,
int  LDC,
double *  WORK,
int  LDWORK 
)

CORE_dormqr overwrites the general real M-by-N tile C with

                SIDE = 'L'     SIDE = 'R'

TRANS = 'N':      Q * C          C * Q
TRANS = 'T':      Q**T * C       C * Q**T

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CORE_dgeqrt. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'.

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**T from the Left;
  • PlasmaRight : apply Q or Q**T from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaTrans : Transpose, apply Q**T.
[in]MThe number of rows of the tile C. M >= 0.
[in]NThe number of columns of the tile C. N >= 0.
[in]KThe number of elementary reflectors whose product defines the matrix Q. If SIDE = PlasmaLeft, M >= K >= 0; if SIDE = PlasmaRight, N >= K >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in]ADimension: (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_dgeqrt in the first k columns of its array argument A.
[in]LDAThe leading dimension of the array A. If SIDE = PlasmaLeft, LDA >= max(1,M); if SIDE = PlasmaRight, LDA >= max(1,N).
[in]TThe IB-by-K triangular factor T of the block reflector, as returned by CORE_dgeqrt. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[in,out]COn entry, the M-by-N tile C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
[in]LDCThe leading dimension of the array C. LDC >= max(1,M).
[in,out]WORKOn exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
[in]LDWORKThe dimension of the array WORK. If SIDE = PlasmaLeft, LDWORK >= max(1,N); if SIDE = PlasmaRight, LDWORK >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 108 of file core_dormqr.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, and PlasmaTrans.

{
    /*
     * Applies Q or Q**T (the reflectors stored in A with block factors T) to
     * the tile C, IB reflectors at a time, through LAPACKE_dlarfb_work.
     *
     * Returns PLASMA_SUCCESS, or -i when the i-th argument is illegal.
     */
    int i, kb;
    int i1, i3;
    int nq, nw;
    int ic = 0;
    int jc = 0;
    int ni = N;
    int mi = M;

    /* Check input arguments */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    /*
     * NQ is the order of Q and NW is the minimum dimension of WORK
     */
    if (side == PlasmaLeft) {
        nq = M;
        nw = N;
    }
    else {
        nq = N;
        nw = M;
    }

    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M < 0) {
        coreblas_error(3, "Illegal value of M");
        return -3;
    }
    if (N < 0) {
        coreblas_error(4, "Illegal value of N");
        return -4;
    }
    if ((K < 0) || (K > nq)) {
        coreblas_error(5, "Illegal value of K");
        return -5;
    }
    if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
        coreblas_error(6, "Illegal value of IB");
        return -6;
    }
    if ((LDA < max(1,nq)) && (nq > 0)) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if ((LDC < max(1,M)) && (M > 0)) {
        coreblas_error(12, "Illegal value of LDC");
        return -12;
    }
    if ((LDWORK < max(1,nw)) && (nw > 0)) {
        coreblas_error(14, "Illegal value of LDWORK");
        return -14;
    }

    /*
     * Quick return. This must come before i1/i3 are computed: it is the only
     * thing that keeps IB == 0 (accepted above when M or N is 0) away from the
     * division below.
     */
    if ((M == 0) || (N == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /* Choose the sweep direction over the blocks of reflectors. */
    if (((side == PlasmaLeft) && (trans != PlasmaNoTrans))
        || ((side == PlasmaRight) && (trans == PlasmaNoTrans))) {
        /* First block to last. */
        i1 = 0;
        i3 = IB;
    }
    else {
        /* Last (possibly partial) block back to the first. */
        i1 = ( ( K-1 ) / IB )*IB;
        i3 = -IB;
    }

    for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
        kb = min(IB, K-i);

        if (side == PlasmaLeft) {
            /*
             * H or H' is applied to C(i:m,1:n)
             */
            mi = M - i;
            ic = i;
        }
        else {
            /*
             * H or H' is applied to C(1:m,i:n)
             */
            ni = N - i;
            jc = i;
        }
        /*
         * Apply H or H'
         */
        LAPACKE_dlarfb_work(LAPACK_COL_MAJOR,
            lapack_const(side),
            lapack_const(trans),
            lapack_const(PlasmaForward),
            lapack_const(PlasmaColumnwise),
            mi, ni, kb,
            &A[LDA*i+i], LDA,
            &T[LDT*i], LDT,
            &C[LDC*jc+ic], LDC,
            WORK, LDWORK);
    }
    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_dormqr_quark ( Quark quark)

Definition at line 257 of file core_dormqr.c.

References A, C, CORE_dormqr(), quark_unpack_args_14, side, T, and trans.

{
    /* Selectors, tile dimensions and inner-blocking size. */
    int side, trans;
    int m, n, k, ib;
    /* Arrays handed to the kernel, followed by their leading dimensions. */
    double *A, *T, *C, *WORK;
    int lda, ldt, ldc, ldwork;

    /* Pull the 14 task arguments out of the QUARK task, in kernel order. */
    quark_unpack_args_14(quark, side, trans, m, n, k, ib,
                         A, lda, T, ldt, C, ldc, WORK, ldwork);

    /* Run the kernel; its int status is discarded by this void wrapper. */
    CORE_dormqr(side, trans, m, n, k, ib,
                A, lda, T, ldt, C, ldc, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dpamm ( int  op,
int  side,
int  storev,
int  M,
int  N,
int  K,
int  L,
double *  A1,
int  LDA1,
double *  A2,
int  LDA2,
double *  V,
int  LDV,
double *  W,
int  LDW 
)

CORE_dpamm performs one of the matrix-matrix operations

                    LEFT                      RIGHT

OP PlasmaW  :  W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
OP PlasmaA2 :  A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)

where op( V ) is one of

op( V ) = V or op( V ) = V**T,

A1, A2 and W are general matrices, and V is:

  l = k: rectangle + triangle
  l < k: rectangle + trapezoid
  l = 0: rectangle

Size of V, both rowwise and columnwise, is:


side   trans    size

left     N     M x K
         T     K x M
right    N     K x N
         T     N x K

LEFT (columnwise and rowwise):

        |    K    |                 |         M         |
     _  __________   _              _______________        _
        |    |    |                 |             | \
V:      |    |    |            V':  |_____________|___\    K
        |    |    | M-L             |                  |
     M  |    |    |                 |__________________|   _
        |____|    |  _
        \    |    |                 |    M - L    | L  |
          \  |    |  L
     _      \|____|  _

RIGHT (columnwise and rowwise):

    |         K         |                   |    N    |
    _______________        _             _  __________   _
    |             | \                       |    |    |
V': |_____________|___\    N        V:      |    |    |
    |                  |                    |    |    | K-L
    |__________________|   _             K  |    |    |
                                            |____|    |  _
    |    K - L    | L  |                    \    |    |
                                              \  |    |  L
                                         _      \|____|  _

Arguments

Parameters:
[in]OP
    OP specifies which operation to perform:

    @arg PlasmaW  : W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
    @arg PlasmaA2 : A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
[in]SIDE
    SIDE specifies whether  op( V ) multiplies A2
    or W from the left or right as follows:

    @arg PlasmaLeft  : multiply op( V ) from the left
                       OP PlasmaW  :  W  = A1 + op(V) * A2
                       OP PlasmaA2 :  A2 = A2 - op(V) * W

    @arg PlasmaRight : multiply op( V ) from the right
                       OP PlasmaW  :  W  = A1 + A2 * op(V)
                       OP PlasmaA2 :  A2 = A2 - W * op(V)
[in]STOREV
    Indicates how the vectors which define the elementary
    reflectors are stored in V:

    @arg PlasmaColumnwise
    @arg PlasmaRowwise
[in]MThe number of rows of the A1, A2 and W If SIDE is PlasmaLeft, the number of rows of op( V )
[in]NThe number of columns of the A1, A2 and W If SIDE is PlasmaRight, the number of columns of op( V )
[in]KIf SIDE is PlasmaLeft, the number of columns of op( V ) If SIDE is PlasmaRight, the number of rows of op( V )
[in]LThe size of the triangular part of V
[in]A1On entry, the M-by-N tile A1.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M).
[in,out]A2On entry, the M-by-N tile A2. On exit, if OP is PlasmaA2 A2 is overwritten
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M).
[in]VThe matrix V as described above. If SIDE is PlasmaLeft : op( V ) is M-by-K If SIDE is PlasmaRight: op( V ) is K-by-N
[in]LDVThe leading dimension of the array V.
[in,out]WOn entry, the M-by-N matrix W. On exit, W is overwritten either if OP is PlasmaA2 or PlasmaW. If OP is PlasmaA2, W is an input and is used as a workspace.
[in]LDWThe leading dimension of the array W.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 174 of file core_dpamm.c.

References CblasLower, CblasUpper, coreblas_error, L, PLASMA_SUCCESS, PlasmaA2, PlasmaColumnwise, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, PlasmaTrans, PlasmaW, trans, and uplo.

{
    /*
     * Validates the arguments, derives from side/storev/op how V has to be
     * traversed (uplo, trans, and the values vi2/vi3 handed to the helpers --
     * presumably offsets of V's sub-blocks), then delegates to CORE_dpamm_w
     * (op == PlasmaW) or CORE_dpamm_a2 (op == PlasmaA2).
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, or the
     * non-zero status reported by the helper.
     */
    int vi2, vi3, uplo, trans, info;

    /* Check input arguments */
    if ((op != PlasmaW) && (op != PlasmaA2)) {
        coreblas_error(1, "Illegal value of op");
        return -1;
    }
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(2, "Illegal value of side");
        return -2;
    }
    if ((storev != PlasmaColumnwise) && (storev != PlasmaRowwise)) {
        coreblas_error(3, "Illegal value of storev");
        return -3;
    }
    if (M < 0) {
        coreblas_error(4, "Illegal value of M");
        return -4;
    }
    if (N < 0) {
        coreblas_error(5, "Illegal value of N");
        return -5;
    }
    if (K < 0) {
        coreblas_error(6, "Illegal value of K");
        return -6;
    }
    if (L < 0) {
        coreblas_error(7, "Illegal value of L");
        return -7;
    }
    if (LDA1 < 0) {
        coreblas_error(9, "Illegal value of LDA1");
        return -9;
    }
    if (LDA2 < 0) {
        coreblas_error(11, "Illegal value of LDA2");
        return -11;
    }
    if (LDV < 0) {
        coreblas_error(13, "Illegal value of LDV");
        return -13;
    }
    if (LDW < 0) {
        coreblas_error(15, "Illegal value of LDW");
        return -15;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (K == 0))
        return PLASMA_SUCCESS;

    /*
     * TRANS is set as:
     *
     *        -------------------------------------
     *         side   direct     PlasmaW  PlasmaA2
     *        -------------------------------------
     *         left   colwise       T        N
     *                rowwise       N        T
     *         right  colwise       N        T
     *                rowwise       T        N
     *        -------------------------------------
     */

    /* Columnwise*/
    if (storev == PlasmaColumnwise) {
        uplo = CblasUpper;
        if (side == PlasmaLeft) {
            trans = op == PlasmaA2 ? PlasmaNoTrans : PlasmaTrans;
            vi2 = trans == PlasmaNoTrans ? M - L : K - L;
        }
        else {
            trans = op == PlasmaW ? PlasmaNoTrans : PlasmaTrans;
            vi2 = trans == PlasmaNoTrans ? K - L : N - L;
        }
        vi3 = LDV * L;
    }
    /* Rowwise */
    else {
        uplo = CblasLower;
        if (side == PlasmaLeft) {
            trans = op == PlasmaW ? PlasmaNoTrans : PlasmaTrans;
            vi2 = trans == PlasmaNoTrans ? K - L : M - L;
        }
        else {
            trans = op == PlasmaA2 ? PlasmaNoTrans : PlasmaTrans;
            vi2 = trans == PlasmaNoTrans ? N - L : K - L;
        }
        vi2 *= LDV;
        vi3 = L;
    }

    if (op==PlasmaW) {
        info = CORE_dpamm_w(
            side, trans, uplo, M, N, K, L, vi2, vi3,
            A1, LDA1, A2, LDA2, V, LDV, W, LDW);
        if (info != 0)
            return info;
    } else if (op==PlasmaA2) {
        info = CORE_dpamm_a2(
            side, trans, uplo, M, N, K, L, vi2, vi3,
            A2, LDA2, V, LDV, W, LDW);
        if (info != 0)
            return info;
    }

    return PLASMA_SUCCESS;
}

Here is the caller graph for this function:

void CORE_dpamm_quark ( Quark quark)

Definition at line 600 of file core_dpamm.c.

References CORE_dpamm(), L, quark_unpack_args_15, side, storev, V, and W.

{
    /* Operation selectors and problem sizes. */
    int op, side, storev;
    int M, N, K, L;
    /* Tiles and their leading dimensions. */
    double *A1, *A2, *V, *W;
    int LDA1, LDA2, LDV, LDW;

    /* Pull the 15 task arguments out of the QUARK task, in kernel order. */
    quark_unpack_args_15(quark, op, side, storev, M, N, K, L,
                         A1, LDA1, A2, LDA2, V, LDV, W, LDW);

    /* Run the kernel; its int status is discarded by this void wrapper. */
    CORE_dpamm(op, side, storev, M, N, K, L,
               A1, LDA1, A2, LDA2, V, LDV, W, LDW);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dparfb ( int  side,
int  trans,
int  direct,
int  storev,
int  M1,
int  N1,
int  M2,
int  N2,
int  K,
int  L,
double *  A1,
int  LDA1,
double *  A2,
int  LDA2,
double *  V,
int  LDV,
double *  T,
int  LDT,
double *  WORK,
int  LDWORK 
)

CORE_dparfb applies a real upper triangular block reflector H or its transpose H' to a real rectangular matrix formed by coupling two tiles A1 and A2. Matrix V is:

        COLUMNWISE                    ROWWISE

       |     K     |                 |      N2-L     |   L  |
    __ _____________ __           __ _________________        __
       |    |      |                 |               | \
       |    |      |                 |               |   \    L
  M2-L |    |      |              K  |_______________|_____\  __
       |    |      | M2              |                      |
    __ |____|      |                 |                      | K-L
       \    |      |              __ |______________________| __
     L   \  |      |
    __     \|______| __              |          N2          |

       | L  |  K-L |

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**T from the Left;
  • PlasmaRight : apply Q or Q**T from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaTrans : Transpose, apply Q**T.
[in]directIndicates how H is formed from a product of elementary reflectors
  • PlasmaForward : H = H(1) H(2) . . . H(k) (Forward)
  • PlasmaBackward : H = H(k) . . . H(2) H(1) (Backward)
[in]storevIndicates how the vectors which define the elementary reflectors are stored:
  • PlasmaColumnwise
  • PlasmaRowwise
[in]M1The number of rows of the tile A1. M1 >= 0.
[in]N1The number of columns of the tile A1. N1 >= 0.
[in]M2The number of rows of the tile A2. M2 >= 0.
[in]N2The number of columns of the tile A2. N2 >= 0.
[in]KThe order of the matrix T (= the number of elementary reflectors whose product defines the block reflector).
[in]LThe size of the triangular part of V
[in,out]A1On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]A2On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]V(LDV,K) if STOREV = 'C' (LDV,M2) if STOREV = 'R' and SIDE = 'L' (LDV,N2) if STOREV = 'R' and SIDE = 'R' Matrix V.
[in]LDVThe leading dimension of the array V. If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M2); if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N2); if STOREV = 'R', LDV >= K.
[in]TThe triangular K-by-K matrix T in the representation of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= K.
[in,out]WORK
[in]LDWORKThe dimension of the array WORK.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 131 of file core_dparfb.c.

References cblas_daxpy(), cblas_dtrmm(), CblasColMajor, CblasLeft, CblasNonUnit, CblasRight, CblasUpper, CORE_dpamm(), coreblas_error, PLASMA_ERR_NOT_SUPPORTED, PLASMA_SUCCESS, PlasmaA2, PlasmaBackward, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, PlasmaTrans, and PlasmaW.

{
    /*
     * Applies the block reflector defined by V and T to the coupled tiles
     * A1 and A2, using WORK as the intermediate matrix W:
     *     W  = A1 + op(V) * A2     (CORE_dpamm, PlasmaW)
     *     W  = op(T) * W           (cblas_dtrmm)
     *     A1 = A1 - W              (cblas_daxpy, column by column)
     *     A2 = A2 - op(V) * W      (CORE_dpamm, PlasmaA2)
     * with the products taken from the right when side == PlasmaRight.
     * Only direct == PlasmaForward is implemented.
     *
     * Returns PLASMA_SUCCESS, -i when the i-th argument is illegal, or
     * PLASMA_ERR_NOT_SUPPORTED for direct == PlasmaBackward.
     */
    static double zone  =  1.0;
    static double mzone = -1.0;

    int j;

    /* Check input arguments */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if ((direct != PlasmaForward) && (direct != PlasmaBackward)) {
        coreblas_error(3, "Illegal value of direct");
        return -3;
    }
    if ((storev != PlasmaColumnwise) && (storev != PlasmaRowwise)) {
        coreblas_error(4, "Illegal value of storev");
        return -4;
    }
    if (M1 < 0) {
        coreblas_error(5, "Illegal value of M1");
        return -5;
    }
    if (N1 < 0) {
        coreblas_error(6, "Illegal value of N1");
        return -6;
    }
    if ((M2 < 0) ||
        ( (side == PlasmaRight) && (M1 != M2) ) ) {
        coreblas_error(7, "Illegal value of M2");
        return -7;
    }
    if ((N2 < 0) ||
        ( (side == PlasmaLeft) && (N1 != N2) ) ) {
        coreblas_error(8, "Illegal value of N2");
        return -8;
    }
    if (K < 0) {
        coreblas_error(9, "Illegal value of K");
        return -9;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
        return PLASMA_SUCCESS;

    if (direct == PlasmaForward) {

        if (side == PlasmaLeft) {

            /*
             * Column or Rowwise / Forward / Left
             * ----------------------------------
             *
             * Form  H * A  or  H' * A  where  A = ( A1 )
             *                                     ( A2 )
             */

            /* W = A1 + op(V) * A2 */
            CORE_dpamm(
                PlasmaW, PlasmaLeft, storev,
                K, N1, M2, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);

            /* W = op(T) * W */
            cblas_dtrmm(
                CblasColMajor, CblasLeft, CblasUpper,
                (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                (zone), T, LDT, WORK, LDWORK);

            /* A1 = A1 - W */
            for(j = 0; j < N1; j++) {
                cblas_daxpy(
                    K, (mzone),
                    &WORK[LDWORK*j], 1,
                    &A1[LDA1*j], 1);
            }

            /* A2 = A2 - op(V) * W  */
            /* W also changes: W = V * W, A2 = A2 - W */
            CORE_dpamm(
                PlasmaA2, PlasmaLeft, storev,
                M2, N2, K, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);
        }
        else {
            /*
             * Column or Rowwise / Forward / Right
             * -----------------------------------
             *
             * Form  H * A  or  H' * A  where A  = ( A1 A2 )
             *
             */

            /* W = A1 + A2 * op(V) */
            CORE_dpamm(
                PlasmaW, PlasmaRight, storev,
                M1, K, N2, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);

            /* W = W * op(T) */
            cblas_dtrmm(
                CblasColMajor, CblasRight, CblasUpper,
                (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K,
                (zone), T, LDT, WORK, LDWORK);

            /* A1 = A1 - W */
            for(j = 0; j < K; j++) {
                cblas_daxpy(
                    M1, (mzone),
                    &WORK[LDWORK*j], 1,
                    &A1[LDA1*j], 1);
            }

            /* A2 = A2 - W * op(V) */
            /* W also changes: W = W * V', A2 = A2 - W */
            CORE_dpamm(
                PlasmaA2, PlasmaRight, storev,
                M2, N2, K, L,
                A1, LDA1,
                A2, LDA2,
                V, LDV,
                WORK, LDWORK);
        }
    }
    else {
        coreblas_error(3, "Not implemented (Backward / Left or Right)");
        return PLASMA_ERR_NOT_SUPPORTED;
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dpemv ( int  trans,
int  storev,
int  M,
int  N,
int  L,
double  ALPHA,
double *  A,
int  LDA,
double *  X,
int  INCX,
double  BETA,
double *  Y,
int  INCY,
double *  WORK 
)

DPEMV performs one of the matrix-vector operations

y = alpha*op( A )*x + beta*y

where op( A ) is one of

op( A ) = A or op( A ) = A**T,

alpha and beta are scalars, x and y are vectors and A is a pentagonal matrix (see further details).

Arguments

Parameters:
[in]storev
    @arg PlasmaColumnwise :  array A stored columnwise
    @arg PlasmaRowwise    :  array A stored rowwise
[in]trans
    @arg PlasmaNoTrans   :  y := alpha*A*x    + beta*y.
    @arg PlasmaTrans     :  y := alpha*A**T*x + beta*y.
[in]MNumber of rows of the matrix A. M must be at least zero.
[in]NNumber of columns of the matrix A. N must be at least zero.
[in]LOrder of triangle within the matrix A (L specifies the shape of the matrix A; see further details).
[in]ALPHAScalar alpha.
[in]AArray of size LDA-by-N. On entry, the leading M by N part of the array A must contain the matrix of coefficients.
[in]LDALeading dimension of array A.
[in]XOn entry, the incremented array X must contain the vector x.
[in]INCXIncrement for the elements of X. INCX must not be zero.
[in]BETAScalar beta.
[in,out]YOn entry, the incremented array Y must contain the vector y.
[in]INCYIncrement for the elements of Y. INCY must not be zero.
[in]WORKWorkspace array of size at least L.

Further Details

           |     N    |
        _   ___________   _
           |          |
 A:        |          |
      M-L  |          |
           |          |  M
        _  |.....     |
           \    :     |
        L    \  :     |
        _      \:_____|  _

           |  L | N-L |

Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 118 of file core_dpemv.c.

References cblas_daxpy(), cblas_dcopy(), cblas_dgemv(), cblas_dscal(), cblas_dtrmv(), CblasColMajor, coreblas_error, L, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRowwise, PlasmaTrans, and PlasmaUpper.

{
    /*
     *  y = alpha * op(A) * x + beta * y
     *
     *  A is "pentagonal": a rectangular part plus a triangle of order L.
     *  The triangular part is handled with cblas_dtrmv on a copy of the
     *  matching slice of x (held in WORK), the rectangular parts with
     *  cblas_dgemv.  Only Columnwise/Trans and Rowwise/NoTrans are implemented.
     *
     *  Returns PLASMA_SUCCESS, or -i when the i-th argument is illegal
     *  (-1 is also returned for the unimplemented combinations).
     */

    int K;
    static double zzero = 0.0;

    /* Check input arguments */
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(1, "Illegal value of trans");
        return -1;
    }
    if ((storev != PlasmaColumnwise) && (storev != PlasmaRowwise)) {
        coreblas_error(2, "Illegal value of storev");
        return -2;
    }
    if (!( ((storev == PlasmaColumnwise) && (trans != PlasmaNoTrans)) ||
           ((storev == PlasmaRowwise) && (trans == PlasmaNoTrans)) )) {
        coreblas_error(2, "Illegal values of trans/storev");
        return -2;
    }
    if (M < 0) {
        coreblas_error(3, "Illegal value of M");
        return -3;
    }
    if (N < 0) {
        coreblas_error(4, "Illegal value of N");
        return -4;
    }
    if (L > min(M ,N)) {
        coreblas_error(5, "Illegal value of L");
        return -5;
    }
    if (LDA < max(1,M)) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if (INCX < 1) {
        coreblas_error(10, "Illegal value of INCX");
        return -10;
    }
    if (INCY < 1) {
        coreblas_error(13, "Illegal value of INCY");
        return -13;
    }

    /* Quick return */
    if ((M == 0) || (N == 0))
        return PLASMA_SUCCESS;
    if ((ALPHA == zzero) && (BETA == zzero))
        return PLASMA_SUCCESS;

    /* If L < 2, there is no triangular part */
    if (L == 1) L = 0;

    /* Columnwise */
    if (storev == PlasmaColumnwise) {
        /*
         *        ______________
         *        |      |     |    A1: A[ 0 ]
         *        |      |     |    A2: A[ M-L ]
         *        |  A1  |     |    A3: A[ (N-L) * LDA ]
         *        |      |     |
         *        |______| A3  |
         *        \      |     |
         *          \ A2 |     |
         *            \  |     |
         *              \|_____|
         *
         */

        /* Columnwise / NoTrans */
        if (trans == PlasmaNoTrans) {
            coreblas_error(1, "The case PlasmaNoTrans / PlasmaColumnwise is not yet implemented");
            return -1;
        }
        /* Columnwise / [Conj]Trans */
        else {

            /* L top rows of y */
            if (L > 0) {

                /* w = A_2' * x_2 */
                cblas_dcopy(
                    L, &X[INCX*(M-L)], INCX, WORK, 1);
                cblas_dtrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)trans,
                    (CBLAS_DIAG)PlasmaNonUnit,
                    L, &A[M-L], LDA, WORK, 1);

                if (M > L) {

                    /* y_1 = beta * y_1 [ + alpha * A_1 * x_1 ] */
                    cblas_dgemv(
                        CblasColMajor, (CBLAS_TRANSPOSE)trans,
                        M-L, L, (ALPHA), A, LDA,
                        X, INCX, (BETA), Y, INCY);

                    /* y_1 = y_1 + alpha * w */
                    cblas_daxpy(L, (ALPHA), WORK, 1, Y, INCY);

                } else {

                    /* y_1 = y_1 + alpha * w */
                    if (BETA == zzero) {
                        cblas_dscal(L, (ALPHA), WORK, 1);
                        cblas_dcopy(L, WORK, 1, Y, INCY);
                    } else {
                        cblas_dscal(L, (BETA), Y, INCY);
                        cblas_daxpy(L, (ALPHA), WORK, 1, Y, INCY);
                    }
                }
            }

            /* N-L bottom rows of Y */
            if (N > L) {
                K = N - L;
                cblas_dgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)trans,
                    M, K, (ALPHA), &A[LDA*L], LDA,
                    X, INCX, (BETA), &Y[INCY*L], INCY);
            }
        }
    }
    /* Rowwise */
    else {
        /*
         * --------------
         * |            | \           A1:  A[ 0 ]
         * |    A1      |   \         A2:  A[ (N-L) * LDA ]
         * |            | A2  \       A3:  A[ L ]
         * |--------------------\
         * |        A3          |
         * ----------------------
         *
         */

        /* Rowwise / NoTrans */
        if (trans == PlasmaNoTrans) {
            /* L top rows of A and y */
            if (L > 0) {

                /* w = A_2 * x_2 */
                cblas_dcopy(
                    L, &X[INCX*(N-L)], INCX, WORK, 1);
                cblas_dtrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaLower,
                    (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    (CBLAS_DIAG)PlasmaNonUnit,
                    L, &A[LDA*(N-L)], LDA, WORK, 1);

                if (N > L) {

                    /* y_1 = beta * y_1 [ + alpha * A_1 * x_1 ] */
                    cblas_dgemv(
                        CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        L, N-L, (ALPHA), A, LDA,
                        X, INCX, (BETA), Y, INCY);

                    /* y_1 = y_1 + alpha * w */
                    cblas_daxpy(L, (ALPHA), WORK, 1, Y, INCY);

                } else {

                    /* y_1 = y_1 + alpha * w */
                    if (BETA == zzero) {
                        cblas_dscal(L, (ALPHA), WORK, 1);
                        cblas_dcopy(L, WORK, 1, Y, INCY);
                    } else {
                        cblas_dscal(L, (BETA), Y, INCY);
                        cblas_daxpy(L, (ALPHA), WORK, 1, Y, INCY);
                    }
                }
            }

            /* M-L bottom rows of Y */
            if (M > L) {
                cblas_dgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    M-L, N, (ALPHA), &A[L], LDA,
                    X, INCX, (BETA), &Y[INCY*L], INCY);
            }
        }
        /* Rowwise / [Conj]Trans */
        else {
            coreblas_error(1, "The case Plasma[Conj]Trans / PlasmaRowwise is not yet implemented");
            return -1;
        }
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dplgsy ( double  bump,
int  m,
int  n,
double *  A,
int  lda,
int  bigM,
int  m0,
int  n0,
unsigned long long int  seed 
)

Definition at line 64 of file core_dplgsy.c.

References A, NBELEM, Rnd64_A, Rnd64_C, and RndF_Mul.

{
/*
 * Fills the m-by-n tile A (column-major, leading dimension lda) with
 * pseudo-random values derived from seed, using one of three fill patterns
 * depending on how m0 compares to n0.  Each value is 0.5f - ran * RndF_Mul,
 * with ran advanced as ran = Rnd64_A * ran + Rnd64_C after every draw.
 * NOTE(review): m0/n0 look like the tile's row/column offsets in a matrix
 * with bigM rows -- confirm against the caller.
 */
double *tmp = A;
int64_t i, j;
unsigned long long int ran, jump;
/* Starting position in the random sequence for this tile. */
jump = m0 + n0 * bigM;
/*
* Tile diagonal
*/
if ( m0 == n0 ) {
/* Generate only the entries on and below the diagonal (i starts at j). */
for (j = 0; j < n; j++) {
ran = Rnd64_jump( NBELEM * jump, seed );
for (i = j; i < m; i++) {
*tmp = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
*tmp += I*(0.5f - ran * RndF_Mul);
ran = Rnd64_A * ran + Rnd64_C;
#endif
tmp++;
}
/* Uses the loop-exit value of i to move tmp to the diagonal entry of the next column. */
tmp += (lda - i + j + 1);
jump += bigM + j;
}
/* Add bump to the diagonal and mirror the lower triangle into the upper one. */
for (j = 0; j < n; j++) {
A[j+j*lda] += bump;
for (i=0; i<j; i++) {
A[lda*j+i] = A[lda*i+j];
}
}
}
/*
* Lower part
*/
else if ( m0 > n0 ) {
/* Full tile, generated column by column. */
for (j = 0; j < n; j++) {
ran = Rnd64_jump( NBELEM * jump, seed );
for (i = 0; i < m; i++) {
*tmp = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
*tmp += I*(0.5f - ran * RndF_Mul);
ran = Rnd64_A * ran + Rnd64_C;
#endif
tmp++;
}
/* Skip the lda - m padding entries to reach the next column. */
tmp += (lda - i);
jump += bigM;
}
}
/*
* Upper part
*/
else if ( m0 < n0 ) {
/* Overwrite jump */
/* m0 and n0 swap roles here and the tile is generated row by row --
 * presumably so that it is the transpose of the matching lower tile. */
jump = n0 + m0 * bigM;
for (i = 0; i < m; i++) {
ran = Rnd64_jump( NBELEM * jump, seed );
for (j = 0; j < n; j++) {
A[j*lda+i] = 0.5f - ran * RndF_Mul;
ran = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
A[j*lda+i] += I*(0.5f - ran * RndF_Mul);
ran = Rnd64_A * ran + Rnd64_C;
#endif
}
jump += bigM;
}
}
}

Here is the caller graph for this function:

void CORE_dplgsy_quark ( Quark quark)

Definition at line 172 of file core_dplgsy.c.

References A, CORE_dplgsy(), and quark_unpack_args_9.

{
    /* Generator parameters, unpacked in the order CORE_dplgsy takes them. */
    double bump;
    double *A;
    int m, n, lda;
    int bigM, m0, n0;
    unsigned long long int seed;

    /* Pull the 9 task arguments out of the QUARK task. */
    quark_unpack_args_9(quark, bump, m, n, A, lda, bigM, m0, n0, seed);

    /* Fill the tile. */
    CORE_dplgsy(bump, m, n, A, lda, bigM, m0, n0, seed);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dplrnt ( int  m,
int  n,
double *  A,
int  lda,
int  bigM,
int  m0,
int  n0,
unsigned long long int  seed 
)

Definition at line 64 of file core_dplrnt.c.

References A, NBELEM, Rnd64_A, Rnd64_C, and RndF_Mul.

{
    /*
     * Fills the m-by-n tile A (column-major, leading dimension lda) with
     * pseudo-random values 0.5f - ran * RndF_Mul, one column at a time.
     */
    int64_t row, col;
    unsigned long long int ran;
    /* Starting position in the random sequence for this tile. */
    unsigned long long int jump = m0 + n0 * bigM;

    for (col = 0; col < n; ++col) {
        /* First entry of the current column. */
        double *dst = A + col * lda;

        ran = Rnd64_jump( NBELEM*jump, seed );
        for (row = 0; row < m; ++row) {
            dst[row] = 0.5f - ran * RndF_Mul;
            ran = Rnd64_A * ran + Rnd64_C;
#ifdef COMPLEX
            dst[row] += I*(0.5f - ran * RndF_Mul);
            ran = Rnd64_A * ran + Rnd64_C;
#endif
        }
        /* The next column restarts bigM positions further in the sequence. */
        jump += bigM;
    }
}

Here is the caller graph for this function:

void CORE_dplrnt_quark ( Quark quark)

Definition at line 116 of file core_dplrnt.c.

References A, CORE_dplrnt(), and quark_unpack_args_8.

{
    /* Generator parameters, unpacked in the order CORE_dplrnt takes them. */
    double *A;
    int m, n, lda;
    int bigM, m0, n0;
    unsigned long long int seed;

    /* Pull the 8 task arguments out of the QUARK task. */
    quark_unpack_args_8(quark, m, n, A, lda, bigM, m0, n0, seed);

    /* Fill the tile. */
    CORE_dplrnt(m, n, A, lda, bigM, m0, n0, seed);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dpotrf ( int  uplo,
int  N,
double *  A,
int  LDA,
int *  INFO 
)

Definition at line 29 of file core_dpotrf.c.

References lapack_const.

{
    /*
     * Runs LAPACKE_dpotrf_work on the N-by-N tile A (column-major, leading
     * dimension LDA) for the triangle selected by uplo, and stores LAPACK's
     * status in *INFO.
     */
    *INFO = LAPACKE_dpotrf_work(
        LAPACK_COL_MAJOR,
        lapack_const(uplo),   /* translate the PLASMA enum into LAPACK's uplo character */
        N, A, LDA );
}

Here is the caller graph for this function:

void CORE_dpotrf_quark ( Quark quark)

Definition at line 65 of file core_dpotrf.c.

References A, lapack_const, plasma_sequence_flush(), PLASMA_SUCCESS, quark_unpack_args_7, plasma_sequence_t::status, and uplo.

{
    int uplo, n, lda;
    int iinfo;      /* offset added to LAPACK's info before it is reported */
    int info;       /* status returned by LAPACKE_dpotrf_work */
    double *A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    /* Pull the 7 task arguments out of the QUARK task. */
    quark_unpack_args_7(quark, uplo, n, A, lda, sequence, request, iinfo);

    info = LAPACKE_dpotrf_work(LAPACK_COL_MAJOR, lapack_const(uplo), n, A, lda);

    /* Report a failure only while the sequence is still marked successful. */
    if ((info != 0) && (sequence->status == PLASMA_SUCCESS)) {
        plasma_sequence_flush(quark, sequence, request, iinfo+info);
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dshift ( int  s,
int  m,
int  n,
int  L,
double *  A 
)

CORE_dshift Shift a cycle of block. Same as core_dshiftw but you don't need to provide the workspace. As a matter of fact, the cycle cannot be split anymore to keep data coherency.

Parameters:
[in]sStart value in the cycle
[in]mNumber of lines of tile A
[in]nNumber of columns of tile A
[in]LLength of each block of data to move
[in,out]AMatrix of size m-by-n with each element of size L. On exit, A = A', where A' contains the permutations

Definition at line 175 of file core_dshift.c.

References CORE_dshiftw(), and W.

{
    /*
     * Shifts the whole cycle starting at block s, using a temporary
     * workspace of L doubles allocated here and released before returning.
     */
    double *W;

    /* Blocks of length zero: there is no data to move. */
    if (L <= 0)
        return;

    W = malloc((size_t)L * sizeof *W);
    if (W == NULL) {
        /* Out of memory.  This function returns void, so the failure cannot
         * be reported; leave A untouched instead of dereferencing NULL. */
        return;
    }

    /* CORE_dshiftw expects W to hold a copy of block s on entry. */
    memcpy(W, &(A[(int64_t)s * L]), (size_t)L * sizeof *W);
    CORE_dshiftw(s, 0, m, n, L, A, W);
    free(W);
}

Here is the call graph for this function:

void CORE_dshift_quark ( Quark quark)

Definition at line 208 of file core_dshift.c.

References A, CORE_dshiftw(), L, quark_unpack_args_6, and W.

{
    int s, m, n, L;
    double *A, *W;

    /* Pull the 6 task arguments out of the QUARK task. */
    quark_unpack_args_6(quark, s, m, n, L, A, W);

    /* Save block s into the supplied workspace, as CORE_dshiftw requires. */
    memcpy(W, A + s*L, L*sizeof(double));

    /* cl == 0: follow the whole cycle. */
    CORE_dshiftw(s, 0, m, n, L, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dshiftw ( int  s,
int  cl,
int  m,
int  n,
int  L,
double *  A,
double *  W 
)

CORE_dshiftw Shift a linear chain of block using a supplied workspace by following the cycle defined by: k_(i+1) = (k_i * m) % q;

Parameters:
[in]sStart value in the cycle
[in]clCycle length if cl == 0, all the permutations from the cycle are done else the cycle is split onto several threads and the number of permutation to do has to be specified to not get overlap
[in]mNumber of lines of tile A
[in]nNumber of columns of tile A
[in]LLength of each block of data to move
[in,out]AMatrix of size m-by-n with each element of size L. On exit, A = A', where A' contains the permutations
[in]WArray of size L. On entry, must contain: W(:) = A(s*L:s*L+L-1)

Definition at line 66 of file core_dshift.c.

References L.

{
    /*
     * Follows the cycle k -> (k * m) % q, with q = m*n - 1, starting at
     * k = s.  At each step block k1 (L doubles) is copied over block k; the
     * saved copy of block s held in W is finally written into the last block
     * reached.  With cl != 0 exactly cl-1 moves are made; with cl == 0 the
     * cycle is followed until it comes back to s.
     */
    int64_t k, k1, q;
    int64_t kL, k1L;    /* element offsets; 64-bit like the final A[k*L] */
    int i, j;

    q = (int64_t)m * n - 1;

    /* With at most one block there is nothing to permute, and the modulus
     * below would be zero. */
    if (q <= 0)
        return;

    k = s;
    if( cl != 0 ) {
        for (i=1; i<cl; i++) {
            k1 = (k * m) % q;

            /* A(k*L:k*L+L-1) = A(k1*L:k1*L+L-1) */
            kL  = k *L;
            k1L = k1*L;

            for(j=0; j<L; j++) {
                A[kL+j] = A[k1L+j];
            }
            k = k1;
        }
    }
    else {
        while (1) {
            k1 = (k * m) % q;
            if( k1 == s )
                break;

            /* A(k*L:k*L+L-1) = A(k1*L:k1*L+L-1) */
            kL  = k *L;
            k1L = k1*L;

            for (j=0; j<L; j++) {
                A[kL+j] = A[k1L+j];
            }
            k = k1;
        }
    }

    /* Close the chain with the saved copy of the starting block. */
    memcpy(&(A[k*L]), W, L*sizeof(double));
}

Here is the caller graph for this function:

void CORE_dshiftw_quark ( Quark quark)

Definition at line 130 of file core_dshift.c.

References A, CORE_dshiftw(), L, quark_unpack_args_7, and W.

{
    int s, cl;
    int m, n, L;
    double *A, *W;

    /* Pull the 7 task arguments out of the QUARK task. */
    quark_unpack_args_7(quark, s, cl, m, n, L, A, W);

    /* W is passed through as received; it is not filled in here. */
    CORE_dshiftw(s, cl, m, n, L, A, W);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dssssm ( int  M1,
int  N1,
int  M2,
int  N2,
int  K,
int  IB,
double *  A1,
int  LDA1,
double *  A2,
int  LDA2,
double *  L1,
int  LDL1,
double *  L2,
int  LDL2,
int *  IPIV 
)

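CORE_dssssm applies the LU factorization update computed by CORE_dtstrf to a real matrix formed by an M1-by-N1 tile A1 on top of an M2-by-N2 tile A2 (N1 == N2): the row interchanges recorded in IPIV are applied first, followed by the transformations stored in the IB-by-K lower triangular tile L1 and in the M2-by-K tile L2.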

This is the right-looking Level 2.5 BLAS version of the algorithm.

Parameters:
[in]M1The number of rows of the tile A1. M1 >= 0.
[in]N1The number of columns of the tile A1. N1 >= 0.
[in]M2The number of rows of the tile A2. M2 >= 0.
[in]N2The number of columns of the tile A2. N2 >= 0.
[in]KThe number of columns of the tiles L1 and L2. K >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in,out]A1On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of L.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]A2On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of L.
[in]LDA2The leading dimension of the array A2. LDA2 >= max(1,M2).
[in]L1The IB-by-K lower triangular tile as returned by CORE_dtstrf.
[in]LDL1The leading dimension of the array L1. LDL1 >= max(1,IB).
[in]L2The M2-by-K tile as returned by CORE_dtstrf.
[in]LDL2The leading dimension of the array L2. LDL2 >= max(1,M2).
[in]IPIVas returned by CORE_dtstrf.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if INFO = -k, the k-th argument had an illegal value

Definition at line 90 of file core_dssssm.c.

References cblas_dgemm(), cblas_dswap(), cblas_dtrsm(), CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, coreblas_error, max, min, and PLASMA_SUCCESS.

{
    /*
     * For each block of up to IB columns of L1/L2 (K columns in total):
     *   1. apply the row interchanges recorded in IPIV between A1 and A2,
     *   2. solve with the unit lower triangular block of L1 against rows
     *      ii.. of A1 (cblas_dtrsm),
     *   3. update A2 = A2 - L2(:, ii:ii+sb-1) * A1(ii:ii+sb-1, :) (cblas_dgemm).
     *
     * Returns PLASMA_SUCCESS, or -i when the i-th argument is illegal.
     */
    static double zone  = 1.0;
    static double mzone =-1.0;

    int i, ii, sb;
    int im, ip;

    /* Check input arguments */
    if (M1 < 0) {
        coreblas_error(1, "Illegal value of M1");
        return -1;
    }
    if (N1 < 0) {
        coreblas_error(2, "Illegal value of N1");
        return -2;
    }
    if (M2 < 0) {
        coreblas_error(3, "Illegal value of M2");
        return -3;
    }
    if (N2 < 0) {
        coreblas_error(4, "Illegal value of N2");
        return -4;
    }
    if (K < 0) {
        coreblas_error(5, "Illegal value of K");
        return -5;
    }
    if (IB < 0) {
        coreblas_error(6, "Illegal value of IB");
        return -6;
    }
    if (LDA1 < max(1,M1)) {
        coreblas_error(8, "Illegal value of LDA1");
        return -8;
    }
    if (LDA2 < max(1,M2)) {
        coreblas_error(10, "Illegal value of LDA2");
        return -10;
    }
    if (LDL1 < max(1,IB)) {
        coreblas_error(12, "Illegal value of LDL1");
        return -12;
    }
    if (LDL2 < max(1,M2)) {
        coreblas_error(14, "Illegal value of LDL2");
        return -14;
    }

    /* Quick return (also keeps IB == 0 from stalling the block loop below) */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    ip = 0;

    for(ii = 0; ii < K; ii += IB) {
        sb = min(K-ii, IB);

        for(i = 0; i < sb; i++) {
            /* IPIV is 1-based; a pivot different from the current row is
             * re-based by M1 and used as a row index into A2. */
            im = IPIV[ip]-1;

            if (im != (ii+i)) {
                im = im - M1;
                cblas_dswap(N1, &A1[ii+i], LDA1, &A2[im], LDA2);
            }
            ip = ip + 1;
        }

        cblas_dtrsm(
            CblasColMajor, CblasLeft, CblasLower,
            CblasNoTrans, CblasUnit,
            sb, N1, (zone),
            &L1[LDL1*ii], LDL1,
            &A1[ii], LDA1);

        cblas_dgemm(
            CblasColMajor, CblasNoTrans, CblasNoTrans,
            M2, N2, sb,
            (mzone), &L2[LDL2*ii], LDL2,
            &A1[ii], LDA1,
            (zone), A2, LDA2);
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dssssm_quark ( Quark quark)

Definition at line 219 of file core_dssssm.c.

References CORE_dssssm(), IPIV, and quark_unpack_args_15.

{
    /* Tile dimensions and inner-blocking size. */
    int m1, n1, m2, n2;
    int k, ib;
    /* Tiles, their leading dimensions, and the pivot vector. */
    double *A1, *A2, *L1, *L2;
    int lda1, lda2, ldl1, ldl2;
    int *IPIV;

    /* Pull the 15 task arguments out of the QUARK task, in kernel order. */
    quark_unpack_args_15(quark, m1, n1, m2, n2, k, ib,
                         A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);

    /* Run the kernel; its int status is discarded by this void wrapper. */
    CORE_dssssm(m1, n1, m2, n2, k, ib,
                A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
}

Here is the call graph for this function:

Here is the caller graph for this function:

/**
 * CORE_dswpab swaps two adjacent contiguous blocks of data.
 *
 *        n1                 n2
 *   +---------+--------------------------+
 *
 * becomes:
 *
 *               n2                 n1
 *   +--------------------------+---------+
 *
 * Parameters:
 *   [in,out] A     Array of size i+n1+n2. On entry, a block of size n1
 *                  followed by a block of size n2. On exit, the block of
 *                  size n1 follows the block of size n2.
 *   [in]     i     First block starts at A[i].
 *   [in]     n1    Size of the first block to swap.
 *   [in]     n2    Size of the second block to swap.
 *   [out]    work  Workspace array of size min(n1, n2).
 *
 * Definition at line 63 of file core_dswpab.c.
 */
void CORE_dswpab(int i, int n1, int n2, double *A, double *work)
{
    double *first  = &(A[i]);      /* start of the n1-sized block on entry */
    double *second = &(A[i+n1]);   /* start of the n2-sized block on entry */
    double *moved  = &(A[i+n2]);   /* where the n1-sized block starts on exit */

    /* The smaller block is parked in work so that work only needs
     * min(n1, n2) elements. The larger block is then slid over it; source
     * and destination overlap, hence memmove rather than memcpy. */
    if (n1 < n2) {
        memcpy(work, first, n1*sizeof(double));
        memmove(first, second, n2*sizeof(double));
        memcpy(moved, work, n1*sizeof(double));
    } else {
        memcpy(work, second, n2*sizeof(double));
        memmove(moved, first, n1*sizeof(double));
        memcpy(first, work, n2*sizeof(double));
    }
}

Here is the caller graph for this function:

void CORE_dswpab_quark ( Quark quark)

Definition at line 107 of file core_dswpab.c.

References A, CORE_dswpab(), and quark_unpack_args_5.

{
    /* Offset of the first block and the two block lengths. */
    int i, n1, n2;
    /* Data array and scratch buffer handed to the kernel. */
    double *A, *work;

    /* Unpack the 5 task arguments and run the swap kernel on them. */
    quark_unpack_args_5(quark, i, n1, n2, A, work);
    CORE_dswpab( i, n1, n2, A, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dswptr_ontile ( PLASMA_desc  descA,
int  i1,
int  i2,
int *  ipiv,
int  inc,
double *  Akk,
int  ldak 
)

CORE_dswptr_ontile applies the dlaswp function to a matrix stored in tile layout, followed by a dtrsm on the first tile of the panel.

Parameters:
[in,out]AThe descriptor of the matrix A to permute.
[in]i1The first element of IPIV for which a row interchange will be done.
[in]i2The last element of IPIV for which a row interchange will be done.
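[in]ipivThe pivot indices; only the elements in positions i1 to i2 are accessed. The pivots are offset by A.i.
[in]incThe increment between successive values of IPIV. If inc is negative, the pivots are applied in reverse order.
[in]AkkThe tile passed as the triangular factor to the dtrsm applied on the first tile of the panel.
[in]ldakThe leading dimension of Akk.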

Definition at line 325 of file core_dlaswp.c.

References A, BLKLDD, cblas_dtrsm(), CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, CORE_dlaswp_ontile(), coreblas_error, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nt, and PLASMA_SUCCESS.

{
    double zone = 1.0;
    int lda;
    /* Row count used for the checks and the solve: the full matrix height
     * when there is a single tile row, otherwise one tile height (mb). */
    int m = descA.mt == 1 ? descA.m : descA.mb;

    /* Check input arguments */
    if ( descA.nt > 1 ) {
        coreblas_error(1, "Illegal value of descA.nt");
        return -1;
    }
    if ( i1 < 1 ) {
        coreblas_error(2, "Illegal value of i1");
        return -2;
    }
    if ( (i2 < i1) || (i2 > m) ) {
        coreblas_error(3, "Illegal value of i2");
        return -3;
    }

    /* Row interchanges on the tile-layout panel. */
    CORE_dlaswp_ontile(descA, i1, i2, ipiv, inc);

    /* Triangular solve on tile (0,0), with Akk applied from the left as a
     * unit lower triangular factor. */
    lda = BLKLDD(descA, 0);
    cblas_dtrsm(
        CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
        m, descA.n, (zone),
        Akk, ldak,
        A(0, 0), lda );

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dswptr_ontile_quark ( Quark quark)

Definition at line 385 of file core_dlaswp.c.

References A, CORE_dswptr_ontile(), and quark_unpack_args_8.

{
    PLASMA_desc descA;
    /* A is unpacked with the other arguments but not forwarded to the kernel. */
    double *A;
    double *Akk;
    int *ipiv;
    int i1;
    int i2;
    int inc;
    int ldak;

    /* Unpack the 8 task arguments and run the swap + triangular-solve kernel. */
    quark_unpack_args_8(quark, descA, A, i1, i2, ipiv, inc, Akk, ldak);
    CORE_dswptr_ontile(descA, i1, i2, ipiv, inc, Akk, ldak);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsygst_quark ( Quark quark)

Definition at line 67 of file core_dsygst.c.

References A, B, itype, lapack_const, plasma_sequence_flush(), PLASMA_SUCCESS, quark_unpack_args_10, plasma_sequence_t::status, and uplo.

{
    int itype;
    PLASMA_enum uplo;
    int n;
    double *A;
    int lda;
    double *B;
    int ldb;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    int iinfo;
    int info;

    /* Unpack the 10 task arguments from the QUARK handle. */
    quark_unpack_args_10(quark, itype, uplo, n, A, lda, B, ldb, sequence, request, iinfo);

    /* Column-major LAPACKE call; uplo is translated through lapack_const. */
    info = LAPACKE_dsygst_work(
        LAPACK_COL_MAJOR,
        itype,
        lapack_const(uplo),
        n, A, lda, B, ldb);

    /* Flush the sequence with iinfo+info on a nonzero LAPACK status, but only
     * while the sequence is still in the PLASMA_SUCCESS state. */
    if (sequence->status == PLASMA_SUCCESS && info != 0)
        plasma_sequence_flush(quark, sequence, request, iinfo+info);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsymm ( int  side,
int  uplo,
int  M,
int  N,
double  alpha,
double *  A,
int  LDA,
double *  B,
int  LDB,
double  beta,
double *  C,
int  LDC 
)

Definition at line 28 of file core_dsymm.c.

References cblas_dsymm(), and CblasColMajor.

{
    /* Column-major symmetric matrix-matrix product delegated to CBLAS;
     * side and uplo are passed through as CBLAS enum values. */
    cblas_dsymm(
        CblasColMajor,
        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
        M, N,
        (alpha), A, LDA,
        B, LDB,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsymm_quark ( Quark quark)

Definition at line 77 of file core_dsymm.c.

References A, B, C, cblas_dsymm(), CblasColMajor, quark_unpack_args_12, side, and uplo.

{
    int side;
    int uplo;
    int M;
    int N;
    double alpha;
    double *A;
    int LDA;
    double *B;
    int LDB;
    double beta;
    double *C;
    int LDC;

    /* Unpack the 12 task arguments, then call CBLAS directly (column-major). */
    quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC);
    cblas_dsymm(
        CblasColMajor,
        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
        M, N,
        (alpha), A, LDA,
        B, LDB,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsyr2k ( int  uplo,
int  trans,
int  N,
int  K,
double  alpha,
double *  A,
int  LDA,
double *  B,
int  LDB,
double  beta,
double *  C,
int  LDC 
)

Definition at line 28 of file core_dsyr2k.c.

References cblas_dsyr2k(), and CblasColMajor.

{
    /* Column-major symmetric rank-2k update delegated to CBLAS;
     * uplo and trans are passed through as CBLAS enum values. */
    cblas_dsyr2k(
        CblasColMajor,
        (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
        N, K,
        (alpha), A, LDA, B, LDB,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsyr2k_quark ( Quark quark)

Definition at line 76 of file core_dsyr2k.c.

References A, B, C, CORE_dsyr2k(), quark_unpack_args_12, trans, and uplo.

{
    /* Task arguments, grouped by type. */
    int uplo, trans;
    int n, k;
    int lda, ldb, ldc;
    double alpha, beta;
    double *A, *B, *C;

    /* Unpack the 12 task arguments and forward them to the kernel. */
    quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
    CORE_dsyr2k(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsyrfb_quark ( Quark quark)

Definition at line 215 of file core_dsyrfb.c.

References A, C, CORE_dsyrfb(), quark_unpack_args_13, T, and uplo.

{
    PLASMA_enum uplo;
    int n;
    int k;
    int ib;
    int nb;
    double *A;
    int lda;
    double *T;
    int ldt;
    double *C;
    int ldc;
    double *WORK;
    int ldwork;

    /* Unpack the 13 task arguments and forward them to the kernel. */
    quark_unpack_args_13(quark, uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
    CORE_dsyrfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsyrk ( int  uplo,
int  trans,
int  N,
int  K,
double  alpha,
double *  A,
int  LDA,
double  beta,
double *  C,
int  LDC 
)

Definition at line 28 of file core_dsyrk.c.

References cblas_dsyrk(), and CblasColMajor.

{
    /* Column-major symmetric rank-k update delegated to CBLAS;
     * uplo and trans are passed through as CBLAS enum values. */
    cblas_dsyrk(
        CblasColMajor,
        (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
        N, K,
        (alpha), A, LDA,
        (beta), C, LDC);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dsyrk_quark ( Quark quark)

Definition at line 72 of file core_dsyrk.c.

References A, C, cblas_dsyrk(), CblasColMajor, quark_unpack_args_10, trans, and uplo.

{
    int uplo;
    int trans;
    int n;
    int k;
    double alpha;
    double *A;
    int lda;
    double beta;
    double *C;
    int ldc;

    /* Unpack the 10 task arguments, then call CBLAS directly (column-major). */
    quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
    cblas_dsyrk(
        CblasColMajor,
        (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
        n, k,
        (alpha), A, lda,
        (beta), C, ldc);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtrdalg ( PLASMA_enum  uplo,
int  N,
int  NB,
PLASMA_desc pA,
double *  V,
double *  TAU,
int  i,
int  j,
int  m,
int  grsiz 
)

CORE_dtrdalg is part of the tridiagonal reduction algorithm (bulge chasing). It corresponds to a local driver of the kernels that should be executed on a single core.

Parameters:
[in]uplo
  • PlasmaLower:
  • PlasmaUpper:
[in]NThe order of the matrix A. N >= 0.
[in]NBThe size of the bandwidth of the matrix A, which corresponds to the tile size. NB >= 0.
[in]pAA pointer to the descriptor of the matrix A.
[out]Vdouble array, dimension (N). The scalar elementary reflectors are written in this array, so it is used as a workspace for V at each step of the bulge chasing algorithm.
[out]TAUdouble array, dimension (N). The scalar factors of the elementary reflectors are written in this array, so it is used as a workspace for TAU at each step of the bulge chasing algorithm.
[in]iInteger that refers to the current sweep (outer loop).
[in]jInteger that refers to the sweep to chase (inner loop).
[in]mInteger that refers to a sweep step, to ensure order dependencies.
[in]grsizInteger that refers to the size of a group. A group is the number of kernels that should be executed sequentially on the same core. The group size is a trade-off between locality (cache reuse) and parallelism: a small group size increases parallelism, while a large group size increases cache reuse.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 82 of file core_dtrdalg.c.

References A, CORE_dhbelr(), CORE_dhblrx(), CORE_dhbrce(), plasma_desc_t::dtyp, min, and plasma_element_size().

{
    int shift = 3;
    int step, stepercol;
    int myid, colpt, stind, edind, blklastind;
    size_t eltsize;
    PLASMA_desc A = *pA;   /* local copy of the descriptor */

    eltsize = plasma_element_size(A.dtyp);

    /* Truncated quotient shift/grsiz, plus one when grsiz does not divide shift. */
    stepercol = shift / grsiz;
    if (stepercol*grsiz != shift)
        stepercol = stepercol + 1;

    /* Run up to grsiz kernels of this group, one after the other. */
    for (step = 0; step < grsiz; step++) {
        myid = (i-j)*(stepercol*grsiz) +(m-1)*grsiz + step+1;

        /* Column pointer, then the [stind, edind] index window handed to the kernel. */
        if (myid%2 == 0)
            colpt = (myid/2) * NB + 1 + j - 1;
        else
            colpt = ((myid+1)/2)*NB + 1 +j -1 ;
        stind = colpt - NB + 1;
        edind = min(colpt, N);

        /* blklastind drives the early exit at the bottom of the loop body. */
        if (myid%2 == 0)
            blklastind = colpt;
        else if ( (stind>=edind-1) && (edind==N) )
            blklastind = N;
        else
            blklastind = 0;

        /* Kernel choice: the first task, then even ids, then the other odd ids. */
        if (myid == 1) {
            CORE_dhbelr(uplo, N, &A, V, TAU, stind, edind, eltsize);
        } else if (myid%2 == 0) {
            CORE_dhbrce(uplo, N, &A, V, TAU, stind, edind, eltsize);
        } else {
            CORE_dhblrx(uplo, N, &A, V, TAU, stind, edind, eltsize);
        }

        if (blklastind >= (N-1))
            break;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtrdalg_quark ( Quark quark)

Definition at line 160 of file core_dtrdalg.c.

References CORE_dtrdalg(), quark_unpack_args_10, TAU, uplo, and V.

{
    PLASMA_desc *pA;
    double *V;
    double *TAU;
    int uplo;
    int N, NB;
    int i, j, m, grsiz;

    /* Unpack the 10 task arguments and forward them to the kernel. */
    quark_unpack_args_10(quark, uplo, N, NB, pA, V, TAU, i, j, m, grsiz);
    CORE_dtrdalg(uplo, N, NB, pA, V, TAU, i, j, m, grsiz);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtrmm ( int  side,
int  uplo,
int  transA,
int  diag,
int  M,
int  N,
double  alpha,
double *  A,
int  LDA,
double *  B,
int  LDB 
)

Definition at line 28 of file core_dtrmm.c.

References cblas_dtrmm(), and CblasColMajor.

{
    /* Column-major triangular matrix-matrix product delegated to CBLAS;
     * side, uplo, transA and diag are passed through as CBLAS enum values. */
    cblas_dtrmm(
        CblasColMajor,
        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
        (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
        M, N,
        (alpha), A, LDA,
        B, LDB);
}

Here is the call graph for this function:

void CORE_dtrmm_p2_quark ( Quark quark)

Definition at line 132 of file core_dtrmm.c.

References A, B, cblas_dtrmm(), CblasColMajor, diag, quark_unpack_args_11, side, and uplo.

{
    int side;
    int uplo;
    int transA;
    int diag;
    int M;
    int N;
    double alpha;
    double *A;
    int LDA;
    double **B;   /* pointer to the tile pointer; dereferenced at call time */
    int LDB;

    /* Unpack the 11 task arguments, then call CBLAS directly (column-major). */
    quark_unpack_args_11(quark, side, uplo, transA, diag, M, N, alpha, A, LDA, B, LDB);
    cblas_dtrmm(
        CblasColMajor,
        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
        (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
        M, N,
        (alpha), A, LDA,
        *B, LDB);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtrmm_quark ( Quark quark)

Definition at line 76 of file core_dtrmm.c.

References A, B, cblas_dtrmm(), CblasColMajor, diag, quark_unpack_args_11, side, and uplo.

{
    int side;
    int uplo;
    int transA;
    int diag;
    int M;
    int N;
    double alpha;
    double *A;
    int LDA;
    double *B;
    int LDB;

    /* Unpack the 11 task arguments, then call CBLAS directly (column-major). */
    quark_unpack_args_11(quark, side, uplo, transA, diag, M, N, alpha, A, LDA, B, LDB);
    cblas_dtrmm(
        CblasColMajor,
        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
        (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
        M, N,
        (alpha), A, LDA,
        B, LDB);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtrsm ( int  side,
int  uplo,
int  transA,
int  diag,
int  M,
int  N,
double  alpha,
double *  A,
int  LDA,
double *  B,
int  LDB 
)

Definition at line 28 of file core_dtrsm.c.

References cblas_dtrsm(), and CblasColMajor.

{
    /* Column-major triangular solve delegated to CBLAS;
     * side, uplo, transA and diag are passed through as CBLAS enum values. */
    cblas_dtrsm(
        CblasColMajor,
        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
        (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
        M, N,
        (alpha), A, LDA,
        B, LDB);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtrsm_quark ( Quark quark)

Definition at line 75 of file core_dtrsm.c.

References A, B, cblas_dtrsm(), CblasColMajor, diag, quark_unpack_args_11, side, and uplo.

{
    int side;
    int uplo;
    int transA;
    int diag;
    int m;
    int n;
    double alpha;
    double *A;
    int lda;
    double *B;
    int ldb;

    /* Unpack the 11 task arguments, then call CBLAS directly (column-major). */
    quark_unpack_args_11(quark, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb);
    cblas_dtrsm(
        CblasColMajor,
        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
        (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
        m, n,
        (alpha), A, lda,
        B, ldb);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtrtri ( int  uplo,
int  diag,
int  N,
double *  A,
int  LDA,
int *  info 
)

Definition at line 29 of file core_dtrtri.c.

References lapack_const.

{
    /* Column-major LAPACKE call; uplo and diag are translated through
     * lapack_const, and the LAPACK status is handed back through *info. */
    *info = LAPACKE_dtrtri_work(
        LAPACK_COL_MAJOR,
        lapack_const(uplo), lapack_const(diag),
        N, A, LDA);
}
void CORE_dtrtri_quark ( Quark quark)

Definition at line 67 of file core_dtrtri.c.

References A, diag, lapack_const, plasma_sequence_flush(), PLASMA_SUCCESS, quark_unpack_args_8, plasma_sequence_t::status, and uplo.

{
    int uplo, diag;
    int N, LDA;
    double *A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    int iinfo;
    int info;

    /* Unpack the 8 task arguments from the QUARK handle. */
    quark_unpack_args_8(quark, uplo, diag, N, A, LDA, sequence, request, iinfo);

    /* Column-major LAPACKE call; uplo and diag go through lapack_const. */
    info = LAPACKE_dtrtri_work(LAPACK_COL_MAJOR,
                               lapack_const(uplo), lapack_const(diag),
                               N, A, LDA);

    /* Only a strictly positive status is reported (offset by iinfo), and only
     * while the sequence is still in the PLASMA_SUCCESS state. */
    if (sequence->status == PLASMA_SUCCESS) {
        if (info > 0) {
            plasma_sequence_flush(quark, sequence, request, iinfo + info);
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dtslqt ( int  M,
int  N,
int  IB,
double *  A1,
int  LDA1,
double *  A2,
int  LDA2,
double *  T,
int  LDT,
double *  TAU,
double *  WORK 
)

CORE_dtslqt computes an LQ factorization of a rectangular matrix formed by coupling side-by-side a real M-by-M lower triangular tile A1 and a real M-by-N tile A2:

| A1 A2 | = L * Q

The tile Q is represented as a product of elementary reflectors

Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A2(i,1:n), and tau in TAU(i).

Parameters:
[in]MThe number of rows of the tile A1 and A2. M >= 0. The number of columns of the tile A1.
[in]NThe number of columns of the tile A2. N >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in,out]A1On entry, the M-by-M tile A1. On exit, the elements on and below the diagonal of the array contain the M-by-M lower trapezoidal tile L; the elements above the diagonal are not referenced.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M).
[in,out]A2On entry, the M-by-N tile A2. On exit, all the elements, together with the array TAU, represent the orthogonal tile Q as a product of elementary reflectors (see Further Details).
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M).
[out]TThe IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[out]TAUThe scalar factors of the elementary reflectors (see Further Details).
[out]WORK
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 107 of file core_dtslqt.c.

References cblas_daxpy(), cblas_dcopy(), cblas_dgemv(), cblas_dger(), cblas_dtrmv(), CblasColMajor, CORE_dtsmlq(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaTrans, and PlasmaUpper.

{
    static double zone = 1.0;
    static double zzero = 0.0;
    double alpha;
    int i, ii, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA2");
        return -8;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* Process the rows in panels of at most IB rows. */
    for(ii = 0; ii < M; ii += IB) {
        sb = min(M-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:N ).
             */
#ifdef COMPLEX
            LAPACKE_dlacgv_work(N, &A2[ii+i], LDA2);
            LAPACKE_dlacgv_work(1, &A1[LDA1*(ii+i)+ii+i], LDA1);
#endif
            LAPACKE_dlarfg_work(N+1, &A1[LDA1*(ii+i)+ii+i], &A2[ii+i], LDA2, &TAU[ii+i]);
            alpha = -(TAU[ii+i]);
            if (ii+i+1 < M) {
                /*
                 * Apply H( II+I-1 ) to A( II+I:II+IB-1, II+I-1:N ) from the right.
                 */
                /* WORK := column of A1 below the diagonal entry */
                cblas_dcopy(
                    sb-i-1,
                    &A1[LDA1*(ii+i)+(ii+i+1)], 1,
                    WORK, 1);
                /* WORK := WORK + A2(remaining rows, :) * v */
                cblas_dgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    sb-i-1, N,
                    (zone), &A2[ii+i+1], LDA2,
                    &A2[ii+i], LDA2,
                    (zone), WORK, 1);
                /* A1 column := A1 column - tau * WORK */
                cblas_daxpy(
                    sb-i-1, (alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i)+ii+i+1], 1);
                /* A2(remaining rows, :) := A2 - tau * WORK * v' */
                cblas_dger(
                    CblasColMajor, sb-i-1, N,
                    (alpha), WORK, 1,
                    &A2[ii+i], LDA2,
                    &A2[ii+i+1], LDA2);
            }
            /*
             * Calculate T.
             */
            cblas_dgemv(
                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans, i, N,
                (alpha), &A2[ii], LDA2,
                &A2[ii+i], LDA2,
                (zzero), &T[LDT*(ii+i)], 1);
#ifdef COMPLEX
            LAPACKE_dlacgv_work(N, &A2[ii+i], LDA2 );
            LAPACKE_dlacgv_work(1, &A1[LDA1*(ii+i)+ii+i], LDA1 );
#endif
            cblas_dtrmv(
                CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit,
                i,
                &T[LDT*ii], LDT,
                &T[LDT*(ii+i)], 1);
            T[LDT*(ii+i)+i] = TAU[ii+i];
        }
        /* Apply the panel's block reflector to the rows below the panel. */
        if (M > ii+sb) {
            CORE_dtsmlq(
                PlasmaRight, PlasmaTrans,
                M-(ii+sb), sb, M-(ii+sb), N, IB, IB,
                &A1[LDA1*ii+ii+sb], LDA1,
                &A2[ii+sb], LDA2,
                &A2[ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, LDA1);
        }
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dtslqt_quark ( Quark quark)

Definition at line 247 of file core_dtslqt.c.

References CORE_dtslqt(), quark_unpack_args_11, T, and TAU.

{
    /* Task arguments, grouped by type. */
    int m, n, ib;
    int lda1, lda2, ldt;
    double *A1, *A2;
    double *T, *TAU, *WORK;

    /* Unpack the 11 task arguments and forward them to the kernel.
     * The kernel's return value is not used. */
    quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
    CORE_dtslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int CORE_dtsmlq ( int  side,
int  trans,
int  M1,
int  N1,
int  M2,
int  N2,
int  K,
int  IB,
double *  A1,
int  LDA1,
double *  A2,
int  LDA2,
double *  V,
int  LDV,
double *  T,
int  LDT,
double *  WORK,
int  LDWORK 
)

CORE_dtsmlq overwrites the general real M1-by-N1 tile A1 and M2-by-N2 tile A2 with

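                  SIDE = 'L'                                            SIDE = 'R'

TRANS = 'N':   $Q \begin{pmatrix} A1 \\ A2 \end{pmatrix}$          $\begin{pmatrix} A1 & A2 \end{pmatrix} Q$

TRANS = 'C':   $Q^{T} \begin{pmatrix} A1 \\ A2 \end{pmatrix}$      $\begin{pmatrix} A1 & A2 \end{pmatrix} Q^{T}$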

where Q is a real orthogonal matrix defined as the product of k elementary reflectors

Q = H(k)' . . . H(2)' H(1)'

as returned by CORE_DTSLQT.

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**T from the Left;
  • PlasmaRight : apply Q or Q**T from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaTrans : Transpose, apply Q**T.
[in]M1The number of rows of the tile A1. M1 >= 0.
[in]N1The number of columns of the tile A1. N1 >= 0.
[in]M2The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]N2The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]KThe number of elementary reflectors whose product defines the matrix Q.
[in]IBThe inner-blocking size. IB >= 0.
[in,out]A1On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]A2On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]VThe i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_DTSLQT in the first k rows of its array argument V.
[in]LDVThe leading dimension of the array V. LDV >= max(1,K).
[out]TThe IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[out]WORKWorkspace array of size LDWORK-by-M1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]LDWORKThe leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,N1) if side == PlasmaRight
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 124 of file core_dtsmlq.c.

References CORE_dparfb(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, and PlasmaTrans.

{
    int i, i1, i3;
    int NW;
    int kb;
    int ic = 0;
    int jc = 0;
    int mi = M1;
    int ni = N1;

    /* Check input arguments */
    if ((side != PlasmaLeft) && (side != PlasmaRight)) {
        coreblas_error(1, "Illegal value of side");
        return -1;
    }
    /* NW is the minimum dimension of WORK */
    if (side == PlasmaLeft) {
        NW = IB;
    }
    else {
        NW = N1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        coreblas_error(2, "Illegal value of trans");
        return -2;
    }
    if (M1 < 0) {
        coreblas_error(3, "Illegal value of M1");
        return -3;
    }
    if (N1 < 0) {
        coreblas_error(4, "Illegal value of N1");
        return -4;
    }
    if ( (M2 < 0) ||
         ( (M2 != M1) && (side == PlasmaRight) ) ){
        coreblas_error(5, "Illegal value of M2");
        return -5;
    }
    if ( (N2 < 0) ||
         ( (N2 != N1) && (side == PlasmaLeft) ) ){
        coreblas_error(6, "Illegal value of N2");
        return -6;
    }
    if ((K < 0) ||
        ( (side == PlasmaLeft) && (K > M1) ) ||
        ( (side == PlasmaRight) && (K > N1) ) ) {
        coreblas_error(7, "Illegal value of K");
        return -7;
    }
    if (IB < 0) {
        coreblas_error(8, "Illegal value of IB");
        return -8;
    }
    if (LDA1 < max(1,M1)){
        coreblas_error(10, "Illegal value of LDA1");
        return -10;
    }
    if (LDA2 < max(1,M2)){
        coreblas_error(12, "Illegal value of LDA2");
        return -12;
    }
    if (LDV < max(1,K)){
        coreblas_error(14, "Illegal value of LDV");
        return -14;
    }
    if (LDT < max(1,IB)){
        coreblas_error(16, "Illegal value of LDT");
        return -16;
    }
    if (LDWORK < max(1,NW)){
        coreblas_error(18, "Illegal value of LDWORK");
        return -18;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* Direction of the sweep over the blocks of IB reflectors:
     * forward from block 0, or backward from the last block. */
    if (((side == PlasmaLeft) && (trans == PlasmaNoTrans))
        || ((side == PlasmaRight) && (trans != PlasmaNoTrans))) {
        i1 = 0;
        i3 = IB;
    }
    else {
        i1 = ((K-1) / IB)*IB;
        i3 = -IB;
    }

    /* The block reflectors are applied with the opposite transposition flag.
     * NOTE(review): both branch bodies were empty in this listing; the swap
     * below is a reconstruction - confirm against core_dtsmlq.c. */
    if (trans == PlasmaNoTrans) {
        trans = PlasmaTrans;
    }
    else {
        trans = PlasmaNoTrans;
    }

    for(i = i1; (i > -1) && (i < K); i += i3) {
        kb = min(IB, K-i);
        if (side == PlasmaLeft) {
            /*
             * H or H' is applied to C(i:m,1:n)
             */
            mi = M1 - i;
            ic = i;
        }
        else {
            /*
             * H or H' is applied to C(1:m,i:n)
             */
            ni = N1 - i;
            jc = i;
        }
        /*
         * Apply H or H' (NOTE: CORE_dparfb used to be CORE_dtsrfb)
         */
        CORE_dparfb(
            side, trans, PlasmaForward, PlasmaRowwise,
            mi, ni, M2, N2, kb, 0,
            &A1[LDA1*jc+ic], LDA1,
            A2, LDA2,
            &V[i], LDV,
            &T[LDT*i], LDT,
            WORK, LDWORK);
    }
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function: