MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
magma_z.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define PRECISION_z
 

Functions

magma_int_t magma_zgebrd (magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, double *d, double *e, cuDoubleComplex *tauq, cuDoubleComplex *taup, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgehrd2 (magma_int_t n, magma_int_t ilo, magma_int_t ihi, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t *lwork, magma_int_t *info)
 
magma_int_t magma_zgehrd (magma_int_t n, magma_int_t ilo, magma_int_t ihi, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, cuDoubleComplex *d_T, magma_int_t *info)
 
magma_int_t magma_zgelqf (magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqlf (magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf (magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf_ooc (magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgesv (magma_int_t n, magma_int_t nrhs, cuDoubleComplex *A, magma_int_t lda, magma_int_t *ipiv, cuDoubleComplex *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_zgetrf (magma_int_t m, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf_mc (magma_context *cntxt, magma_int_t *m, magma_int_t *n, cuDoubleComplex *A, magma_int_t *lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgeqrf_mc (magma_context *cntxt, magma_int_t *m, magma_int_t *n, cuDoubleComplex *A, magma_int_t *lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t *lwork, magma_int_t *info)
 
magma_int_t magma_zgetrf2 (magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zlatrd (char uplo, magma_int_t n, magma_int_t nb, cuDoubleComplex *a, magma_int_t lda, double *e, cuDoubleComplex *tau, cuDoubleComplex *w, magma_int_t ldw, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *dw, magma_int_t lddw)
 
magma_int_t magma_zlatrd2 (char uplo, magma_int_t n, magma_int_t nb, cuDoubleComplex *a, magma_int_t lda, double *e, cuDoubleComplex *tau, cuDoubleComplex *w, magma_int_t ldw, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *dw, magma_int_t lddw, cuDoubleComplex *dwork, magma_int_t ldwork)
 
magma_int_t magma_zlahr2 (magma_int_t m, magma_int_t n, magma_int_t nb, cuDoubleComplex *da, cuDoubleComplex *dv, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *t, magma_int_t ldt, cuDoubleComplex *y, magma_int_t ldy)
 
magma_int_t magma_zlahru (magma_int_t m, magma_int_t n, magma_int_t nb, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *da, cuDoubleComplex *y, cuDoubleComplex *v, cuDoubleComplex *t, cuDoubleComplex *dwork)
 
magma_int_t magma_zposv (char uplo, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *A, magma_int_t lda, cuDoubleComplex *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_zpotrf (char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zpotrf_mc (magma_context *cntxt, char *uplo, magma_int_t *n, cuDoubleComplex *A, magma_int_t *lda, magma_int_t *info)
 
magma_int_t magma_zpotri (char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zlauum (char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_ztrtri (char uplo, char diag, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zhetrd (char uplo, magma_int_t n, cuDoubleComplex *A, magma_int_t lda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf2 (magma_context *cntxt, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf3 (magma_context *cntxt, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zungqr (magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *dwork, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zunmql (const char side, const char trans, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *c, magma_int_t ldc, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zunmqr (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *c, magma_int_t ldc, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zunmtr (char side, char uplo, char trans, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *c, magma_int_t ldc, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zunghr (magma_int_t n, magma_int_t ilo, magma_int_t ihi, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zheev (char jobz, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, double *w, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zgeev (char jobvl, char jobvr, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *w, cuDoubleComplex *vl, magma_int_t ldvl, cuDoubleComplex *vr, magma_int_t ldvr, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zgesvd (char jobu, char jobvt, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, double *s, cuDoubleComplex *u, magma_int_t ldu, cuDoubleComplex *vt, magma_int_t ldvt, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zheevd (char jobz, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, double *w, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvd (magma_int_t itype, char jobz, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, double *w, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegst (magma_int_t itype, char uplo, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_zgels_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *dB, magma_int_t lddb, cuDoubleComplex *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgels3_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *dB, magma_int_t lddb, cuDoubleComplex *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgelqf_gpu (magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf_gpu (magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dT, magma_int_t *info)
 
magma_int_t magma_zgeqrf2_gpu (magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, magma_int_t *info)
 
magma_int_t magma_zgeqrf3_gpu (magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dT, magma_int_t *info)
 
magma_int_t magma_zgeqrs_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dT, cuDoubleComplex *dB, magma_int_t lddb, cuDoubleComplex *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_zgeqrs3_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dT, cuDoubleComplex *dB, magma_int_t lddb, cuDoubleComplex *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_zgessm_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL, magma_int_t lddl, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zgesv_gpu (magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, cuDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zgetrf_incpiv_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_zgetrf_gpu (magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf_nopiv_gpu (magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zgetrs_gpu (char trans, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, cuDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zlabrd_gpu (magma_int_t m, magma_int_t n, magma_int_t nb, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *da, magma_int_t ldda, double *d, double *e, cuDoubleComplex *tauq, cuDoubleComplex *taup, cuDoubleComplex *x, magma_int_t ldx, cuDoubleComplex *dx, magma_int_t lddx, cuDoubleComplex *y, magma_int_t ldy, cuDoubleComplex *dy, magma_int_t lddy)
 
magma_int_t magma_zlarfb_gpu (char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *dv, magma_int_t ldv, cuDoubleComplex *dt, magma_int_t ldt, cuDoubleComplex *dc, magma_int_t ldc, cuDoubleComplex *dowrk, magma_int_t ldwork)
 
magma_int_t magma_zposv_gpu (char uplo, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zpotrf_gpu (char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zpotri_gpu (char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zlauum_gpu (char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_ztrtri_gpu (char uplo, char diag, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zhetrd_gpu (char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *wa, magma_int_t ldwa, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zhetrd2_gpu (char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, double *d, double *e, cuDoubleComplex *tau, cuDoubleComplex *wa, magma_int_t ldwa, cuDoubleComplex *work, magma_int_t lwork, cuDoubleComplex *dwork, magma_int_t ldwork, magma_int_t *info)
 
magma_int_t magma_zpotrs_gpu (char uplo, magma_int_t n, magma_int_t nrhs, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zssssm_gpu (char storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, cuDoubleComplex *dA1, magma_int_t ldda1, cuDoubleComplex *dA2, magma_int_t ldda2, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info)
 
magma_int_t magma_ztstrf_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, cuDoubleComplex *hU, magma_int_t ldhu, cuDoubleComplex *dU, magma_int_t lddu, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *hwork, magma_int_t ldhwork, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_zungqr_gpu (magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dwork, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zunmql2_gpu (const char side, const char trans, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dc, magma_int_t lddc, cuDoubleComplex *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_zunmqr_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *c, magma_int_t ldc, cuDoubleComplex *work, magma_int_t lwork, cuDoubleComplex *td, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zunmqr2_gpu (const char side, const char trans, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dc, magma_int_t lddc, cuDoubleComplex *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_zunmtr_gpu (char side, char uplo, char trans, magma_int_t m, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *tau, cuDoubleComplex *dc, magma_int_t lddc, cuDoubleComplex *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_zheevd_gpu (char jobz, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, double *w, cuDoubleComplex *wa, magma_int_t ldwa, cuDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegst_gpu (magma_int_t itype, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
 

Macro Definition Documentation

#define PRECISION_z

Definition at line 13 of file magma_z.h.

Function Documentation

magma_int_t magma_zgebrd ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
double *  d,
double *  e,
cuDoubleComplex *  tauq,
cuDoubleComplex *  taup,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeev ( char  jobvl,
char  jobvr,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  w,
cuDoubleComplex *  vl,
magma_int_t  ldvl,
cuDoubleComplex *  vr,
magma_int_t  ldvr,
cuDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgehrd ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
cuDoubleComplex *  d_T,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgehrd2 ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t *  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgelqf ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgelqf_gpu ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgels3_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  dB,
magma_int_t  lddb,
cuDoubleComplex *  hwork,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgels_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  dB,
magma_int_t  lddb,
cuDoubleComplex *  hwork,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqlf ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqrf ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqrf2 ( magma_context *  cntxt,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 73 of file zgeqrf-v2.cpp.

References __func__, a_ref, da_ref, dwork, magma_qr_params::flag, magma_qr_params::ib, lapackf77_zgeqrf, lapackf77_zlarft, MAGMA_ERR_ILLEGAL_VALUE, MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgeqrf_mc(), magma_zlarfb_gpu(), MagmaColumnwise, MagmaColumnwiseStr, MagmaConjTrans, MagmaForward, MagmaForwardStr, MagmaLeft, MagmaUpper, max, min, magma_qr_params::nb, context::nb, magma_qr_params::p, context::params, magma_qr_params::t, magma_qr_params::w, zpanel_to_q(), and zq_to_panel().

77 {
78 /* -- MAGMA (version 1.4.0) --
79  Univ. of Tennessee, Knoxville
80  Univ. of California, Berkeley
81  Univ. of Colorado, Denver
82  August 2013
83 
84  Purpose
85  =======
86  ZGEQRF computes a QR factorization of a COMPLEX_16 M-by-N matrix A:
87  A = Q * R. This version does not require work space on the GPU
88  passed as input. GPU memory is allocated in the routine.
89 
90  Arguments
91  =========
92  CNTXT (input) MAGMA_CONTEXT
93  CNTXT specifies the MAGMA hardware context for this routine.
94 
95  M (input) INTEGER
96  The number of rows of the matrix A. M >= 0.
97 
98  N (input) INTEGER
99  The number of columns of the matrix A. N >= 0.
100 
101  A (input/output) COMPLEX_16 array, dimension (LDA,N)
102  On entry, the M-by-N matrix A.
103  On exit, the elements on and above the diagonal of the array
104  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
105  upper triangular if m >= n); the elements below the diagonal,
106  with the array TAU, represent the orthogonal matrix Q as a
107  product of min(m,n) elementary reflectors (see Further
108  Details).
109 
110  Higher performance is achieved if A is in pinned memory, e.g.
111  allocated using cudaMallocHost.
112 
113  LDA (input) INTEGER
114  The leading dimension of the array A. LDA >= max(1,M).
115 
116  TAU (output) COMPLEX_16 array, dimension (min(M,N))
117  The scalar factors of the elementary reflectors (see Further
118  Details).
119 
120  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
121  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
122 
123  Higher performance is achieved if WORK is in pinned memory, e.g.
124  allocated using cudaMallocHost.
125 
126  LWORK (input) INTEGER
127  The dimension of the array WORK. LWORK >= N*NB,
128  where NB can be obtained through magma_get_zgeqrf_nb(M).
129 
130  If LWORK = -1, then a workspace query is assumed; the routine
131  only calculates the optimal size of the WORK array, returns
132  this value as the first entry of the WORK array, and no error
133  message related to LWORK is issued.
134 
135  INFO (output) INTEGER
136  = 0: successful exit
137  < 0: if INFO = -i, the i-th argument had an illegal value
138  if INFO = -8, the GPU memory allocation failed
139 
140  Further Details
141  ===============
142  The matrix Q is represented as a product of elementary reflectors
143 
144  Q = H(1) H(2) . . . H(k), where k = min(m,n).
145 
146  Each H(i) has the form
147 
148  H(i) = I - tau * v * v'
149 
150  where tau is a complex scalar, and v is a complex vector with
151  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
152  and tau in TAU(i).
153  ===================================================================== */
154 
155  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
156  #define da_ref(a_1,a_2) (da+(a_2)*ldda + (a_1))
157 
158  int cnt=-1;
159  cuDoubleComplex c_one = MAGMA_Z_ONE;
160 
161  int i, k, lddwork, old_i, old_ib;
162  int nbmin, nx, ib, ldda;
163 
164  *info = 0;
165 
166  magma_qr_params *qr_params = (magma_qr_params *)cntxt->params;
167  int nb = qr_params->nb;
168 
169  int lwkopt = n * nb;
170  work[0] = MAGMA_Z_MAKE( (double)lwkopt, 0 );
171  long int lquery = (lwork == -1);
172  if (m < 0) {
173  *info = -1;
174  } else if (n < 0) {
175  *info = -2;
176  } else if (lda < max(1,m)) {
177  *info = -4;
178  } else if (lwork < max(1,n) && ! lquery) {
179  *info = -7;
180  }
181  if (*info != 0) {
182  magma_xerbla( __func__, -(*info) );
183  return MAGMA_ERR_ILLEGAL_VALUE;
184  }
185  else if (lquery)
186  return MAGMA_SUCCESS;
187 
188  k = min(m,n);
189  if (k == 0) {
190  work[0] = c_one;
191  return MAGMA_SUCCESS;
192  }
193 
194  cublasStatus status;
195  static cudaStream_t stream[2];
196  cudaStreamCreate(&stream[0]);
197  cudaStreamCreate(&stream[1]);
198 
199  nbmin = 2;
200  nx = nb;
201 
202  lddwork = ((n+31)/32)*32;
203  ldda = ((m+31)/32)*32;
204 
205  cuDoubleComplex *da;
206  status = cublasAlloc((n)*ldda + nb*lddwork, sizeof(cuDoubleComplex), (void**)&da);
207  if (status != CUBLAS_STATUS_SUCCESS) {
208  *info = -8;
209  return 0;
210  }
211  cuDoubleComplex *dwork = da + ldda*(n);
212 
213  if (nb >= nbmin && nb < k && nx < k) {
214  /* Use blocked code initially */
215  cudaMemcpy2DAsync(da_ref(0,nb), ldda*sizeof(cuDoubleComplex),
216  a_ref(0,nb), lda *sizeof(cuDoubleComplex),
217  sizeof(cuDoubleComplex)*(m), (n-nb),
218  cudaMemcpyHostToDevice,stream[0]);
219 
220  old_i = 0; old_ib = nb;
221  for (i = 0; i < k-nx; i += nb) {
222  ib = min(k-i, nb);
223  if (i>0){
224  cudaMemcpy2DAsync( a_ref(i,i), lda *sizeof(cuDoubleComplex),
225  da_ref(i,i), ldda*sizeof(cuDoubleComplex),
226  sizeof(cuDoubleComplex)*(m-i), ib,
227  cudaMemcpyDeviceToHost,stream[1]);
228 
229  cudaMemcpy2DAsync( a_ref(0,i), lda *sizeof(cuDoubleComplex),
230  da_ref(0,i), ldda*sizeof(cuDoubleComplex),
231  sizeof(cuDoubleComplex)*i, ib,
232  cudaMemcpyDeviceToHost,stream[0]);
233 
234  /* Apply H' to A(i:m,i+2*ib:n) from the left */
235  magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
236  m-old_i, n-old_i-2*old_ib, old_ib,
237  da_ref(old_i, old_i), ldda, dwork, lddwork,
238  da_ref(old_i, old_i+2*old_ib), ldda, dwork+old_ib, lddwork);
239  }
240 
241  cudaStreamSynchronize(stream[1]);
242  int rows = m-i;
243 
244  cnt++;
245  cntxt->nb = qr_params->ib;
246  magma_zgeqrf_mc(cntxt, &rows, &ib, a_ref(i,i), &lda,
247  tau+i, work, &lwork, info);
248  cntxt->nb = nb;
249 
250  /* Form the triangular factor of the block reflector
251  H = H(i) H(i+1) . . . H(i+ib-1) */
252  lapackf77_zlarft( MagmaForwardStr, MagmaColumnwiseStr,
253  &rows, &ib, a_ref(i,i), &lda, tau+i, qr_params->t+cnt*nb*nb, &ib);
254  if (cnt < qr_params->np_gpu) {
255  qr_params->p[cnt]=a;
256  }
257  zpanel_to_q(MagmaUpper, ib, a_ref(i,i), lda, qr_params->w+cnt*qr_params->nb*qr_params->nb);
258  cublasSetMatrix(rows, ib, sizeof(cuDoubleComplex),
259  a_ref(i,i), lda, da_ref(i,i), ldda);
260  if (qr_params->flag == 1)
261  zq_to_panel(MagmaUpper, ib, a_ref(i,i), lda, qr_params->w+cnt*qr_params->nb*qr_params->nb);
262 
263  if (i + ib < n) {
264  cublasSetMatrix(ib, ib, sizeof(cuDoubleComplex), qr_params->t+cnt*nb*nb, ib, dwork, lddwork);
265 
266  if (i+ib < k-nx)
267  /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
268  magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
269  rows, ib, ib,
270  da_ref(i, i ), ldda, dwork, lddwork,
271  da_ref(i, i+ib), ldda, dwork+ib, lddwork);
272  else
273  magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
274  rows, n-i-ib, ib,
275  da_ref(i, i ), ldda, dwork, lddwork,
276  da_ref(i, i+ib), ldda, dwork+ib, lddwork);
277 
278  old_i = i;
279  old_ib = ib;
280  }
281  }
282  } else {
283  i = 0;
284  }
285 
286  /* Use unblocked code to factor the last or only block. */
287  if (i < k)
288  {
289  ib = n-i;
290  if (i!=0)
291  cublasGetMatrix(m, ib, sizeof(cuDoubleComplex),
292  da_ref(0,i), ldda, a_ref(0,i), lda);
293  int rows = m-i;
294 
295  cnt++;
296  lapackf77_zgeqrf(&rows, &ib, a_ref(i,i), &lda, tau+i, work, &lwork, info);
297 
298  if (cnt < qr_params->np_gpu)
299  {
300  int ib2=min(ib,nb);
301 
302  lapackf77_zlarft( MagmaForwardStr, MagmaColumnwiseStr,
303  &rows, &ib2, a_ref(i,i), &lda, tau+i, qr_params->t+cnt*nb*nb, &ib2);
304 
305  qr_params->p[cnt]=a;
306  }
307  }
308 
309  cudaStreamDestroy( stream[0] );
310  cudaStreamDestroy( stream[1] );
311  cublasFree(da);
312  return MAGMA_SUCCESS;
313 } /* magma_zgeqrf */
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
void zq_to_panel(char uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
Definition: zpanel_to_q.cpp:57
magma_int_t nb
Definition: magma.h:40
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define MagmaForwardStr
Definition: magma.h:94
#define MagmaLeft
Definition: magma.h:68
#define __func__
Definition: common_magma.h:65
#define MagmaUpper
Definition: magma.h:61
#define lapackf77_zlarft
Definition: magma_zlapack.h:80
magma_int_t magma_zgeqrf_mc(magma_context *cntxt, magma_int_t *m, magma_int_t *n, cuDoubleComplex *A, magma_int_t *lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t *lwork, magma_int_t *info)
Definition: zgeqrf_mc.cpp:361
magma_int_t magma_zlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex *dv, magma_int_t ldv, cuDoubleComplex *dt, magma_int_t ldt, cuDoubleComplex *dc, magma_int_t ldc, cuDoubleComplex *dowrk, magma_int_t ldwork)
Definition: zlarfb_gpu.cpp:21
#define dwork(dev, i, j)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaConjTrans
Definition: magma.h:59
#define a_ref(a_1, a_2)
#define MagmaColumnwiseStr
Definition: magma.h:97
volatile cuDoubleComplex ** p
Definition: zgeqrf-v2.cpp:55
void * params
Definition: magma.h:43
#define MagmaForward
Definition: magma.h:71
void zpanel_to_q(char uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
Definition: zpanel_to_q.cpp:17
#define MAGMA_SUCCESS
Definition: magma.h:106
#define lapackf77_zgeqrf
Definition: magma_zlapack.h:62
cuDoubleComplex * t
Definition: zgeqrf-v2.cpp:52
#define da_ref(a_1, a_2)
cuDoubleComplex * w
Definition: zgeqrf-v2.cpp:67
#define MAGMA_Z_ONE
Definition: magma.h:132
#define max(a, b)
Definition: common_magma.h:82
#define MagmaColumnwise
Definition: magma.h:74

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgeqrf2_gpu ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  tau,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqrf3 ( magma_context *  cntxt,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 73 of file zgeqrf-v3.cpp.

References __func__, magma_qr_params::fb, magma_qr_params::flag, magma_qr_params::m, MAGMA_ERR_ILLEGAL_VALUE, MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgeqrf2(), MagmaUpper, max, min, magma_qr_params::n, magma_qr_params::nb, magma_qr_params::np_gpu, magma_qr_params::nthreads, magma_qr_params::ob, magma_qr_params::p, context::params, magma_qr_params::sync0, magma_qr_params::sync1, magma_qr_params::sync2, magma_qr_params::w, and zq_to_panel().

77 {
78 /* -- MAGMA (version 1.4.0) --
79  Univ. of Tennessee, Knoxville
80  Univ. of California, Berkeley
81  Univ. of Colorado, Denver
82  August 2013
83 
84  Purpose
85  =======
86  ZGEQRF computes a QR factorization of a COMPLEX_16 M-by-N matrix A:
87  A = Q * R.
88 
89  Arguments
90  =========
91  M (input) INTEGER
92  The number of rows of the matrix A. M >= 0.
93 
94  N (input) INTEGER
95  The number of columns of the matrix A. N >= 0.
96 
97  A (input/output) COMPLEX_16 array, dimension (LDA,N)
98  On entry, the M-by-N matrix A.
99  On exit, the elements on and above the diagonal of the array
100  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
101  upper triangular if m >= n); the elements below the diagonal,
102  with the array TAU, represent the orthogonal matrix Q as a
103  product of min(m,n) elementary reflectors (see Further
104  Details).
105 
106  LDA (input) INTEGER
107  The leading dimension of the array A. LDA >= max(1,M).
108 
109  TAU (output) COMPLEX_16 array, dimension (min(M,N))
110  The scalar factors of the elementary reflectors (see Further
111  Details).
112 
113  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
114  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
115 
116  LWORK (input) INTEGER
117  The dimension of the array WORK. LWORK >= N*NB.
118 
119  If LWORK = -1, then a workspace query is assumed; the routine
120  only calculates the optimal size of the WORK array, returns
121  this value as the first entry of the WORK array, and no error
122  message related to LWORK is issued.
123 
124  INFO (output) INTEGER
125  = 0: successful exit
126  < 0: if INFO = -i, the i-th argument had an illegal value
127 
128  Further Details
129  ===============
130  The matrix Q is represented as a product of elementary reflectors
131 
132  Q = H(1) H(2) . . . H(k), where k = min(m,n).
133 
134  Each H(i) has the form
135 
136  H(i) = I - tau * v * v'
137 
138  where tau is a complex scalar, and v is a complex vector with
139  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
140  and tau in TAU(i).
141  ==================================================================== */
142 
143  cuDoubleComplex c_one = MAGMA_Z_ONE;
144  int k, ib;
145  magma_qr_params *qr_params = (magma_qr_params *)cntxt->params;
146 
147  *info = 0;
148 
149  int lwkopt = n * qr_params->nb;
150  work[0] = MAGMA_Z_MAKE( (double)lwkopt, 0 );
151  long int lquery = (lwork == -1);
152  if (m < 0) {
153  *info = -1;
154  } else if (n < 0) {
155  *info = -2;
156  } else if (lda < max(1,m)) {
157  *info = -4;
158  } else if (lwork < max(1,n) && ! lquery) {
159  *info = -7;
160  }
161  if (*info != 0) {
162  magma_xerbla( __func__, -(*info) );
163  return MAGMA_ERR_ILLEGAL_VALUE;
164  }
165  else if (lquery)
166  return MAGMA_SUCCESS;
167 
168  k = min(m,n);
169  if (k == 0) {
170  work[0] = c_one;
171  return MAGMA_SUCCESS;
172  }
173 
174  int M=qr_params->nthreads*qr_params->ob;
175  int N=qr_params->nthreads*qr_params->ob;
176 
177  if (qr_params->m > qr_params->n)
178  M = qr_params->m - (qr_params->n-qr_params->nthreads*qr_params->ob);
179 
180  /* Use MAGMA code to factor left portion of matrix, waking up threads
181  along the way to perform updates on the right portion of matrix */
182  magma_zgeqrf2(cntxt, m, n - qr_params->nthreads * qr_params->ob,
183  a, lda, tau, work, lwork, info);
184 
185  /* Wait for all update threads to finish */
186  for (k = 0; k < qr_params->nthreads; k++) {
187  while (qr_params->sync1[k] == 0) {
188  sched_yield();
189  }
190  }
191 
192  /* Unzero upper part of each panel */
193  for (k = 0; k < qr_params->np_gpu-1; k++){
194  ib = min(qr_params->nb,(n-qr_params->nthreads*qr_params->ob)-qr_params->nb*k);
195  zq_to_panel(MagmaUpper, ib, a + k*qr_params->nb*lda + k*qr_params->nb, lda,
196  qr_params->w+qr_params->nb*qr_params->nb*k);
197  }
198 
199  /* Use final blocking size */
200  qr_params->nb = qr_params->fb;
201 
202  /* Flag MAGMA code to internally unzero upper part of each panel */
203  qr_params->flag = 1;
204 
205  /* Use MAGMA code to perform final factorization if necessary */
206  if (qr_params->m > (qr_params->n - (qr_params->nthreads*qr_params->ob)))
207 
208  if (M > (qr_params->m-(qr_params->n-(qr_params->ob*qr_params->nthreads))))
209  M = qr_params->m-(qr_params->n-(qr_params->ob*qr_params->nthreads));
210 
211  magma_zgeqrf2(cntxt, M, N,
212  a + (n-qr_params->nthreads*qr_params->ob)*m+
213  (n-qr_params->nthreads*qr_params->ob), lda,
214  &tau[n-qr_params->nthreads*qr_params->ob],
215  work, lwork, info);
216 
217  /* Prepare for next run */
218  for (k = 0; k < qr_params->np_gpu; k++) {
219  qr_params->p[k] = NULL;
220  }
221 
222  for (k = 0; k < qr_params->nthreads; k++) {
223  qr_params->sync1[k] = 0;
224  }
225 
226  /* Infrastructure for next run is not in place yet */
227  qr_params->sync0 = 0;
228 
229  /* Signal update threads to get in position for next run */
230  qr_params->sync2 = 1;
231 }
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
void zq_to_panel(char uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
Definition: zpanel_to_q.cpp:57
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define __func__
Definition: common_magma.h:65
#define MagmaUpper
Definition: magma.h:61
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
volatile cuDoubleComplex ** p
Definition: zgeqrf-v2.cpp:55
magma_int_t magma_zgeqrf2(magma_context *cntxt, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *tau, cuDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
Definition: zgeqrf-v2.cpp:73
void * params
Definition: magma.h:43
volatile int * sync1
Definition: zgeqrf-v2.cpp:61
#define MAGMA_SUCCESS
Definition: magma.h:106
volatile int sync2
Definition: zgeqrf-v2.cpp:64
cuDoubleComplex * w
Definition: zgeqrf-v2.cpp:67
#define MAGMA_Z_ONE
Definition: magma.h:132
#define max(a, b)
Definition: common_magma.h:82
volatile int sync0
Definition: zgeqrf-v2.cpp:58

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgeqrf3_gpu ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dT,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqrf_gpu ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dT,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqrf_mc ( magma_context *  cntxt,
magma_int_t *  m,
magma_int_t *  n,
cuDoubleComplex *  A,
magma_int_t *  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t *  lwork,
magma_int_t *  info 
)

Definition at line 361 of file zgeqrf_mc.cpp.

References __func__, A, MAGMA_ERR_ILLEGAL_VALUE, magma_get_zpotrf_nb(), magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, max, min, context::nb, context::num_cores, context::num_gpus, context::quark, QUARK_Barrier(), QUARK_Insert_Task_zgemm(), QUARK_Insert_Task_zgeqrt(), QUARK_Insert_Task_zlarfb(), QUARK_Insert_Task_ztrmm(), T, and W.

365 {
366 /* -- MAGMA (version 1.4.0) --
367  Univ. of Tennessee, Knoxville
368  Univ. of California, Berkeley
369  Univ. of Colorado, Denver
370  August 2013
371 
372  Purpose
373  =======
374 
375  ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
376  A = Q * R.
377 
378  Arguments
379  =========
380  CNTXT (input) MAGMA_CONTEXT
381  CNTXT specifies the MAGMA hardware context for this routine.
382 
383  M (input) INTEGER
384  The number of rows of the matrix A. M >= 0.
385 
386  N (input) INTEGER
387  The number of columns of the matrix A. N >= 0.
388 
389  A (input/output) COMPLEX_16 array, dimension (LDA,N)
390  On entry, the M-by-N matrix A.
391  On exit, the elements on and above the diagonal of the array
392  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
393  upper triangular if m >= n); the elements below the diagonal,
394  with the array TAU, represent the unitary matrix Q as a
395  product of min(m,n) elementary reflectors (see Further
396  Details).
397 
398  LDA (input) INTEGER
399  The leading dimension of the array A. LDA >= max(1,M).
400 
401  TAU (output) COMPLEX_16 array, dimension (min(M,N))
402  The scalar factors of the elementary reflectors (see Further
403  Details).
404 
405  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
406  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
407 
408  LWORK (input) INTEGER
409  The dimension of the array WORK. LWORK >= N*NB.
410 
411  If LWORK = -1, then a workspace query is assumed; the routine
412  only calculates the optimal size of the WORK array, returns
413  this value as the first entry of the WORK array, and no error
414  message related to LWORK is issued.
415 
416  INFO (output) INTEGER
417  = 0: successful exit
418  < 0: if INFO = -i, the i-th argument had an illegal value
419 
420  Further Details
421  ===============
422  The matrix Q is represented as a product of elementary reflectors
423 
424  Q = H(1) H(2) . . . H(k), where k = min(m,n).
425 
426  Each H(i) has the form
427 
428  H(i) = I - tau * v * v'
429 
430  where tau is a complex scalar, and v is a complex vector with
431  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
432  and tau in TAU(i).
433  ==================================================================== */
434 
435  if (cntxt->num_cores == 1 && cntxt->num_gpus == 1)
436  {
437  //magma_int_t result = magma_zgeqrf(*m, *n, a, *lda, tau, work, *lwork, info);
438  //return result;
439  }
440 
441  magma_int_t i,j,l;
442 
443  magma_int_t ii=-1,jj=-1,ll=-1;
444 
445  Quark* quark = cntxt->quark;
446 
447  // DAG labels
448  char sgeqrt_dag_label[1000];
449  char slarfb_dag_label[1000];
450  char strmm_dag_label[1000];
451  char sgemm_dag_label[1000];
452 
453  *info = 0;
454 
455  cuDoubleComplex c_one = MAGMA_Z_ONE;
456  cuDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
457 
458  magma_int_t nb = (cntxt->nb ==-1)? magma_get_zpotrf_nb(*n): cntxt->nb;
459 
460  magma_int_t lwkopt = *n * nb;
461  work[0] = MAGMA_Z_MAKE( (double)lwkopt, 0 );
462 
463  long int lquery = *lwork == -1;
464 
465  // check input arguments
466  if (*m < 0) {
467  *info = -1;
468  } else if (*n < 0) {
469  *info = -2;
470  } else if (*lda < max(1,*m)) {
471  *info = -4;
472  } else if (*lwork < max(1,*n) && ! lquery) {
473  *info = -7;
474  }
475  if (*info != 0) {
476  magma_xerbla( __func__, -(*info) );
477  return MAGMA_ERR_ILLEGAL_VALUE;
478  }
479  else if (lquery)
480  return 0;
481 
482  magma_int_t k = min(*m,*n);
483  if (k == 0) {
484  work[0] = c_one;
485  return 0;
486  }
487 
488  magma_int_t nt = (((*n)%nb) == 0) ? (*n)/nb : (*n)/nb + 1;
489  magma_int_t mt = (((*m)%nb) == 0) ? (*m)/nb : (*m)/nb + 1;
490 
491  cuDoubleComplex **local_work = (cuDoubleComplex**) malloc(sizeof(cuDoubleComplex*)*(nt-1)*mt);
492  memset(local_work, 0, sizeof(cuDoubleComplex*)*(nt-1)*mt);
493 
494  magma_int_t priority;
495 
496  // traverse diagonal blocks
497  for (i = 0; i < k; i += nb) {
498 
499  ii++;
500 
501  jj = ii;
502 
503  sprintf(sgeqrt_dag_label, "GEQRT %d",ii);
504 
505  // factor diagonal block, also compute T matrix
506  QUARK_Insert_Task_zgeqrt(quark,
507  0, (*m)-i, min(nb,(*n)-i), A(i,i), *lda, T(i), nb, &tau[i], sgeqrt_dag_label);
508 
509  if (i > 0) {
510 
511  priority = 100;
512 
513  // update panels in a left looking fashion
514  for (j = (i-nb) + (2*nb); j < *n; j += nb) {
515 
516  jj++;
517 
518  ll = ii-1;
519 
520  sprintf(slarfb_dag_label, "LARFB %d %d",ii-1, jj);
521 
522  // perform part of update
523  QUARK_Insert_Task_zlarfb(quark, 0,
524  (*m)-(i-nb), min(nb,(*n)-(i-nb)), min(nb,(*m)-(i-nb)), min(nb,(*n)-j), nb,
525  A(i-nb,i-nb), *lda, A(i-nb,j), *lda, T(i-nb), nb, W(ii-1,jj), nb, slarfb_dag_label, priority);
526 
527  sprintf(strmm_dag_label, "TRMM %d %d",ii-1, jj);
528 
529  // perform more of update
530  QUARK_Insert_Task_ztrmm(quark, 0, min(nb,(*m)-(i-nb)), min(nb,(*n)-j), c_neg_one,
531  A(i-nb,i-nb), *lda, W(ii-1,jj), nb, c_one, A(i-nb,j), *lda, strmm_dag_label, priority);
532 
533  sprintf(sgemm_dag_label, "GEMM %d %d %d",ii-1, jj, ll);
534 
535  // finish update
536  QUARK_Insert_Task_zgemm(quark, 0, (*m)-i, min(nb,(*n)-j), min(nb,(*n)-(i-nb)), c_neg_one,
537  A(i,i-nb), *lda, W(ii-1,jj), nb, c_one, A(i,j), *lda, A(i,j), sgemm_dag_label, priority, jj);
538 
539  }
540 
541  }
542 
543  j = i + nb;
544 
545  jj = ii;
546 
547  // handle case of short wide rectangular matrix
548  if (j < (*n)) {
549 
550  priority = 0;
551 
552  jj++;
553 
554  ll = ii;
555 
556  sprintf(slarfb_dag_label, "LARFB %d %d",ii, jj);
557 
558  // perform part of update
559  QUARK_Insert_Task_zlarfb(quark, 0,
560  (*m)-i, min(nb,(*n)-i), min(nb,(*m)-i), min(nb,(*n)-j), nb,
561  A(i,i), *lda, A(i,j), *lda, T(i), nb, W(ii,jj), nb, slarfb_dag_label, priority);
562 
563  sprintf(strmm_dag_label, "TRMM %d %d",ii, jj);
564 
565  // perform more of update
566  QUARK_Insert_Task_ztrmm(quark, 0, min(nb,(*m)-i), min(nb,(*n)-j), c_neg_one,
567  A(i,i), *lda, W(ii,jj), nb, c_one, A(i,j), *lda, strmm_dag_label, priority);
568 
569  sprintf(sgemm_dag_label, "GEMM %d %d %d",ii, jj, ll);
570 
571  // finish update
572  QUARK_Insert_Task_zgemm(quark, 0, (*m)-i-nb, min(nb,(*n)-j), min(nb,(*n)-i), c_neg_one,
573  A(i+nb,i), *lda, W(ii,jj), nb, c_one, A(i+nb,j), *lda, A(i+nb,j), sgemm_dag_label, priority, jj);
574 
575  }
576 
577  }
578 
579  // wait for all tasks to finish executing
580  QUARK_Barrier(quark);
581 
582  // free memory
583  for(k = 0 ; k < (nt-1)*mt; k++) {
584  if (local_work[k] != NULL) {
585  free(local_work[k]);
586  }
587  }
588  free(local_work);
589 
590 }
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
Definition: quark.c:96
magma_int_t nb
Definition: magma.h:40
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_Z_MAKE(r, i)
Definition: magma.h:123
#define __func__
Definition: common_magma.h:65
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
magma_int_t num_gpus
Definition: magma.h:31
#define T(m)
Definition: zgeqrf_mc.cpp:14
Quark * quark
Definition: magma.h:37
int magma_int_t
Definition: magmablas.h:12
#define W(k, n)
Definition: zgeqrf_mc.cpp:15
void QUARK_Insert_Task_zgemm(Quark *quark, Quark_Task_Flags *task_flags, magma_int_t m, magma_int_t n, magma_int_t k, cuDoubleComplex alpha, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex **b, magma_int_t ldb, cuDoubleComplex beta, cuDoubleComplex *c, magma_int_t ldc, cuDoubleComplex *fake, char *dag_label, magma_int_t priority, magma_int_t dkdk)
Definition: zgeqrf_mc.cpp:216
magma_int_t magma_get_zpotrf_nb(magma_int_t m)
Definition: get_nb.cpp:79
#define A(m, n)
Definition: zgeqrf_mc.cpp:13
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void QUARK_Insert_Task_zgeqrt(Quark *quark, Quark_Task_Flags *task_flags, magma_int_t m, magma_int_t n, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *t, magma_int_t ldt, cuDoubleComplex *tau, char *dag_label)
Definition: zgeqrf_mc.cpp:289
void QUARK_Insert_Task_zlarfb(Quark *quark, Quark_Task_Flags *task_flags, magma_int_t m, magma_int_t n, magma_int_t mm, magma_int_t nn, magma_int_t ib, cuDoubleComplex *v, magma_int_t ldv, cuDoubleComplex *c, magma_int_t ldc, cuDoubleComplex *t, magma_int_t ldt, cuDoubleComplex **w, magma_int_t ldw, char *dag_label, magma_int_t priority)
Definition: zgeqrf_mc.cpp:320
void QUARK_Barrier(Quark *quark)
Definition: quark.c:771
magma_int_t num_cores
Definition: magma.h:28
void QUARK_Insert_Task_ztrmm(Quark *quark, Quark_Task_Flags *task_flags, magma_int_t m, magma_int_t n, cuDoubleComplex alpha, cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex **b, magma_int_t ldb, cuDoubleComplex beta, cuDoubleComplex *c, magma_int_t ldc, char *dag_label, magma_int_t priority)
Definition: zgeqrf_mc.cpp:255
#define MAGMA_Z_ONE
Definition: magma.h:132
#define max(a, b)
Definition: common_magma.h:82

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgeqrf_ooc ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqrs3_gpu ( magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dT,
cuDoubleComplex *  dB,
magma_int_t  lddb,
cuDoubleComplex *  hwork,
magma_int_t  lhwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgeqrs_gpu ( magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dT,
cuDoubleComplex *  dB,
magma_int_t  lddb,
cuDoubleComplex *  hwork,
magma_int_t  lhwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgessm_gpu ( char  storev,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magma_int_t  ib,
magma_int_t ipiv,
cuDoubleComplex *  dL1,
magma_int_t  lddl1,
cuDoubleComplex *  dL,
magma_int_t  lddl,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgesv ( magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  A,
magma_int_t  lda,
magma_int_t *  ipiv,
cuDoubleComplex *  B,
magma_int_t  ldb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgesv_gpu ( magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t ipiv,
cuDoubleComplex *  dB,
magma_int_t  lddb,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgesvd ( char  jobu,
char  jobvt,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
double *  s,
cuDoubleComplex *  u,
magma_int_t  ldu,
cuDoubleComplex *  vt,
magma_int_t  ldvt,
cuDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgetrf ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
magma_int_t *  ipiv,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zgetrf2 ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
magma_int_t ipiv,
magma_int_t info 
)
magma_int_t magma_zgetrf_gpu ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t ipiv,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgetrf_incpiv_gpu ( char  storev,
magma_int_t  m,
magma_int_t  n,
magma_int_t  ib,
cuDoubleComplex *  hA,
magma_int_t  ldha,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  hL,
magma_int_t  ldhl,
cuDoubleComplex *  dL,
magma_int_t  lddl,
magma_int_t ipiv,
cuDoubleComplex *  dwork,
magma_int_t  lddwork,
magma_int_t info 
)
magma_int_t magma_zgetrf_mc ( magma_context *  cntxt,
magma_int_t *  m,
magma_int_t *  n,
cuDoubleComplex *  A,
magma_int_t *  lda,
magma_int_t *  ipiv,
magma_int_t *  info 
)

Definition at line 111 of file zgetrf_mc.cpp.

References __func__, A, GATHERV, INOUT, INPUT, MAGMA_ERR_ILLEGAL_VALUE, magma_get_zpotrf_nb(), magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, max, min, context::nb, context::num_cores, context::num_gpus, OUTPUT, context::quark, QUARK_Barrier(), QUARK_Insert_Task(), SCHED_panel_update(), SCHED_zgemm(), SCHED_zgetrf(), SCHED_zlaswp(), TASK_PRIORITY, TASKCOLOR, TASKLABEL, and VALUE.

115 {
116 /* -- MAGMA (version 1.4.0) --
117  Univ. of Tennessee, Knoxville
118  Univ. of California, Berkeley
119  Univ. of Colorado, Denver
120  August 2013
121 
122  Purpose
123  =======
124  ZGETRF computes an LU factorization of a general COMPLEX_16
125  M-by-N matrix A using partial pivoting with row interchanges.
126 
127  The factorization has the form
128  A = P * L * U
129  where P is a permutation matrix, L is lower triangular with unit
130  diagonal elements (lower trapezoidal if m > n), and U is upper
131  triangular (upper trapezoidal if m < n).
132 
133  This is the right-looking Level 3 BLAS version of the algorithm.
134 
135  Arguments
136  =========
137  CNTXT (input) MAGMA_CONTEXT
138  CNTXT specifies the MAGMA hardware context for this routine.
139 
140  M (input) INTEGER
141  The number of rows of the matrix A. M >= 0.
142 
143  N (input) INTEGER
144  The number of columns of the matrix A. N >= 0.
145 
146  A (input/output) COMPLEX_16 array, dimension (LDA,N)
147  On entry, the M-by-N matrix to be factored.
148  On exit, the factors L and U from the factorization
149  A = P*L*U; the unit diagonal elements of L are not stored.
150 
151  LDA (input) INTEGER
152  The leading dimension of the array A. LDA >= max(1,M).
153 
154  IPIV (output) INTEGER array, dimension (min(M,N))
155  The pivot indices; for 1 <= i <= min(M,N), row i of the
156  matrix was interchanged with row IPIV(i).
157 
158  INFO (output) INTEGER
159  = 0: successful exit
160  < 0: if INFO = -i, the i-th argument had an illegal value
161  > 0: if INFO = i, U(i,i) is exactly zero. The factorization
162  has been completed, but the factor U is exactly
163  singular, and division by zero will occur if it is used
164  to solve a system of equations.
165  ===================================================================== */
166 
167  if (cntxt->num_cores == 1 && cntxt->num_gpus == 1)
168  {
169  //int result = magma_zgetrf(*m, *n, a, *lda, ipiv, info);
170  //return result;
171  }
172 
173  int EN_BEE = cntxt->nb;
174  Quark* quark = cntxt->quark;
175 
176  int i,j,l;
177  int ii,jj,ll;
178 
179  void *fakedep;
180 
181  int ione=1;
182 
183  cuDoubleComplex fone = MAGMA_Z_ONE;
184  cuDoubleComplex mone = MAGMA_Z_NEG_ONE;
185 
186  int M,N,MM,NN,MMM,K;
187 
188  int priority=0;
189 
190  *info = 0;
191 
192  int nb = (EN_BEE==-1)? magma_get_zpotrf_nb(*n): EN_BEE;
193 
194  /* Check arguments */
195  if (*m < 0) {
196  *info = -1;
197  } else if (*n < 0) {
198  *info = -2;
199  } else if (*lda < max(1,*m)) {
200  *info = -4;
201  }
202  if (*info != 0) {
203  magma_xerbla( __func__, -(*info) );
204  return MAGMA_ERR_ILLEGAL_VALUE;
205  }
206 
207  int k = min(*m,*n);
208 
209  int iinfo[2];
210  iinfo[1] = 0;
211 
212  char label[10000];
213 
214  ii = -1;
215 
216  /* Loop across diagonal blocks */
217  for (i = 0; i < k; i += nb)
218  {
219  ii++;
220 
221  jj = -1;
222 
223  priority = 10000 - ii;
224 
225  /* Update panels in left looking fashion */
226  for (j = 0; j < i; j += nb)
227  {
228  jj++;
229 
230  NN=min(nb,(*n)-i);
231  MM=min(nb,(*m)-j);
232 
233  l = j + nb;
234 
235  MMM = min(nb,(*m)-l);
236 
237  sprintf(label, "UPDATE %d %d", ii, jj);
238 
239  QUARK_Insert_Task(quark, SCHED_panel_update, 0,
240  sizeof(int), &NN, VALUE,
241  sizeof(cuDoubleComplex)*(*m)*(*n), A(j,i), INOUT,
242  sizeof(int), lda, VALUE,
243  sizeof(int), &MM, VALUE,
244  sizeof(cuDoubleComplex)*nb, &ipiv[j], INPUT,
245  sizeof(cuDoubleComplex)*(*m)*(*n), A(j,j), INPUT,
246  sizeof(int), &MMM, VALUE,
247  sizeof(int), &nb, VALUE,
248  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,j), INPUT,
249  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,i), INOUT,
250  sizeof(int), &priority,VALUE | TASK_PRIORITY,
251  sizeof(cuDoubleComplex)*(*m)*(*n), A(i,i), OUTPUT,
252  strlen(label)+1, label, VALUE | TASKLABEL,
253  5, "cyan", VALUE | TASKCOLOR,
254  0);
255 
256  ll = jj + 1;
257 
258  /* Split gemm into tiles */
259  for (l = j + (2*nb); l < (*m); l += nb)
260  {
261  ll++;
262 
263  MMM = min(nb,(*m)-l);
264 
265  fakedep = (void *)(intptr_t)(j+1);
266 
267  sprintf(label, "GEMM %d %d %d", ii, jj, ll);
268 
269  QUARK_Insert_Task(quark, SCHED_zgemm, 0,
270  sizeof(int), &MMM, VALUE,
271  sizeof(int), &NN, VALUE,
272  sizeof(int), &nb, VALUE,
273  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,j), INPUT,
274  sizeof(int), lda, VALUE,
275  sizeof(cuDoubleComplex)*(*m)*(*n), A(j,i), INPUT,
276  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,i), INOUT,
277  sizeof(int), &priority,VALUE | TASK_PRIORITY,
278  sizeof(cuDoubleComplex)*(*m)*(*n), A(i,i), OUTPUT | GATHERV,
279  sizeof(void*), fakedep, OUTPUT | GATHERV,
280  strlen(label)+1, label, VALUE | TASKLABEL,
281  5, "blue", VALUE | TASKCOLOR,
282  0);
283 
284  }
285 
286  }
287 
288  M=(*m)-i;
289  N=min(nb,(*n)-i);
290 
291  iinfo[0] = i;
292 
293  sprintf(label, "GETRF %d", ii);
294 
295  QUARK_Insert_Task(quark, SCHED_zgetrf, 0,
296  sizeof(int), &M, VALUE,
297  sizeof(int), &N, VALUE,
298  sizeof(cuDoubleComplex)*(*m)*(*n), A(i,i), INOUT,
299  sizeof(int), lda, VALUE,
300  sizeof(cuDoubleComplex)*nb, &ipiv[i], OUTPUT,
301  sizeof(int), iinfo, OUTPUT,
302  sizeof(int), &priority,VALUE | TASK_PRIORITY,
303  strlen(label)+1, label, VALUE | TASKLABEL,
304  6, "green", VALUE | TASKCOLOR,
305  0);
306 
307  }
308 
309  K = (*m)/nb;
310 
311  if ((K*nb)==(*m)) {
312  ii = K - 1;
313  K = *m;
314  } else {
315  ii = k;
316  K = (K+1)*nb;
317  }
318 
319  priority = 0;
320 
321  /* If n > m */
322  for (i = K; i < (*n); i += nb)
323  {
324  ii++;
325 
326  jj = -1;
327 
328  /* Update remaining panels in left looking fashion */
329  for (j = 0; j < (*m); j += nb)
330  {
331  jj++;
332 
333  NN=min(nb,(*n)-i);
334  MM=min(nb,(*m)-j);
335 
336  l = j + nb;
337 
338  MMM = min(nb,(*m)-l);
339 
340  sprintf(label, "UPDATE %d %d", ii, jj);
341 
342  QUARK_Insert_Task(quark, SCHED_panel_update, 0,
343  sizeof(int), &NN, VALUE,
344  sizeof(cuDoubleComplex)*(*m)*(*n), A(j,i), INOUT,
345  sizeof(int), lda, VALUE,
346  sizeof(int), &MM, VALUE,
347  sizeof(cuDoubleComplex)*nb, &ipiv[j], INPUT,
348  sizeof(cuDoubleComplex)*(*m)*(*n), A(j,j), INPUT,
349  sizeof(int), &MMM, VALUE,
350  sizeof(int), &nb, VALUE,
351  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,j), INPUT,
352  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,i), INOUT,
353  sizeof(int), &priority,VALUE | TASK_PRIORITY,
354  sizeof(cuDoubleComplex)*(*m)*(*n), A(i,i), OUTPUT,
355  strlen(label)+1, label, VALUE | TASKLABEL,
356  5, "cyan", VALUE | TASKCOLOR,
357  0);
358 
359  ll = jj + 1;
360 
361  /* Split gemm into tiles */
362  for (l = j + (2*nb); l < (*m); l += nb) {
363 
364  ll++;
365 
366  MMM = min(nb,(*m)-l);
367 
368  fakedep = (void *)(intptr_t)(j+1);
369 
370  sprintf(label, "GEMM %d %d %d", ii, jj, ll);
371 
372  QUARK_Insert_Task(quark, SCHED_zgemm, 0,
373  sizeof(int), &MMM, VALUE,
374  sizeof(int), &NN, VALUE,
375  sizeof(int), &nb, VALUE,
376  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,j), INPUT,
377  sizeof(int), lda, VALUE,
378  sizeof(cuDoubleComplex)*(*m)*(*n), A(j,i), INPUT,
379  sizeof(cuDoubleComplex)*(*m)*(*n), A(l,i), INOUT,
380  sizeof(int), &priority,VALUE | TASK_PRIORITY,
381  sizeof(cuDoubleComplex)*(*m)*(*n), A(i,i), OUTPUT | GATHERV,
382  sizeof(void*), fakedep, OUTPUT | GATHERV,
383  strlen(label)+1, label, VALUE | TASKLABEL,
384  5, "blue", VALUE | TASKCOLOR,
385  0);
386 
387  }
388 
389  }
390 
391  }
392 
393  ii = -1;
394 
395  /* Swap behinds */
396  for (i = 0; i < k; i += nb) {
397 
398  ii++;
399 
400  jj = -1;
401 
402  MM = min(nb,(*m)-i);
403  MM = min(MM,(*n)-i);
404 
405  for (j = 0; j < i; j += nb) {
406 
407  jj++;
408 
409  fakedep = (void *)(intptr_t)(j+1);
410 
411  sprintf(label, "LASWPF %d %d", ii, jj);
412 
413  QUARK_Insert_Task(quark, SCHED_zlaswp, 0,
414  sizeof(int), &nb, VALUE,
415  sizeof(cuDoubleComplex)*(*m)*(*n), A(i,j), INOUT,
416  sizeof(int), lda, VALUE,
417  sizeof(int), &MM, VALUE,
418  sizeof(cuDoubleComplex)*nb, &ipiv[i], INPUT,
419  sizeof(int), &priority, VALUE | TASK_PRIORITY,
420  sizeof(void*), fakedep, INPUT,
421  sizeof(cuDoubleComplex)*(*m)*(*n), A(i+nb,j), OUTPUT,
422  strlen(label)+1, label, VALUE | TASKLABEL,
423  7, "purple", VALUE | TASKCOLOR,
424  0);
425 
426  }
427 
428  }
429 
430  /* Synchronization point */
431  QUARK_Barrier(quark);
432 
433  /* Fix pivot */
434  ii = -1;
435 
436  for (i = 0; i < k; i +=nb) {
437  ii++;
438  for (j = 0; j < min(nb,(k-i)); j++) {
439  ipiv[ii*nb+j] += ii*nb;
440  }
441  }
442 
443  QUARK_Barrier(quark);
444 
445 }
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
Definition: quark.c:96
Definition: quark.h:52
static void SCHED_panel_update(Quark *quark)
Definition: zgetrf_mc.cpp:37
magma_int_t nb
Definition: magma.h:40
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
#define TASK_PRIORITY
Definition: quark.h:75
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
magma_int_t num_gpus
Definition: magma.h:31
Quark * quark
Definition: magma.h:37
static void SCHED_zgemm(Quark *quark)
Definition: zgetrf_mc.cpp:16
#define TASKLABEL
Definition: quark.h:70
magma_int_t magma_get_zpotrf_nb(magma_int_t m)
Definition: get_nb.cpp:79
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define A(m, n)
Definition: zgetrf_mc.cpp:13
#define TASKCOLOR
Definition: quark.h:73
void SCHED_zlaswp(Quark *quark)
Definition: zgetrf_mc.cpp:94
void SCHED_zgetrf(Quark *quark)
Definition: zgetrf_mc.cpp:71
#define INPUT
Definition: quark.h:53
unsigned long long QUARK_Insert_Task(Quark *quark, void(*function)(Quark *), Quark_Task_Flags *task_flags,...)
Definition: quark.c:1073
Definition: quark.h:52
void QUARK_Barrier(Quark *quark)
Definition: quark.c:771
magma_int_t num_cores
Definition: magma.h:28
Definition: quark.h:52
#define GATHERV
Definition: quark.h:64
#define MAGMA_Z_ONE
Definition: magma.h:132
#define max(a, b)
Definition: common_magma.h:82

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgetrf_nopiv_gpu ( magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zgetrs_gpu ( char  trans,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t ipiv,
cuDoubleComplex *  dB,
magma_int_t  lddb,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zheev ( char  jobz,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
double *  w,
cuDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t info 
)
magma_int_t magma_zheevd ( char  jobz,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
double *  w,
cuDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t  lrwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zheevd_gpu ( char  jobz,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  da,
magma_int_t  ldda,
double *  w,
cuDoubleComplex *  wa,
magma_int_t  ldwa,
cuDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t  lrwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zhegst ( magma_int_t  itype,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  b,
magma_int_t  ldb,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zhegst_gpu ( magma_int_t  itype,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  da,
magma_int_t  ldda,
cuDoubleComplex *  db,
magma_int_t  lddb,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zhegvd ( magma_int_t  itype,
char  jobz,
char  uplo,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  b,
magma_int_t  ldb,
double *  w,
cuDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t  lrwork,
magma_int_t iwork,
magma_int_t  liwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zhetrd ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
double *  d,
double *  e,
cuDoubleComplex *  tau,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zhetrd2_gpu ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  da,
magma_int_t  ldda,
double *  d,
double *  e,
cuDoubleComplex *  tau,
cuDoubleComplex *  wa,
magma_int_t  ldwa,
cuDoubleComplex *  work,
magma_int_t  lwork,
cuDoubleComplex *  dwork,
magma_int_t  ldwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zhetrd_gpu ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  da,
magma_int_t  ldda,
double *  d,
double *  e,
cuDoubleComplex *  tau,
cuDoubleComplex *  wa,
magma_int_t  ldwa,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t info 
)

Here is the caller graph for this function:

magma_int_t magma_zlabrd_gpu ( magma_int_t  m,
magma_int_t  n,
magma_int_t  nb,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  da,
magma_int_t  ldda,
double *  d,
double *  e,
cuDoubleComplex *  tauq,
cuDoubleComplex *  taup,
cuDoubleComplex *  x,
magma_int_t  ldx,
cuDoubleComplex *  dx,
magma_int_t  lddx,
cuDoubleComplex *  y,
magma_int_t  ldy,
cuDoubleComplex *  dy,
magma_int_t  lddy 
)

Here is the caller graph for this function:

magma_int_t magma_zlahr2 ( magma_int_t  m,
magma_int_t  n,
magma_int_t  nb,
cuDoubleComplex *  da,
cuDoubleComplex *  dv,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  t,
magma_int_t  ldt,
cuDoubleComplex *  y,
magma_int_t  ldy 
)

Here is the caller graph for this function:

magma_int_t magma_zlahru ( magma_int_t  m,
magma_int_t  n,
magma_int_t  nb,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  da,
cuDoubleComplex *  y,
cuDoubleComplex *  v,
cuDoubleComplex *  t,
cuDoubleComplex *  dwork 
)

Here is the caller graph for this function:

magma_int_t magma_zlarfb_gpu ( char  side,
char  trans,
char  direct,
char  storev,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  dv,
magma_int_t  ldv,
cuDoubleComplex *  dt,
magma_int_t  ldt,
cuDoubleComplex *  dc,
magma_int_t  ldc,
cuDoubleComplex *  dwork,
magma_int_t  ldwork 
)

Definition at line 21 of file zlarfb_gpu.cpp.

References __func__, MAGMA_ERR_ILLEGAL_VALUE, MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, MAGMA_Z_ZERO, MagmaConjTrans, MagmaLower, MagmaNonUnit, MagmaNoTrans, MagmaRight, and MagmaUpper.

27 {
28 /* -- MAGMA (version 1.4.0) --
29  Univ. of Tennessee, Univ. of California Berkeley
30  August 2013
31 
32  Purpose
33  =======
34  ZLARFB applies a complex block reflector H or its conjugate transpose H' to a
35  COMPLEX_16 m by n matrix C, from the left.
36 
37  Arguments
38  =========
39  SIDE (input) CHARACTER
40  = 'L': apply H or H' from the Left
41  = 'R': apply H or H' from the Right (Not implemented)
42 
43  TRANS (input) CHARACTER
44  = 'N': apply H (No transpose) (Not implemented)
45  = 'C': apply H' (Conjugate transpose)
46 
47  DIRECT (input) CHARACTER
48  Indicates how H is formed from a product of elementary
49  reflectors
50  = 'F': H = H(1) H(2) . . . H(k) (Forward)
51  = 'B': H = H(k) . . . H(2) H(1) (Backward)
52 
53  STOREV (input) CHARACTER
54  Indicates how the vectors which define the elementary
55  reflectors are stored:
56  = 'C': Columnwise
57  = 'R': Rowwise
58 
59  M (input) INTEGER
60  The number of rows of the matrix C.
61 
62  N (input) INTEGER
63  The number of columns of the matrix C.
64 
65  K (input) INTEGER
66  The order of the matrix T (= the number of elementary
67  reflectors whose product defines the block reflector).
68 
69  DV (input) COMPLEX_16 array, dimension (LDV,K)
70  The matrix V. See further details.
71 
72  LDV (input) INTEGER
73  The leading dimension of the array V. LDV >= max(1,M);
74 
75  DT (input) COMPLEX_16 array, dimension (LDT,K)
76  The triangular k by k matrix T in the representation of the
77  block reflector.
78 
79  LDT (input) INTEGER
80  The leading dimension of the array T. LDT >= K.
81 
82  DC (input/output) COMPLEX_16 array, dimension (LDC,N)
83  On entry, the m by n matrix C.
84  On exit, C is overwritten by H*C.
85 
86  LDC (input) INTEGER
87  The leading dimension of the array C. LDC >= max(1,M).
88 
89  WORK (workspace) COMPLEX_16 array, dimension (LDWORK,K)
90 
91  LDWORK (input) INTEGER
92  The leading dimension of the array WORK. LDWORK >= max(1,N);
93  =================================================================== */
94 
95  cuDoubleComplex c_zero = MAGMA_Z_ZERO;
96  cuDoubleComplex c_one = MAGMA_Z_ONE;
97  cuDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
98 
99  /* Function Body */
100  if (m <= 0 || n <= 0) {
101  return MAGMA_SUCCESS;
102  }
103 
104  char transt;
105  if (trans == 'N' || trans == 'n')
106  transt = MagmaConjTrans;
107  else
108  transt = MagmaNoTrans;
109 
110  if ( ( side == 'r' || side == 'R') ) {
111  fprintf(stderr, "The case (side == right) is not implemented\n");
112  magma_xerbla( __func__, 1 );
113  return MAGMA_ERR_ILLEGAL_VALUE;
114  }
115 
116  if ( storev == 'c' || storev == 'C') {
117  /*
118  if (n==1 && m%32==0){
119  // This is used when we have to apply H on only one vector
120  magmablas_zgemvt(m, k, 1., dv_ref(0,0), ldv, dc_ref(0, 0), dwork);
121  printf("m= %d, n = %d, ldwork = %d\n", m, k, ldwork);
122  }
123  else
124  */
125  cublasZgemm( MagmaConjTrans, MagmaNoTrans,
126  n, k, m,
127  c_one, dC, ldc,
128  dV, ldv,
129  c_zero, dwork, ldwork);
130 
131  if (direct == 'F' || direct =='f')
132  cublasZtrmm( MagmaRight, MagmaUpper, transt, MagmaNonUnit,
133  n, k,
134  c_one, dT, ldt,
135  dwork, ldwork);
136  else
137  cublasZtrmm( MagmaRight, MagmaLower, transt, MagmaNonUnit,
138  n, k,
139  c_one, dT, ldt,
140  dwork, ldwork);
141 
142  cublasZgemm( MagmaNoTrans, MagmaConjTrans,
143  m, n, k,
144  c_neg_one, dV, ldv,
145  dwork, ldwork,
146  c_one, dC, ldc);
147  }
148  else {
149  cublasZgemm( MagmaNoTrans, MagmaConjTrans,
150  m, k, n,
151  c_one, dC, ldc,
152  dV, ldv,
153  c_zero, dwork, ldwork);
154 
155  cublasZtrmm( MagmaRight, MagmaUpper, transt, MagmaNonUnit,
156  m, k,
157  c_one, dT, ldt,
158  dwork, ldwork);
159 
160  cublasZgemm( MagmaNoTrans, MagmaNoTrans,
161  m, n, k,
162  c_neg_one, dwork, ldwork,
163  dV, ldv,
164  c_one, dC, ldc);
165  }
166  return MAGMA_SUCCESS;
167 } /* magma_zlarfb */
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
#define __func__
Definition: common_magma.h:65
#define MagmaUpper
Definition: magma.h:61
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
#define dwork(dev, i, j)
#define MagmaLower
Definition: magma.h:62
#define dV(m)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaConjTrans
Definition: magma.h:59
#define MAGMA_Z_ZERO
Definition: magma.h:131
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define dC(dev, i, j)
#define MagmaRight
Definition: magma.h:69
#define MAGMA_Z_ONE
Definition: magma.h:132
#define dT(m)
#define MagmaNoTrans
Definition: magma.h:57

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zlatrd ( char  uplo,
magma_int_t  n,
magma_int_t  nb,
cuDoubleComplex *  a,
magma_int_t  lda,
double *  e,
cuDoubleComplex *  tau,
cuDoubleComplex *  w,
magma_int_t  ldw,
cuDoubleComplex *  da,
magma_int_t  ldda,
cuDoubleComplex *  dw,
magma_int_t  lddw 
)

Here is the caller graph for this function:

magma_int_t magma_zlatrd2 ( char  uplo,
magma_int_t  n,
magma_int_t  nb,
cuDoubleComplex *  a,
magma_int_t  lda,
double *  e,
cuDoubleComplex *  tau,
cuDoubleComplex *  w,
magma_int_t  ldw,
cuDoubleComplex *  da,
magma_int_t  ldda,
cuDoubleComplex *  dw,
magma_int_t  lddw,
cuDoubleComplex *  dwork,
magma_int_t  ldwork 
)

Here is the caller graph for this function:

magma_int_t magma_zlauum ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zlauum_gpu ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zposv ( char  uplo,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  A,
magma_int_t  lda,
cuDoubleComplex *  B,
magma_int_t  ldb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zposv_gpu ( char  uplo,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  dB,
magma_int_t  lddb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zpotrf ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zpotrf_gpu ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zpotrf_mc ( magma_context *  cntxt,
char *  uplo,
magma_int_t *  n,
cuDoubleComplex *  A,
magma_int_t *  lda,
magma_int_t *  info 
)

Definition at line 134 of file zpotrf_mc.cpp.

References __func__, A, INOUT, INPUT, MAGMA_ERR_ILLEGAL_VALUE, magma_get_zpotrf_nb(), magma_xerbla(), max, min, context::nb, context::num_cores, context::num_gpus, OUTPUT, context::quark, QUARK_Barrier(), QUARK_Insert_Task(), SCHED_zgemm(), SCHED_zpotrf(), SCHED_zsyrk(), SCHED_ztrsm(), TASKCOLOR, TASKLABEL, and VALUE.

138 {
139 /* -- MAGMA (version 1.4.0) --
140  Univ. of Tennessee, Knoxville
141  Univ. of California, Berkeley
142  Univ. of Colorado, Denver
143  August 2013
144 
145  Purpose
146  =======
147  ZPOTRF computes the Cholesky factorization of a Hermitian
148  positive definite matrix A.
149 
150  The factorization has the form
151  A = U**H * U, if UPLO = 'U', or
152  A = L * L**H, if UPLO = 'L',
153  where U is an upper triangular matrix and L is lower triangular.
154 
155  This is the block version of the algorithm, calling Level 3 BLAS.
156 
157  Arguments
158  =========
159  CNTXT (input) MAGMA_CONTEXT
160  CNTXT specifies the MAGMA hardware context for this routine.
161 
162  UPLO (input) CHARACTER*1
163  = 'U': Upper triangle of A is stored;
164  = 'L': Lower triangle of A is stored.
165 
166  N (input) INTEGER
167  The order of the matrix A. N >= 0.
168 
169  A (input/output) COMPLEX_16 array, dimension (LDA,N)
170  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
171  N-by-N upper triangular part of A contains the upper
172  triangular part of the matrix A, and the strictly lower
173  triangular part of A is not referenced. If UPLO = 'L', the
174  leading N-by-N lower triangular part of A contains the lower
175  triangular part of the matrix A, and the strictly upper
176  triangular part of A is not referenced.
177 
178  On exit, if INFO = 0, the factor U or L from the Cholesky
179  factorization A = U**H*U or A = L*L**H.
180 
181  LDA (input) INTEGER
182  The leading dimension of the array A. LDA >= max(1,N).
183 
184  INFO (output) INTEGER
185  = 0: successful exit
186  < 0: if INFO = -i, the i-th argument had an illegal value
187  > 0: if INFO = i, the leading minor of order i is not
188  positive definite, and the factorization could not be
189  completed.
190  ===================================================================== */
191 
192  if (cntxt->num_cores == 1 && cntxt->num_gpus == 1)
193  {
194  //magma_int_t result = magma_zpotrf(*uplo, *n, a, *lda, info);
195  //return result;
196  }
197 
198  // check arguments
199  magma_int_t upper = (magma_int_t) lsame_(uplo, "U");
200  *info = 0;
201  if (! upper && ! lsame_(uplo, "L")) {
202  *info = -1;
203  } else if (*n < 0) {
204  *info = -2;
205  } else if (*lda < max(1,*n)) {
206  *info = -4;
207  }
208  if (*info != 0) {
209  magma_xerbla( __func__, -(*info) );
210  return MAGMA_ERR_ILLEGAL_VALUE;
211  }
212 
213  Quark* quark = cntxt->quark;
214 
215  // get block size
216  magma_int_t nb = (cntxt->nb ==-1)? magma_get_zpotrf_nb(*n): cntxt->nb;
217 
218  magma_int_t i,j,k;
219  magma_int_t ii,jj,kk;
220  magma_int_t temp,temp2,temp3;
221 
222  char label[10000];
223 
224  magma_int_t iinfo[2];
225  iinfo[1] = 0;
226  ii = -1;
227 
228  // traverse diagonal blocks
229  for (i = 0; i < (*n); i += nb) {
230  ii++;
231  temp2 = min(nb,(*n)-i);
232 
233  // if not first block
234  if (i > 0) {
235 
236  // first do large syrk, then split
237  if (i < (*n)/2) {
238 
239  sprintf(label, "SYRK %d", ii);
240 
241  if (upper) {
242 
243  QUARK_Insert_Task(quark, SCHED_zsyrk, 0,
244  sizeof(magma_int_t), &upper, VALUE,
245  sizeof(magma_int_t), &temp2, VALUE,
246  sizeof(magma_int_t), &i, VALUE,
247  sizeof(cuDoubleComplex)*(*n)*(*n), A(0,i), INPUT,
248  sizeof(magma_int_t), lda, VALUE,
249  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i), INOUT,
250  sizeof(cuDoubleComplex)*(*n)*(*n), A(i-nb,i), INPUT,
251  strlen(label)+1, label, VALUE | TASKLABEL,
252  6, "green", VALUE | TASKCOLOR,
253  0);
254 
255  } else {
256 
257  QUARK_Insert_Task(quark, SCHED_zsyrk, 0,
258  sizeof(magma_int_t), &upper, VALUE,
259  sizeof(magma_int_t), &temp2, VALUE,
260  sizeof(magma_int_t), &i, VALUE,
261  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,0), INPUT,
262  sizeof(magma_int_t), lda, VALUE,
263  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i), INOUT,
264  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i-nb), INPUT,
265  strlen(label)+1, label, VALUE | TASKLABEL,
266  6, "green", VALUE | TASKCOLOR,
267  0);
268  }
269 
270  } else {
271 
272  jj = -1;
273 
274  // split syrk into tiles
275  for (j = 0; j < i; j += nb) {
276  jj++;
277 
278  sprintf(label, "SYRK %d %d", ii, jj);
279 
280  if (upper) {
281 
282  QUARK_Insert_Task(quark, SCHED_zsyrk, 0,
283  sizeof(magma_int_t), &upper, VALUE,
284  sizeof(magma_int_t), &temp2, VALUE,
285  sizeof(magma_int_t), &nb, VALUE,
286  sizeof(cuDoubleComplex)*(*n)*(*n), A(j,i), INPUT,
287  sizeof(magma_int_t), lda, VALUE,
288  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i), INOUT,
289  strlen(label)+1, label, VALUE | TASKLABEL,
290  6, "green", VALUE | TASKCOLOR,
291  0);
292 
293  } else {
294 
295  QUARK_Insert_Task(quark, SCHED_zsyrk, 0,
296  sizeof(magma_int_t), &upper, VALUE,
297  sizeof(magma_int_t), &temp2, VALUE,
298  sizeof(magma_int_t), &nb, VALUE,
299  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,j), INPUT,
300  sizeof(magma_int_t), lda, VALUE,
301  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i), INOUT,
302  strlen(label)+1, label, VALUE | TASKLABEL,
303  6, "green", VALUE | TASKCOLOR,
304  0);
305  }
306  }
307  }
308 
309  // if not last block
310  if (i < ((*n)-nb)) {
311 
312  jj = -1;
313 
314  // split gemm into tiles
315  for (j = i+nb; j < (*n); j += nb){
316  jj++;
317  kk = -1;
318 
319  for (k = 0; k < i; k += nb) {
320  kk++;
321  temp = min(nb,(*n)-j);
322 
323  sprintf(label, "GEMM %d %d %d", ii, jj, kk);
324 
325  if (upper) {
326  QUARK_Insert_Task(quark, SCHED_zgemm, 0,
327  sizeof(magma_int_t), &upper, VALUE,
328  sizeof(magma_int_t), &nb, VALUE,
329  sizeof(magma_int_t), &temp, VALUE,
330  sizeof(magma_int_t), &nb, VALUE,
331  sizeof(cuDoubleComplex)*(*n)*(*n), A(k,i), INPUT,
332  sizeof(magma_int_t), lda, VALUE,
333  sizeof(cuDoubleComplex)*(*n)*(*n), A(k,j), INPUT,
334  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,j), INOUT,
335  strlen(label)+1, label, VALUE | TASKLABEL,
336  5, "blue", VALUE | TASKCOLOR,
337  0);
338 
339  } else {
340 
341  QUARK_Insert_Task(quark, SCHED_zgemm, 0,
342  sizeof(magma_int_t), &upper, VALUE,
343  sizeof(magma_int_t), &temp, VALUE,
344  sizeof(magma_int_t), &nb, VALUE,
345  sizeof(magma_int_t), &nb, VALUE,
346  sizeof(cuDoubleComplex)*(*n)*(*n), A(j,k), INPUT,
347  sizeof(magma_int_t), lda, VALUE,
348  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,k), INPUT,
349  sizeof(cuDoubleComplex)*(*n)*(*n), A(j,i), INOUT,
350  strlen(label)+1, label, VALUE | TASKLABEL,
351  5, "blue", VALUE | TASKCOLOR,
352  0);
353  }
354  }
355  }
356  }
357  }
358 
359  iinfo[0] = i;
360 
361  sprintf(label, "POTRF %d", ii);
362 
363  QUARK_Insert_Task(quark, SCHED_zpotrf, 0,
364  sizeof(magma_int_t), &upper, VALUE,
365  sizeof(magma_int_t), &temp2, VALUE,
366  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i), INOUT,
367  sizeof(magma_int_t), lda, VALUE,
368  sizeof(magma_int_t), iinfo, OUTPUT,
369  strlen(label)+1, label, VALUE | TASKLABEL,
370  5, "cyan", VALUE | TASKCOLOR,
371  0);
372 
373  // if not last block
374  if (i < ((*n)-nb)) {
375 
376  // split trsm into tiles
377  for (j = i + nb; j < (*n); j += nb) {
378 
379  temp = min(nb,(*n)-j);
380 
381  sprintf(label, "TRSM %d", ii);
382 
383  if (upper) {
384 
385  QUARK_Insert_Task(quark, SCHED_ztrsm, 0,
386  sizeof(magma_int_t), &upper, VALUE,
387  sizeof(magma_int_t), &nb, VALUE,
388  sizeof(magma_int_t), &temp, VALUE,
389  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i), INPUT,
390  sizeof(magma_int_t), lda, VALUE,
391  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,j), INOUT,
392  strlen(label)+1, label, VALUE | TASKLABEL,
393  4, "red", VALUE | TASKCOLOR,
394  0);
395 
396  } else {
397 
398  QUARK_Insert_Task(quark, SCHED_ztrsm, 0,
399  sizeof(magma_int_t), &upper, VALUE,
400  sizeof(magma_int_t), &temp, VALUE,
401  sizeof(magma_int_t), &nb, VALUE,
402  sizeof(cuDoubleComplex)*(*n)*(*n), A(i,i), INPUT,
403  sizeof(magma_int_t), lda, VALUE,
404  sizeof(cuDoubleComplex)*(*n)*(*n), A(j,i), INOUT,
405  strlen(label)+1, label, VALUE | TASKLABEL,
406  4, "red", VALUE | TASKCOLOR,
407  0);
408  }
409  }
410  }
411  }
412 
413  QUARK_Barrier(quark);
414 }
#define MAGMA_ERR_ILLEGAL_VALUE
Definition: magma.h:107
Definition: quark.c:96
Definition: quark.h:52
magma_int_t nb
Definition: magma.h:40
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
magma_int_t num_gpus
Definition: magma.h:31
Quark * quark
Definition: magma.h:37
int magma_int_t
Definition: magmablas.h:12
void SCHED_zsyrk(Quark *quark)
Definition: zpotrf_mc.cpp:47
static void SCHED_zgemm(Quark *quark)
Definition: zpotrf_mc.cpp:16
#define TASKLABEL
Definition: quark.h:70
magma_int_t magma_get_zpotrf_nb(magma_int_t m)
Definition: get_nb.cpp:79
#define A(m, n)
Definition: zpotrf_mc.cpp:13
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define TASKCOLOR
Definition: quark.h:73
void SCHED_ztrsm(Quark *quark)
Definition: zpotrf_mc.cpp:106
#define INPUT
Definition: quark.h:53
unsigned long long QUARK_Insert_Task(Quark *quark, void(*function)(Quark *), Quark_Task_Flags *task_flags,...)
Definition: quark.c:1073
Definition: quark.h:52
void QUARK_Barrier(Quark *quark)
Definition: quark.c:771
magma_int_t num_cores
Definition: magma.h:28
Definition: quark.h:52
#define max(a, b)
Definition: common_magma.h:82
void SCHED_zpotrf(Quark *quark)
Definition: zpotrf_mc.cpp:76

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zpotri ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zpotri_gpu ( char  uplo,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zpotrs_gpu ( char  uplo,
magma_int_t  n,
magma_int_t  nrhs,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  dB,
magma_int_t  lddb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zssssm_gpu ( char  storev,
magma_int_t  m1,
magma_int_t  n1,
magma_int_t  m2,
magma_int_t  n2,
magma_int_t  k,
magma_int_t  ib,
cuDoubleComplex *  dA1,
magma_int_t  ldda1,
cuDoubleComplex *  dA2,
magma_int_t  ldda2,
cuDoubleComplex *  dL1,
magma_int_t  lddl1,
cuDoubleComplex *  dL2,
magma_int_t  lddl2,
magma_int_t *  IPIV,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_ztrtri ( char  uplo,
char  diag,
magma_int_t  n,
cuDoubleComplex *  A,
magma_int_t  lda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_ztrtri_gpu ( char  uplo,
char  diag,
magma_int_t  n,
cuDoubleComplex *  dA,
magma_int_t  ldda,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_ztstrf_gpu ( char  storev,
magma_int_t  m,
magma_int_t  n,
magma_int_t  ib,
magma_int_t  nb,
cuDoubleComplex *  hU,
magma_int_t  ldhu,
cuDoubleComplex *  dU,
magma_int_t  lddu,
cuDoubleComplex *  hA,
magma_int_t  ldha,
cuDoubleComplex *  dA,
magma_int_t  ldda,
cuDoubleComplex *  hL,
magma_int_t  ldhl,
cuDoubleComplex *  dL,
magma_int_t  lddl,
magma_int_t *  ipiv,
cuDoubleComplex *  hwork,
magma_int_t  ldhwork,
cuDoubleComplex *  dwork,
magma_int_t  lddwork,
magma_int_t *  info 
)
magma_int_t magma_zunghr ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dT,
magma_int_t  nb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zungqr ( magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dwork,
magma_int_t  nb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zungqr_gpu ( magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  da,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dwork,
magma_int_t  nb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zunmql ( const char  side,
const char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  c,
magma_int_t  ldc,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zunmql2_gpu ( const char  side,
const char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  da,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dc,
magma_int_t  lddc,
cuDoubleComplex *  wa,
magma_int_t  ldwa,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zunmqr ( char  side,
char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  c,
magma_int_t  ldc,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zunmqr2_gpu ( const char  side,
const char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  da,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dc,
magma_int_t  lddc,
cuDoubleComplex *  wa,
magma_int_t  ldwa,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zunmqr_gpu ( char  side,
char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  c,
magma_int_t  ldc,
cuDoubleComplex *  work,
magma_int_t  lwork,
cuDoubleComplex *  td,
magma_int_t  nb,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zunmtr ( char  side,
char  uplo,
char  trans,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  a,
magma_int_t  lda,
cuDoubleComplex *  tau,
cuDoubleComplex *  c,
magma_int_t  ldc,
cuDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_zunmtr_gpu ( char  side,
char  uplo,
char  trans,
magma_int_t  m,
magma_int_t  n,
cuDoubleComplex *  da,
magma_int_t  ldda,
cuDoubleComplex *  tau,
cuDoubleComplex *  dc,
magma_int_t  lddc,
cuDoubleComplex *  wa,
magma_int_t  ldwa,
magma_int_t *  info 
)

Here is the caller graph for this function: