MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
magma_z.h File Reference
#include "magma_types.h"
#include "magma_zgehrd_m.h"

Macros

#define PRECISION_z
 

Functions

magma_int_t magma_get_zpotrf_nb (magma_int_t m)
 
magma_int_t magma_get_zgetrf_nb (magma_int_t m)
 
magma_int_t magma_get_zgetri_nb (magma_int_t m)
 
magma_int_t magma_get_zgeqp3_nb (magma_int_t m)
 
magma_int_t magma_get_zgeqrf_nb (magma_int_t m)
 
magma_int_t magma_get_zgeqlf_nb (magma_int_t m)
 
magma_int_t magma_get_zgehrd_nb (magma_int_t m)
 
magma_int_t magma_get_zhetrd_nb (magma_int_t m)
 
magma_int_t magma_get_zgelqf_nb (magma_int_t m)
 
magma_int_t magma_get_zgebrd_nb (magma_int_t m)
 
magma_int_t magma_get_zhegst_nb (magma_int_t m)
 
magma_int_t magma_get_zgesvd_nb (magma_int_t m)
 
magma_int_t magma_get_zhegst_nb_m (magma_int_t m)
 
magma_int_t magma_get_zbulge_nb (magma_int_t m, magma_int_t nbthreads)
 
magma_int_t magma_get_zbulge_nb_mgpu (magma_int_t m)
 
magma_int_t magma_zbulge_get_Vblksiz (magma_int_t m, magma_int_t nb, magma_int_t nbthreads)
 
magma_int_t magma_get_zbulge_gcperf ()
 
magma_int_t magma_get_smlsize_divideconquer ()
 
void magma_dmove_eig (char range, magma_int_t n, double *w, magma_int_t *il, magma_int_t *iu, double vl, double vu, magma_int_t *m)
 
magma_int_t magma_zgebrd (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, double *d, double *e, magmaDoubleComplex *tauq, magmaDoubleComplex *taup, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgehrd2 (magma_int_t n, magma_int_t ilo, magma_int_t ihi, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgehrd (magma_int_t n, magma_int_t ilo, magma_int_t ihi, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magmaDoubleComplex *dT, magma_int_t *info)
 
magma_int_t magma_zgelqf (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqlf (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf4 (magma_int_t num_gpus, magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqrf_ooc (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgesv (magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *ipiv, magmaDoubleComplex *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_zgetrf (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf2 (magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zlaqps (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *jpvt, magmaDoubleComplex *tau, double *vn1, double *vn2, magmaDoubleComplex *auxv, magmaDoubleComplex *F, magma_int_t ldf, magmaDoubleComplex *dF, magma_int_t lddf)
 
void magma_zlarfg (magma_int_t n, magmaDoubleComplex *alpha, magmaDoubleComplex *x, magma_int_t incx, magmaDoubleComplex *tau)
 
magma_int_t magma_zlatrd (char uplo, magma_int_t n, magma_int_t nb, magmaDoubleComplex *a, magma_int_t lda, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *w, magma_int_t ldw, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *dw, magma_int_t lddw)
 
magma_int_t magma_zlatrd2 (char uplo, magma_int_t n, magma_int_t nb, magmaDoubleComplex *a, magma_int_t lda, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *w, magma_int_t ldw, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *dw, magma_int_t lddw, magmaDoubleComplex *dwork, magma_int_t ldwork)
 
magma_int_t magma_zlahr2 (magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da, magmaDoubleComplex *dv, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *y, magma_int_t ldy)
 
magma_int_t magma_zlahru (magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *da, magmaDoubleComplex *y, magmaDoubleComplex *v, magmaDoubleComplex *t, magmaDoubleComplex *dwork)
 
magma_int_t magma_zposv (char uplo, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_zpotrf (char uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zpotri (char uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zlauum (char uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_ztrtri (char uplo, char diag, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zhetrd (char uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, double *d, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zungqr (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zungqr2 (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magma_int_t *info)
 
magma_int_t magma_zunmql (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *c, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zunmqr (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *c, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zunmtr (char side, char uplo, char trans, magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *c, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zunghr (magma_int_t n, magma_int_t ilo, magma_int_t ihi, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zgeev (char jobvl, char jobvr, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *w, magmaDoubleComplex *vl, magma_int_t ldvl, magmaDoubleComplex *vr, magma_int_t ldvr, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zgeqp3 (magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magma_int_t *jpvt, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zgesvd (char jobu, char jobvt, magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double *s, magmaDoubleComplex *u, magma_int_t ldu, magmaDoubleComplex *vt, magma_int_t ldvt, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zheevd (char jobz, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zheevdx (char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zheevdx_2stage (char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zheevx (char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *z, magma_int_t ldz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
 
magma_int_t magma_zheevr (char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *z, magma_int_t ldz, magma_int_t *isuppz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvd (magma_int_t itype, char jobz, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvdx (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvdx_2stage (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvx (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *z, magma_int_t ldz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
 
magma_int_t magma_zhegvr (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *z, magma_int_t ldz, magma_int_t *isuppz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zstedx (char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *D, double *E, magmaDoubleComplex *Z, magma_int_t ldz, double *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, double *dwork, magma_int_t *info)
 
magma_int_t magma_zhegst (magma_int_t itype, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_zlahr2_m (magma_int_t n, magma_int_t k, magma_int_t nb, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *T, magma_int_t ldt, magmaDoubleComplex *Y, magma_int_t ldy, struct zgehrd_data *data)
 
magma_int_t magma_zlahru_m (magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, magmaDoubleComplex *A, magma_int_t lda, struct zgehrd_data *data)
 
magma_int_t magma_zgeev_m (char jobvl, char jobvr, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *W, magmaDoubleComplex *vl, magma_int_t ldvl, magmaDoubleComplex *vr, magma_int_t ldvr, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zgehrd_m (magma_int_t n, magma_int_t ilo, magma_int_t ihi, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magmaDoubleComplex *T, magma_int_t *info)
 
magma_int_t magma_zunghr_m (magma_int_t n, magma_int_t ilo, magma_int_t ihi, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *T, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zungqr_m (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *T, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zpotrf_m (magma_int_t num_gpus, char uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zstedx_m (magma_int_t nrgpu, char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *D, double *E, magmaDoubleComplex *Z, magma_int_t ldz, double *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ztrsm_m (magma_int_t nrgpu, char side, char uplo, char transa, char diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb)
 
magma_int_t magma_zunmqr_m (magma_int_t nrgpu, char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *c, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zunmtr_m (magma_int_t nrgpu, char side, char uplo, char trans, magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *c, magma_int_t ldc, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zhegst_m (magma_int_t nrgpu, magma_int_t itype, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_zheevd_m (magma_int_t nrgpu, char jobz, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvd_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zheevdx_m (magma_int_t nrgpu, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvdx_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zheevdx_2stage_m (magma_int_t nrgpu, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegvdx_2stage_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zgels_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgels3_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgelqf_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgeqr2x_gpu (magma_int_t *m, magma_int_t *n, magmaDoubleComplex *dA, magma_int_t *ldda, magmaDoubleComplex *dtau, magmaDoubleComplex *dT, magmaDoubleComplex *ddA, double *dwork, magma_int_t *info)
 
magma_int_t magma_zgeqr2x2_gpu (magma_int_t *m, magma_int_t *n, magmaDoubleComplex *dA, magma_int_t *ldda, magmaDoubleComplex *dtau, magmaDoubleComplex *dT, magmaDoubleComplex *ddA, double *dwork, magma_int_t *info)
 
magma_int_t magma_zgeqr2x3_gpu (magma_int_t *m, magma_int_t *n, magmaDoubleComplex *dA, magma_int_t *ldda, magmaDoubleComplex *dtau, magmaDoubleComplex *dT, magmaDoubleComplex *ddA, double *dwork, magma_int_t *info)
 
magma_int_t magma_zgeqr2x4_gpu (magma_int_t *m, magma_int_t *n, magmaDoubleComplex *dA, magma_int_t *ldda, magmaDoubleComplex *dtau, magmaDoubleComplex *dT, magmaDoubleComplex *ddA, double *dwork, magma_int_t *info, magma_queue_t stream)
 
magma_int_t magma_zgeqrf_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magma_int_t *info)
 
magma_int_t magma_zgeqrf2_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magma_int_t *info)
 
magma_int_t magma_zgeqrf2_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, magmaDoubleComplex **dlA, magma_int_t ldda, magmaDoubleComplex *tau, magma_int_t *info)
 
magma_int_t magma_zgeqrf3_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magma_int_t *info)
 
magma_int_t magma_zgeqr2_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t lda, magmaDoubleComplex *tau, double *work, magma_int_t *info)
 
magma_int_t magma_zgeqrs_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_zgeqrs3_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dT, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_zgessm_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL, magma_int_t lddl, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zgesv_gpu (magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magmaDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zgetf2_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf_incpiv_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magmaDoubleComplex *hA, magma_int_t ldha, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *hL, magma_int_t ldhl, magmaDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, magmaDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_zgetrf_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, magmaDoubleComplex **d_lA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf_m (magma_int_t num_gpus0, magma_int_t m, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf_piv (magma_int_t m, magma_int_t n, magma_int_t NB, magmaDoubleComplex *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_zgetrf2_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t offset, magmaDoubleComplex *d_lAT[], magma_int_t lddat, magma_int_t *ipiv, magmaDoubleComplex *d_lAP[], magmaDoubleComplex *a, magma_int_t lda, magma_queue_t streaml[][2], magma_int_t *info)
 
magma_int_t magma_zgetrf_nopiv_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zgetri_gpu (magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magmaDoubleComplex *dwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zgetrs_gpu (char trans, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magmaDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zlabrd_gpu (magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *da, magma_int_t ldda, double *d, double *e, magmaDoubleComplex *tauq, magmaDoubleComplex *taup, magmaDoubleComplex *x, magma_int_t ldx, magmaDoubleComplex *dx, magma_int_t lddx, magmaDoubleComplex *y, magma_int_t ldy, magmaDoubleComplex *dy, magma_int_t lddy)
 
magma_int_t magma_zlaqps_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *jpvt, magmaDoubleComplex *tau, double *vn1, double *vn2, magmaDoubleComplex *auxv, magmaDoubleComplex *dF, magma_int_t lddf)
 
magma_int_t magma_zlaqps2_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *jpvt, magmaDoubleComplex *tau, double *vn1, double *vn2, magmaDoubleComplex *auxv, magmaDoubleComplex *dF, magma_int_t lddf)
 
magma_int_t magma_zlaqps3_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *jpvt, magmaDoubleComplex *tau, double *vn1, double *vn2, magmaDoubleComplex *auxv, magmaDoubleComplex *dF, magma_int_t lddf)
 
magma_int_t magma_zlarf_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *v, magmaDoubleComplex *tau, magmaDoubleComplex *c, magma_int_t ldc, double *xnorm)
 
magma_int_t magma_zlarfb_gpu (char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const magmaDoubleComplex *dv, magma_int_t ldv, const magmaDoubleComplex *dt, magma_int_t ldt, magmaDoubleComplex *dc, magma_int_t ldc, magmaDoubleComplex *dwork, magma_int_t ldwork)
 
magma_int_t magma_zlarfb2_gpu (magma_int_t m, magma_int_t n, magma_int_t k, const magmaDoubleComplex *dV, magma_int_t ldv, const magmaDoubleComplex *dT, magma_int_t ldt, magmaDoubleComplex *dC, magma_int_t ldc, magmaDoubleComplex *dwork, magma_int_t ldwork)
 
magma_int_t magma_zlarfb_gpu_gemm (char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const magmaDoubleComplex *dv, magma_int_t ldv, const magmaDoubleComplex *dt, magma_int_t ldt, magmaDoubleComplex *dc, magma_int_t ldc, magmaDoubleComplex *dwork, magma_int_t ldwork, magmaDoubleComplex *dworkvt, magma_int_t ldworkvt)
 
magma_int_t magma_zlarfg_gpu (magma_int_t n, magmaDoubleComplex *dx0, magmaDoubleComplex *dx, magmaDoubleComplex *dtau, double *dxnorm, magmaDoubleComplex *dAkk)
 
magma_int_t magma_zposv_gpu (char uplo, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zpotf2_gpu (magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_zpotrf_gpu (char uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zpotrf_mgpu (magma_int_t ngpu, char uplo, magma_int_t n, magmaDoubleComplex **d_lA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zpotrf3_mgpu (magma_int_t num_gpus, char uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magmaDoubleComplex *d_lA[], magma_int_t ldda, magmaDoubleComplex *d_lP[], magma_int_t lddp, magmaDoubleComplex *a, magma_int_t lda, magma_int_t h, magma_queue_t stream[][3], magma_event_t event[][5], magma_int_t *info)
 
magma_int_t magma_zpotri_gpu (char uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zlauum_gpu (char uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_ztrtri_gpu (char uplo, char diag, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_zhetrd_gpu (char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double *d, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zhetrd2_gpu (char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double *d, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *work, magma_int_t lwork, magmaDoubleComplex *dwork, magma_int_t ldwork, magma_int_t *info)
 
double magma_zlatrd_mgpu (magma_int_t num_gpus, char uplo, magma_int_t n0, magma_int_t n, magma_int_t nb, magma_int_t nb0, magmaDoubleComplex *a, magma_int_t lda, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *w, magma_int_t ldw, magmaDoubleComplex **da, magma_int_t ldda, magma_int_t offset, magmaDoubleComplex **dw, magma_int_t lddw, magmaDoubleComplex *dwork[MagmaMaxGPUs], magma_int_t ldwork, magma_int_t k, magmaDoubleComplex *dx[MagmaMaxGPUs], magmaDoubleComplex *dy[MagmaMaxGPUs], magmaDoubleComplex *work, magma_queue_t stream[][10], double *times)
 
magma_int_t magma_zhetrd_mgpu (magma_int_t num_gpus, magma_int_t k, char uplo, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, double *d, double *e, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_zhetrd_hb2st (magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz, magmaDoubleComplex *A, magma_int_t lda, double *D, double *E, magmaDoubleComplex *V, magma_int_t ldv, magmaDoubleComplex *TAU, magma_int_t compT, magmaDoubleComplex *T, magma_int_t ldt)
 
magma_int_t magma_zhetrd_he2hb (char uplo, magma_int_t n, magma_int_t NB, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magmaDoubleComplex *dT, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_zhetrd_he2hb_mgpu (char uplo, magma_int_t n, magma_int_t nb, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magmaDoubleComplex *dAmgpu[], magma_int_t ldda, magmaDoubleComplex *dTmgpu[], magma_int_t lddt, magma_int_t ngpu, magma_int_t distblk, magma_queue_t streams[][20], magma_int_t nstream, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_zhetrd_he2hb_mgpu_spec (char uplo, magma_int_t n, magma_int_t nb, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, magmaDoubleComplex *dAmgpu[], magma_int_t ldda, magmaDoubleComplex *dTmgpu[], magma_int_t lddt, magma_int_t ngpu, magma_int_t distblk, magma_queue_t streams[][20], magma_int_t nstream, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_zpotrs_gpu (char uplo, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_zssssm_gpu (char storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaDoubleComplex *dA1, magma_int_t ldda1, magmaDoubleComplex *dA2, magma_int_t ldda2, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info)
 
magma_int_t magma_ztstrf_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, magmaDoubleComplex *hU, magma_int_t ldhu, magmaDoubleComplex *dU, magma_int_t lddu, magmaDoubleComplex *hA, magma_int_t ldha, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *hL, magma_int_t ldhl, magmaDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, magmaDoubleComplex *hwork, magma_int_t ldhwork, magmaDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_zungqr_gpu (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dwork, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zunmql2_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dc, magma_int_t lddc, magmaDoubleComplex *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_zunmqr_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dC, magma_int_t lddc, magmaDoubleComplex *hwork, magma_int_t lwork, magmaDoubleComplex *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_zunmqr2_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dc, magma_int_t lddc, magmaDoubleComplex *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_zunmtr_gpu (char side, char uplo, char trans, magma_int_t m, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex *dc, magma_int_t lddc, magmaDoubleComplex *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_zgeqp3_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *jpvt, magmaDoubleComplex *tau, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)
 
magma_int_t magma_zheevd_gpu (char jobz, char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double *w, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zheevdx_gpu (char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zheevx_gpu (char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *dz, magma_int_t lddz, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *wz, magma_int_t ldwz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
 
magma_int_t magma_zheevr_gpu (char jobz, char range, char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, double vl, double vu, magma_int_t il, magma_int_t iu, double abstol, magma_int_t *m, double *w, magmaDoubleComplex *dz, magma_int_t lddz, magma_int_t *isuppz, magmaDoubleComplex *wa, magma_int_t ldwa, magmaDoubleComplex *wz, magma_int_t ldwz, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t lrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_zhegst_gpu (magma_int_t itype, char uplo, magma_int_t n, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
 
void magma_zprint (magma_int_t m, magma_int_t n, const magmaDoubleComplex *A, magma_int_t lda)
 
void magma_zprint_gpu (magma_int_t m, magma_int_t n, const magmaDoubleComplex *dA, magma_int_t ldda)
 
void zpanel_to_q (magma_uplo_t uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
 
void zq_to_panel (magma_uplo_t uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
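The CPU-interface drivers above follow the familiar LAPACK calling convention: the caller allocates host arrays, calls the routine, and checks *info on return. Below is a minimal, hypothetical usage sketch (not part of this header) that solves A*X = B in double-complex precision with magma_zgesv; it assumes magma_init()/magma_finalize() are available from magma.h and uses plain malloc for host storage.

    #include <stdio.h>
    #include <stdlib.h>
    #include "magma.h"   /* declares the magma_z.h routines used below */

    int main( void )
    {
        magma_init();    /* assumed MAGMA runtime setup from magma.h */

        magma_int_t n = 100, nrhs = 1, info = 0;
        magma_int_t lda = n, ldb = n;
        magma_int_t        *ipiv = (magma_int_t*)        malloc( n        * sizeof(magma_int_t) );
        magmaDoubleComplex *A    = (magmaDoubleComplex*) malloc( lda*n    * sizeof(magmaDoubleComplex) );
        magmaDoubleComplex *B    = (magmaDoubleComplex*) malloc( ldb*nrhs * sizeof(magmaDoubleComplex) );

        /* Fill A with a diagonally dominant test matrix and B with ones. */
        for( magma_int_t j = 0; j < n; ++j ) {
            for( magma_int_t i = 0; i < n; ++i )
                A[i + j*lda] = MAGMA_Z_MAKE( (i == j) ? (double) n : 1., 0. );
            B[j] = MAGMA_Z_MAKE( 1., 0. );
        }

        /* LU factorization and solve; the solution overwrites B. */
        magma_zgesv( n, nrhs, A, lda, ipiv, B, ldb, &info );
        if ( info != 0 )
            printf( "magma_zgesv returned info = %d\n", (int) info );

        free( A ); free( B ); free( ipiv );
        magma_finalize();   /* assumed counterpart of magma_init */
        return 0;
    }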
 

Macro Definition Documentation

#define PRECISION_z

Definition at line 17 of file magma_z.h.

Function Documentation

void magma_dmove_eig (char range, magma_int_t n, double *w, magma_int_t *il, magma_int_t *iu, double vl, double vu, magma_int_t *m)

Definition at line 16 of file dmove_eig.cpp.

References lapackf77_lsame.

18 {
19  char range_[2] = {range, 0};
20 
21  magma_int_t valeig, indeig, i;
22 
23  valeig = lapackf77_lsame( range_, "V" );
24  indeig = lapackf77_lsame( range_, "I" );
25 
26  if (indeig){
27  *m = *iu - *il + 1;
28  if(*il > 1)
29  for (i = 0; i < *m; ++i)
30  w[i] = w[*il - 1 + i];
31  }
32  else if(valeig){
33  *il=1;
34  *iu=n;
35  for (i = 0; i < n; ++i){
36  if (w[i] > vu){
37  *iu = i;
38  break;
39  }
40  else if (w[i] < vl)
41  ++*il;
42  else if (*il > 1)
43  w[i-*il+1]=w[i];
44  }
45  *m = *iu - *il + 1;
46  }
47  else{
48  *il = 1;
49  *iu = n;
50  *m = n;
51  }
52 
53  return;
54 }
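As the listing shows, range = 'I' compacts the selected eigenvalues w(il..iu) to the front of w and sets *m to their count, while range = 'V' derives il, iu and *m from the interval [vl, vu]. A small, hypothetical illustration of the 'I' branch (values invented for this example):

    #include "magma.h"   /* magma_int_t and the magma_dmove_eig prototype */

    void example_dmove_eig( void )
    {
        /* A sorted spectrum of 6 eigenvalues (example data only). */
        double w[6] = { 0.1, 0.5, 1.2, 2.0, 3.3, 4.7 };
        magma_int_t il = 2, iu = 4, m;

        /* range = 'I': keep eigenvalues il..iu; vl and vu are ignored on this path. */
        magma_dmove_eig( 'I', 6, w, &il, &iu, 0., 0., &m );
        /* Now m == 3 and w[0..2] == { 0.5, 1.2, 2.0 }. */
    }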
magma_int_t magma_get_smlsize_divideconquer ( )

Definition at line 768 of file get_nb.cpp.

769  {
770  return 128;
771  }


magma_int_t magma_get_zbulge_gcperf ( )

Definition at line 749 of file get_nb.cpp.

References magma_getdevice_arch().

750 {
751  magma_int_t arch = magma_getdevice_arch();
752  if ( arch >= 300 ) { // 3.x Kepler + SB
753  return 50;
754  }
755  else if ( arch >= 200 ) { // 2.x Fermi
756  return 15000;
757  }
758  else { // 1.x
759  return 10000;
760  }
761 }

magma_int_t magma_get_zbulge_nb (magma_int_t m, magma_int_t nbthreads)

Definition at line 823 of file get_nb.cpp.

References magma_getdevice_arch().

824 {
825  magma_int_t arch = magma_getdevice_arch();
826  if ( arch >= 300 ) { // 3.x Kepler + SB
827  if(nbthreads>14)
828  return 128;
829  else
830  return 64;
831  }
832  else if ( arch >= 200 ) { // 2.x Fermi
833  return 64;
834  }
835  else { // 1.x
836  return 64;
837  }
838 }

magma_int_t magma_get_zbulge_nb_mgpu ( magma_int_t  m)

Definition at line 942 of file get_nb.cpp.

References magma_getdevice_arch().

943 {
944  magma_int_t arch = magma_getdevice_arch();
945  if ( arch >= 300 ) { // 3.x Kepler + SB
946  return 64;
947  }
948  else if ( arch >= 200 ) { // 2.x Fermi
949  return 64;
950  }
951  else { // 1.x
952  return 64;
953  }
954 }

magma_int_t magma_get_zgebrd_nb ( magma_int_t  m)

Definition at line 488 of file get_nb.cpp.

References magma_getdevice_arch().

489 {
490  magma_int_t arch = magma_getdevice_arch();
491  if ( arch >= 200 ) { // 2.x Fermi
492  return 32;
493  //if (m < 1024)
494  // return 64;
495  //else
496  // return 64;
497  }
498  else { // 1.x
499  return 32;
500  }
501 }

magma_int_t magma_get_zgehrd_nb ( magma_int_t  m)

Definition at line 367 of file get_nb.cpp.

368 {
369  if (m < 2048) return 32;
370  else return 64;
371 }


magma_int_t magma_get_zgelqf_nb ( magma_int_t  m)

Definition at line 257 of file get_nb.cpp.

258 {
259  if (m < 1024) return 64;
260  else return 128;
261 }


magma_int_t magma_get_zgeqlf_nb ( magma_int_t  m)

Definition at line 223 of file get_nb.cpp.

224 {
225  if (m < 1024) return 64;
226  else return 128;
227 }


magma_int_t magma_get_zgeqp3_nb ( magma_int_t  m)

Definition at line 112 of file get_nb.cpp.

113 {
114  return 32;
115 }


magma_int_t magma_get_zgeqrf_nb ( magma_int_t  m)

Definition at line 169 of file get_nb.cpp.

References magma_getdevice_arch().

170 {
171  magma_int_t arch = magma_getdevice_arch();
172  if ( arch >= 300 ) { // 3.x Kepler
173  if (m < 4096) return 64;
174  else return 128;
175  }
176  else if ( arch >= 200 ) { // 2.x Fermi
177  if (m < 2048) return 32;
178  else if (m < 4096) return 64;
179  else return 128;
180  }
181  else { // 1.x
182  if (m < 1024) return 64;
183  else return 128;
184  }
185 }
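The magma_get_*_nb helpers documented in this section return the blocking size NB used by the corresponding factorization, which also determines the host workspace size. Below is a minimal, hypothetical sketch of the usual pattern for magma_zgeqrf; it assumes magma_zgeqrf honors the same LWORK = -1 workspace-query convention that magma_zgebrd and magma_zgeev document later in this file, and it treats n*nb only as an assumed fallback bound.

    #include <stdlib.h>
    #include "magma.h"

    /* Hypothetical helper: QR-factor an m-by-n host matrix in place. */
    magma_int_t qr_factor( magma_int_t m, magma_int_t n,
                           magmaDoubleComplex *A, magma_int_t lda,
                           magmaDoubleComplex *tau )
    {
        magma_int_t info  = 0;
        magma_int_t nb    = magma_get_zgeqrf_nb( m );
        magma_int_t lwork = n * nb;                  /* assumed fallback bound */

        /* Workspace query: the optimal size comes back in query. */
        magmaDoubleComplex query;
        magma_zgeqrf( m, n, A, lda, tau, &query, -1, &info );
        if ( info == 0 && (magma_int_t) MAGMA_Z_REAL( query ) > lwork )
            lwork = (magma_int_t) MAGMA_Z_REAL( query );

        magmaDoubleComplex *work =
            (magmaDoubleComplex*) malloc( lwork * sizeof(magmaDoubleComplex) );
        magma_zgeqrf( m, n, A, lda, tau, work, lwork, &info );
        free( work );
        return info;
    }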

magma_int_t magma_get_zgesvd_nb ( magma_int_t  m)

Definition at line 607 of file get_nb.cpp.

References magma_get_zgebrd_nb().

608 {
609  return magma_get_zgebrd_nb( m );
610 }

magma_int_t magma_get_zgetrf_nb ( magma_int_t  m)

Definition at line 322 of file get_nb.cpp.

References magma_getdevice_arch().

323 {
324  magma_int_t arch = magma_getdevice_arch();
325  if ( arch >= 300 ) { // 3.x Kepler
326  if (m < 4096) return 64;
327  else if (m < 8192) return 256;
328  else return 512;
329  }
330  else if ( arch >= 200 ) { // 2.x Fermi
331  if (m < 4096) return 64;
332  else return 128;
333  }
334  else { // 1.x
335  return 128;
336  }
337 }

magma_int_t magma_get_zgetri_nb ( magma_int_t  m)

Definition at line 584 of file get_nb.cpp.

585 {
586  return 64;
587 }


magma_int_t magma_get_zhegst_nb ( magma_int_t  m)

Definition at line 552 of file get_nb.cpp.

References magma_getdevice_arch().

553 {
554  magma_int_t arch = magma_getdevice_arch();
555  if ( arch >= 300 ) { // 3.x Kepler
556  return 384;
557  }
558  else if ( arch >= 200 ) { // 2.x Fermi
559  return 256;
560  }
561  else { // 1.x
562  return 64;
563  }
564 }

magma_int_t magma_get_zhegst_nb_m ( magma_int_t  m)

Definition at line 675 of file get_nb.cpp.

References magma_getdevice_arch().

676 {
677  return 256; //to be updated
678 
679  magma_int_t arch = magma_getdevice_arch();
680  if ( arch >= 300 ) { // 3.x Kepler
681  return 384;
682  }
683  else if ( arch >= 200 ) { // 2.x Fermi
684  return 256;
685  }
686  else { // 1.x
687  return 64;
688  }
689 }

magma_int_t magma_get_zhetrd_nb ( magma_int_t  m)

Definition at line 423 of file get_nb.cpp.

References magma_getdevice_arch().

424 {
425  magma_int_t arch = magma_getdevice_arch();
426  if ( arch >= 200 ) { // 2.x Fermi
427  return 32;
428  //return 64;
429  //if (m < 1024)
430  // return 64;
431  //else
432  // return 64;
433  }
434  else { // 1.x
435  return 32;
436  //return 64;
437  }
438 }

magma_int_t magma_get_zpotrf_nb ( magma_int_t  m)

Definition at line 79 of file get_nb.cpp.

References magma_getdevice_arch().

80 {
81  magma_int_t arch = magma_getdevice_arch();
82  if ( arch >= 300 ) { // 3.x Kepler
83  return 256;
84  }
85  else if ( arch >= 200 ) { // 2.x Fermi
86  if (m < 1500) return 192;
87  else return 256;
88  }
89  else { // 1.x
90  return 64;
91  }
92 }

magma_int_t magma_zbulge_get_Vblksiz (magma_int_t m, magma_int_t nb, magma_int_t nbthreads)

Definition at line 880 of file get_nb.cpp.

References magma_getdevice_arch(), and min.

881 {
882  magma_int_t arch = magma_getdevice_arch();
883  if ( arch >= 300 ) { // 3.x Kepler + SB
884  if(nbthreads>14)
885  return min(nb,64);
886  else
887  return min(nb,32);
888  }
889  else { // 2.x Fermi or 1.x
890  return min(nb,48);
891  }
892 }

magma_int_t magma_zgebrd (magma_int_t m, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, double *d, double *e, magmaDoubleComplex *tauq, magmaDoubleComplex *taup, magmaDoubleComplex *work, magma_int_t lwork, magma_int_t *info)

Definition at line 17 of file zgebrd.cpp.

References __func__, A, dA, dwork, lapackf77_zgebrd, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgebrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_NEG_ONE, MAGMA_Z_ONE, magma_zgemm(), magma_zgetmatrix, magma_zlabrd_gpu(), magma_zmalloc(), magma_zsetmatrix, MagmaConjTrans, MagmaNoTrans, max, and min.

22 {
23 /* -- MAGMA (version 1.4.0) --
24  Univ. of Tennessee, Knoxville
25  Univ. of California, Berkeley
26  Univ. of Colorado, Denver
27  August 2013
28 
29  Purpose
30  =======
31  ZGEBRD reduces a general complex M-by-N matrix A to upper or lower
32  bidiagonal form B by an orthogonal transformation: Q**H * A * P = B.
33 
34  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
35 
36  Arguments
37  =========
38  M (input) INTEGER
39  The number of rows in the matrix A. M >= 0.
40 
41  N (input) INTEGER
42  The number of columns in the matrix A. N >= 0.
43 
44  A (input/output) COMPLEX_16 array, dimension (LDA,N)
45  On entry, the M-by-N general matrix to be reduced.
46  On exit,
47  if m >= n, the diagonal and the first superdiagonal are
48  overwritten with the upper bidiagonal matrix B; the
49  elements below the diagonal, with the array TAUQ, represent
50  the orthogonal matrix Q as a product of elementary
51  reflectors, and the elements above the first superdiagonal,
52  with the array TAUP, represent the orthogonal matrix P as
53  a product of elementary reflectors;
54  if m < n, the diagonal and the first subdiagonal are
55  overwritten with the lower bidiagonal matrix B; the
56  elements below the first subdiagonal, with the array TAUQ,
57  represent the orthogonal matrix Q as a product of
58  elementary reflectors, and the elements above the diagonal,
59  with the array TAUP, represent the orthogonal matrix P as
60  a product of elementary reflectors.
61  See Further Details.
62 
63  LDA (input) INTEGER
64  The leading dimension of the array A. LDA >= max(1,M).
65 
66  D (output) double precision array, dimension (min(M,N))
67  The diagonal elements of the bidiagonal matrix B:
68  D(i) = A(i,i).
69 
70  E (output) double precision array, dimension (min(M,N)-1)
71  The off-diagonal elements of the bidiagonal matrix B:
72  if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
73  if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
74 
75  TAUQ (output) COMPLEX_16 array dimension (min(M,N))
76  The scalar factors of the elementary reflectors which
77  represent the orthogonal matrix Q. See Further Details.
78 
79  TAUP (output) COMPLEX_16 array, dimension (min(M,N))
80  The scalar factors of the elementary reflectors which
81  represent the orthogonal matrix P. See Further Details.
82 
83  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
84  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
85 
86  LWORK (input) INTEGER
87  The length of the array WORK. LWORK >= (M+N)*NB, where NB
88  is the optimal blocksize.
89 
90  If LWORK = -1, then a workspace query is assumed; the routine
91  only calculates the optimal size of the WORK array, returns
92  this value as the first entry of the WORK array, and no error
93  message related to LWORK is issued by XERBLA.
94 
95  INFO (output) INTEGER
96  = 0: successful exit
97  < 0: if INFO = -i, the i-th argument had an illegal value.
98 
99  Further Details
100  ===============
101  The matrices Q and P are represented as products of elementary
102  reflectors:
103 
104  If m >= n,
105  Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
106  Each H(i) and G(i) has the form:
107  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
108  where tauq and taup are complex scalars, and v and u are complex vectors;
109  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
110  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
111  tauq is stored in TAUQ(i) and taup in TAUP(i).
112 
113  If m < n,
114  Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
115  Each H(i) and G(i) has the form:
116  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
117  where tauq and taup are complex scalars, and v and u are complex vectors;
118  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
119  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
120  tauq is stored in TAUQ(i) and taup in TAUP(i).
121 
122  The contents of A on exit are illustrated by the following examples:
123 
124  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
125 
126  ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
127  ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
128  ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
129  ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
130  ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
131  ( v1 v2 v3 v4 v5 )
132 
133  where d and e denote diagonal and off-diagonal elements of B, vi
134  denotes an element of the vector defining H(i), and ui an element of
135  the vector defining G(i).
136  ===================================================================== */
137 
138  magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
139  magmaDoubleComplex c_one = MAGMA_Z_ONE;
140  magmaDoubleComplex *da, *dwork;
141 
142  magma_int_t ncol, nrow, jmax, nb, ldda;
143 
144  magma_int_t i, j, nx;
145  magma_int_t iinfo;
146 
147  magma_int_t minmn;
148  magma_int_t ldwrkx, ldwrky, lwkopt;
149  magma_int_t lquery;
150 
151  nb = magma_get_zgebrd_nb(n);
152  ldda = m;
153 
154  lwkopt = (m + n) * nb;
155  work[0] = MAGMA_Z_MAKE( lwkopt, 0. );
156  lquery = (lwork == -1);
157 
158  /* Check arguments */
159  *info = 0;
160  if (m < 0) {
161  *info = -1;
162  } else if (n < 0) {
163  *info = -2;
164  } else if (lda < max(1,m)) {
165  *info = -4;
166  } else if (lwork < lwkopt && (! lquery) ) {
167  *info = -10;
168  }
169  if (*info < 0) {
170  magma_xerbla( __func__, -(*info) );
171  return *info;
172  }
173  else if (lquery)
174  return *info;
175 
176  /* Quick return if possible */
177  minmn = min(m,n);
178  if (minmn == 0) {
179  work[0] = c_one;
180  return *info;
181  }
182 
183  if (MAGMA_SUCCESS != magma_zmalloc( &da, n*ldda + (m + n)*nb )) {
184  fprintf (stderr, "!!!! device memory allocation error in zgebrd\n" );
185  *info = MAGMA_ERR_DEVICE_ALLOC;
186  return *info;
187  }
188  dwork = da + (n)*ldda;
189 
190  ldwrkx = m;
191  ldwrky = n;
192 
193  /* Set the block/unblock crossover point NX. */
194  nx = 128;
195 
196  /* Copy the matrix to the GPU */
197  if (minmn - nx >= 1) {
198  magma_zsetmatrix( m, n, a, lda, da, ldda );
199  }
200 
201  for (i=0; i< (minmn - nx); i += nb) {
202 
203  /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
204  the matrices X and Y which are needed to update the unreduced
205  part of the matrix */
206  nrow = m - i;
207  ncol = n - i;
208 
209  /* Get the current panel (no need for the 1st iteration) */
210  if ( i > 0 ) {
211  magma_zgetmatrix( nrow, nb, dA(i, i), ldda, A( i, i), lda );
212  magma_zgetmatrix( nb, ncol - nb,
213  dA(i, i+nb), ldda,
214  A( i, i+nb), lda );
215  }
216 
217  magma_zlabrd_gpu(nrow, ncol, nb,
218  A(i, i), lda, dA(i, i), ldda,
219  d+i, e+i, tauq+i, taup+i,
220  work, ldwrkx, dwork, ldwrkx, // x, dx
221  work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky); // y, dy
222 
223  /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
224  of the form A := A - V*Y' - X*U' */
225  nrow = m - i - nb;
226  ncol = n - i - nb;
227 
228  // Send Y back to the GPU
229  magma_zsetmatrix( nrow, nb, work + nb, ldwrkx, dwork + nb, ldwrkx );
230  magma_zsetmatrix( ncol, nb,
231  work + (ldwrkx+1)*nb, ldwrky,
232  dwork + (ldwrkx+1)*nb, ldwrky );
233 
234  magma_zgemm( MagmaNoTrans, MagmaConjTrans,
235  nrow, ncol, nb,
236  c_neg_one, dA(i+nb, i ), ldda,
237  dwork+(ldwrkx+1)*nb, ldwrky,
238  c_one, dA(i+nb, i+nb), ldda);
239 
240  magma_zgemm( MagmaNoTrans, MagmaNoTrans,
241  nrow, ncol, nb,
242  c_neg_one, dwork+nb, ldwrkx,
243  dA( i, i+nb ), ldda,
244  c_one, dA( i+nb, i+nb ), ldda);
245 
246  /* Copy diagonal and off-diagonal elements of B back into A */
247  if (m >= n) {
248  jmax = i + nb;
249  for (j = i; j < jmax; ++j) {
250  *A(j, j ) = MAGMA_Z_MAKE( d[j], 0. );
251  *A(j, j+1) = MAGMA_Z_MAKE( e[j], 0. );
252  }
253  } else {
254  jmax = i + nb;
255  for (j = i; j < jmax; ++j) {
256  *A(j, j ) = MAGMA_Z_MAKE( d[j], 0. );
257  *A(j+1, j ) = MAGMA_Z_MAKE( e[j], 0. );
258  }
259  }
260  }
261 
262  /* Use unblocked code to reduce the remainder of the matrix */
263  nrow = m - i;
264  ncol = n - i;
265 
266  if ( 0 < minmn - nx ) {
267  magma_zgetmatrix( nrow, ncol, dA(i, i), ldda, A(i, i), lda );
268  }
269 
270  lapackf77_zgebrd( &nrow, &ncol,
271  A(i, i), &lda, d+i, e+i,
272  tauq+i, taup+i, work, &lwork, &iinfo);
273  work[0] = MAGMA_Z_MAKE( lwkopt, 0. );
274 
275  magma_free( da );
276  return *info;
277 } /* zgebrd */
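The LWORK = -1 query described in the Arguments section can be used to size WORK before the actual call, as the listing shows (work[0] receives the optimal size and the routine returns immediately). A minimal, hypothetical wrapper is sketched below; the caller provides the host matrix and the d, e, tauq, taup arrays.

    #include <stdlib.h>
    #include "magma.h"

    /* Hypothetical wrapper around magma_zgebrd using the LWORK = -1 query. */
    magma_int_t bidiagonalize( magma_int_t m, magma_int_t n,
                               magmaDoubleComplex *A, magma_int_t lda,
                               double *d, double *e,
                               magmaDoubleComplex *tauq, magmaDoubleComplex *taup )
    {
        magma_int_t info = 0;

        /* Workspace query: the optimal size is returned in query. */
        magmaDoubleComplex query;
        magma_zgebrd( m, n, A, lda, d, e, tauq, taup, &query, -1, &info );
        if ( info != 0 )
            return info;

        magma_int_t lwork = (magma_int_t) MAGMA_Z_REAL( query );  /* == (m + n)*nb */
        magmaDoubleComplex *work =
            (magmaDoubleComplex*) malloc( lwork * sizeof(magmaDoubleComplex) );

        magma_zgebrd( m, n, A, lda, d, e, tauq, taup, work, lwork, &info );
        free( work );
        return info;
    }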

magma_int_t magma_zgeev (char jobvl, char jobvr, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *w, magmaDoubleComplex *vl, magma_int_t ldvl, magmaDoubleComplex *vr, magma_int_t ldvr, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info)

Definition at line 25 of file zgeev.cpp.

References __func__, cblas_dznrm2(), cblas_idamax(), CBLAS_SADDR, cblas_zdscal(), cblas_zscal(), dT, lapackf77_dlabad, lapackf77_dlamch, lapackf77_lsame, lapackf77_zgebak, lapackf77_zgebal, lapackf77_zgehrd, lapackf77_zhseqr, lapackf77_zlacpy, lapackf77_zlange, lapackf77_zlascl, lapackf77_ztrevc, lapackf77_zunghr, magma_dsqrt, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_CNJG, MAGMA_Z_DSCALE, MAGMA_Z_IMAG, MAGMA_Z_MAKE, MAGMA_Z_REAL, MAGMA_Z_SET2REAL, magma_zgehrd(), magma_zgehrd2(), magma_zmalloc(), magma_zunghr(), MagmaLowerStr, max, vl, and vr.

33 {
34 /* -- MAGMA (version 1.4.0) --
35  Univ. of Tennessee, Knoxville
36  Univ. of California, Berkeley
37  Univ. of Colorado, Denver
38  August 2013
39 
40  Purpose
41  =======
42  ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the
43  eigenvalues and, optionally, the left and/or right eigenvectors.
44 
45  The right eigenvector v(j) of A satisfies
46  A * v(j) = lambda(j) * v(j)
47  where lambda(j) is its eigenvalue.
48  The left eigenvector u(j) of A satisfies
49  u(j)**H * A = lambda(j) * u(j)**H
50  where u(j)**H denotes the conjugate transpose of u(j).
51 
52  The computed eigenvectors are normalized to have Euclidean norm
53  equal to 1 and largest component real.
54 
55  Arguments
56  =========
57  JOBVL (input) CHARACTER*1
58  = 'N': left eigenvectors of A are not computed;
59  = 'V': left eigenvectors of A are computed.
60 
61  JOBVR (input) CHARACTER*1
62  = 'N': right eigenvectors of A are not computed;
63  = 'V': right eigenvectors of A are computed.
64 
65  N (input) INTEGER
66  The order of the matrix A. N >= 0.
67 
68  A (input/output) COMPLEX_16 array, dimension (LDA,N)
69  On entry, the N-by-N matrix A.
70  On exit, A has been overwritten.
71 
72  LDA (input) INTEGER
73  The leading dimension of the array A. LDA >= max(1,N).
74 
75  W (output) COMPLEX_16 array, dimension (N)
76  W contains the computed eigenvalues.
77 
78  VL (output) COMPLEX_16 array, dimension (LDVL,N)
79  If JOBVL = 'V', the left eigenvectors u(j) are stored one
80  after another in the columns of VL, in the same order
81  as their eigenvalues.
82  If JOBVL = 'N', VL is not referenced.
83  u(j) = VL(:,j), the j-th column of VL.
84 
85  LDVL (input) INTEGER
86  The leading dimension of the array VL. LDVL >= 1; if
87  JOBVL = 'V', LDVL >= N.
88 
89  VR (output) COMPLEX_16 array, dimension (LDVR,N)
90  If JOBVR = 'V', the right eigenvectors v(j) are stored one
91  after another in the columns of VR, in the same order
92  as their eigenvalues.
93  If JOBVR = 'N', VR is not referenced.
94  v(j) = VR(:,j), the j-th column of VR.
95 
96  LDVR (input) INTEGER
97  The leading dimension of the array VR. LDVR >= 1; if
98  JOBVR = 'V', LDVR >= N.
99 
100  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
101  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
102 
103  LWORK (input) INTEGER
104  The dimension of the array WORK. LWORK >= (1+nb)*N.
105 
106  If LWORK = -1, then a workspace query is assumed; the routine
107  only calculates the optimal size of the WORK array, returns
108  this value as the first entry of the WORK array, and no error
109  message related to LWORK is issued by XERBLA.
110 
111  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
112 
113  INFO (output) INTEGER
114  = 0: successful exit
115  < 0: if INFO = -i, the i-th argument had an illegal value.
116  > 0: if INFO = i, the QR algorithm failed to compute all the
117  eigenvalues, and no eigenvectors have been computed;
118  elements i+1:N of W contain eigenvalues which have
119  converged.
120  ===================================================================== */
121 
122  #define vl(i,j) (vl + (i) + (j)*ldvl)
123  #define vr(i,j) (vr + (i) + (j)*ldvr)
124 
125  magma_int_t c_one = 1;
126  magma_int_t c_zero = 0;
127 
128  double d__1, d__2;
129  magmaDoubleComplex z__1, z__2;
130  magmaDoubleComplex tmp;
131  double scl;
132  double dum[1], eps;
133  double anrm, cscale, bignum, smlnum;
134  magma_int_t i, k, ilo, ihi;
135  magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
136  magma_int_t scalea, minwrk, irwork, lquery, wantvl, wantvr, select[1];
137 
138  char side[2] = {0, 0};
139  char jobvl_[2] = {jobvl, 0};
140  char jobvr_[2] = {jobvr, 0};
141 
142  irwork = 0;
143  *info = 0;
144  lquery = lwork == -1;
145  wantvl = lapackf77_lsame( jobvl_, "V" );
146  wantvr = lapackf77_lsame( jobvr_, "V" );
147  if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
148  *info = -1;
149  } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
150  *info = -2;
151  } else if (n < 0) {
152  *info = -3;
153  } else if (lda < max(1,n)) {
154  *info = -5;
155  } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
156  *info = -8;
157  } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
158  *info = -10;
159  }
160 
161  /* Compute workspace */
162  nb = magma_get_zgehrd_nb( n );
163  if (*info == 0) {
164  minwrk = (1+nb)*n;
165  work[0] = MAGMA_Z_MAKE( (double) minwrk, 0. );
166 
167  if (lwork < minwrk && ! lquery) {
168  *info = -12;
169  }
170  }
171 
172  if (*info != 0) {
173  magma_xerbla( __func__, -(*info) );
174  return *info;
175  }
176  else if (lquery) {
177  return *info;
178  }
179 
180  /* Quick return if possible */
181  if (n == 0) {
182  return *info;
183  }
184 
185  #if defined(VERSION3)
186  magmaDoubleComplex *dT;
187  if (MAGMA_SUCCESS != magma_zmalloc( &dT, nb*n )) {
188  *info = MAGMA_ERR_DEVICE_ALLOC;
189  return *info;
190  }
191  #endif
192 
193  /* Get machine constants */
194  eps = lapackf77_dlamch( "P" );
195  smlnum = lapackf77_dlamch( "S" );
196  bignum = 1. / smlnum;
197  lapackf77_dlabad( &smlnum, &bignum );
198  smlnum = magma_dsqrt( smlnum ) / eps;
199  bignum = 1. / smlnum;
200 
201  /* Scale A if max element outside range [SMLNUM,BIGNUM] */
202  anrm = lapackf77_zlange( "M", &n, &n, A, &lda, dum );
203  scalea = 0;
204  if (anrm > 0. && anrm < smlnum) {
205  scalea = 1;
206  cscale = smlnum;
207  } else if (anrm > bignum) {
208  scalea = 1;
209  cscale = bignum;
210  }
211  if (scalea) {
212  lapackf77_zlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
213  }
214 
215  /* Balance the matrix
216  * (CWorkspace: none)
217  * (RWorkspace: need N) */
218  ibal = 0;
219  lapackf77_zgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr );
220 
221  /* Reduce to upper Hessenberg form
222  * (CWorkspace: need 2*N, prefer N + N*NB)
223  * (RWorkspace: none) */
224  itau = 0;
225  iwrk = itau + n;
226  liwrk = lwork - iwrk;
227 
228  #if defined(VERSION1)
229  // Version 1 - LAPACK
230  lapackf77_zgehrd( &n, &ilo, &ihi, A, &lda,
231  &work[itau], &work[iwrk], &liwrk, &ierr );
232  #elif defined(VERSION2)
233  // Version 2 - LAPACK consistent HRD
234  magma_zgehrd2( n, ilo, ihi, A, lda,
235  &work[itau], &work[iwrk], liwrk, &ierr );
236  #elif defined(VERSION3)
237  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
238  magma_zgehrd( n, ilo, ihi, A, lda,
239  &work[itau], &work[iwrk], liwrk, dT, &ierr );
240  #endif
241 
242  if (wantvl) {
243  /* Want left eigenvectors
244  * Copy Householder vectors to VL */
245  side[0] = 'L';
246  lapackf77_zlacpy( MagmaLowerStr, &n, &n, A, &lda, vl, &ldvl );
247 
248  /* Generate unitary matrix in VL
249  * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
250  * (RWorkspace: none) */
251  #if defined(VERSION1) || defined(VERSION2)
252  // Version 1 & 2 - LAPACK
253  lapackf77_zunghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
254  &work[iwrk], &liwrk, &ierr );
255  #elif defined(VERSION3)
256  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
257  magma_zunghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
258  #endif
259 
260  /* Perform QR iteration, accumulating Schur vectors in VL
261  * (CWorkspace: need 1, prefer HSWORK (see comments) )
262  * (RWorkspace: none) */
263  iwrk = itau;
264  liwrk = lwork - iwrk;
265  lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W,
266  vl, &ldvl, &work[iwrk], &liwrk, info );
267 
268  if (wantvr) {
269  /* Want left and right eigenvectors
270  * Copy Schur vectors to VR */
271  side[0] = 'B';
272  lapackf77_zlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
273  }
274  }
275  else if (wantvr) {
276  /* Want right eigenvectors
277  * Copy Householder vectors to VR */
278  side[0] = 'R';
279  lapackf77_zlacpy( "L", &n, &n, A, &lda, vr, &ldvr );
280 
281  /* Generate unitary matrix in VR
282  * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
283  * (RWorkspace: none) */
284  #if defined(VERSION1) || defined(VERSION2)
285  // Version 1 & 2 - LAPACK
286  lapackf77_zunghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
287  &work[iwrk], &liwrk, &ierr );
288  #elif defined(VERSION3)
289  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
290  magma_zunghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
291  #endif
292 
293  /* Perform QR iteration, accumulating Schur vectors in VR
294  * (CWorkspace: need 1, prefer HSWORK (see comments) )
295  * (RWorkspace: none) */
296  iwrk = itau;
297  liwrk = lwork - iwrk;
298  lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W,
299  vr, &ldvr, &work[iwrk], &liwrk, info );
300  }
301  else {
302  /* Compute eigenvalues only
303  * (CWorkspace: need 1, prefer HSWORK (see comments) )
304  * (RWorkspace: none) */
305  iwrk = itau;
306  liwrk = lwork - iwrk;
307  lapackf77_zhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, W,
308  vr, &ldvr, &work[iwrk], &liwrk, info );
309  }
310 
311  /* If INFO > 0 from ZHSEQR, then quit */
312  if (*info > 0) {
313  goto CLEANUP;
314  }
315 
316  if (wantvl || wantvr) {
317  /* Compute left and/or right eigenvectors
318  * (CWorkspace: need 2*N)
319  * (RWorkspace: need 2*N) */
320  irwork = ibal + n;
321  lapackf77_ztrevc( side, "B", select, &n, A, &lda, vl, &ldvl,
322  vr, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr );
323  }
324 
325  if (wantvl) {
326  /* Undo balancing of left eigenvectors
327  * (CWorkspace: none)
328  * (RWorkspace: need N) */
329  lapackf77_zgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n,
330  vl, &ldvl, &ierr );
331 
332  /* Normalize left eigenvectors and make largest component real */
333  for (i = 0; i < n; ++i) {
334  scl = 1. / cblas_dznrm2( n, vl(0,i), 1 );
335  cblas_zdscal( n, scl, vl(0,i), 1 );
336  for (k = 0; k < n; ++k) {
337  /* Computing 2nd power */
338  d__1 = MAGMA_Z_REAL( *vl(k,i) );
339  d__2 = MAGMA_Z_IMAG( *vl(k,i) );
340  rwork[irwork + k] = d__1*d__1 + d__2*d__2;
341  }
342  k = cblas_idamax( n, &rwork[irwork], 1 );
343  z__2 = MAGMA_Z_CNJG( *vl(k,i) );
344  d__1 = magma_dsqrt( rwork[irwork + k] );
345  MAGMA_Z_DSCALE( z__1, z__2, d__1 );
346  tmp = z__1;
347  cblas_zscal( n, CBLAS_SADDR(tmp), vl(0,i), 1 );
348  d__1 = MAGMA_Z_REAL( *vl(k,i) );
349  MAGMA_Z_SET2REAL( z__1, d__1 );
350  *vl(k,i) = z__1;
351  }
352  }
353 
354  if (wantvr) {
355  /* Undo balancing of right eigenvectors
356  * (CWorkspace: none)
357  * (RWorkspace: need N) */
358  lapackf77_zgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n,
359  vr, &ldvr, &ierr );
360 
361  /* Normalize right eigenvectors and make largest component real */
362  for (i = 0; i < n; ++i) {
363  scl = 1. / cblas_dznrm2( n, vr(0,i), 1 );
364  cblas_zdscal( n, scl, vr(0,i), 1 );
365  for (k = 0; k < n; ++k) {
366  /* Computing 2nd power */
367  d__1 = MAGMA_Z_REAL( *vr(k,i) );
368  d__2 = MAGMA_Z_IMAG( *vr(k,i) );
369  rwork[irwork + k] = d__1*d__1 + d__2*d__2;
370  }
371  k = cblas_idamax( n, &rwork[irwork], 1 );
372  z__2 = MAGMA_Z_CNJG( *vr(k,i) );
373  d__1 = magma_dsqrt( rwork[irwork + k] );
374  MAGMA_Z_DSCALE( z__1, z__2, d__1 );
375  tmp = z__1;
376  cblas_zscal( n, CBLAS_SADDR(tmp), vr(0,i), 1 );
377  d__1 = MAGMA_Z_REAL( *vr(k,i) );
378  MAGMA_Z_SET2REAL( z__1, d__1 );
379  *vr(k,i) = z__1;
380  }
381  }
382 
383 CLEANUP:
384  /* Undo scaling if necessary */
385  if (scalea) {
386  i__1 = n - (*info);
387  i__2 = max( n - (*info), 1 );
388  lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
389  W + (*info), &i__2, &ierr );
390  if (*info > 0) {
391  i__1 = ilo - 1;
392  lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
393  W, &n, &ierr );
394  }
395  }
396 
397  #if defined(VERSION3)
398  magma_free( dT );
399  #endif
400 
401  return *info;
402 } /* magma_zgeev */

Here is the call graph for this function:

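As a usage illustration (not part of the MAGMA sources), the following sketch drives magma_zgeev with the standard LAPACK-style workspace query: call once with LWORK = -1 to obtain the optimal size in WORK(1), allocate, then compute eigenvalues and right eigenvectors. The matrix size, random fill, and plain malloc allocation are illustrative assumptions.

#include <stdio.h>
#include <stdlib.h>
#include "magma.h"

int main( void )
{
    magma_init();

    magma_int_t n = 100, lda = n, ldvl = 1, ldvr = n, info;
    magmaDoubleComplex *A, *W, *VR, *work, vl_dummy[1], work_query;
    double *rwork;

    A     = (magmaDoubleComplex*) malloc( lda*n  * sizeof(magmaDoubleComplex) );
    W     = (magmaDoubleComplex*) malloc( n      * sizeof(magmaDoubleComplex) );
    VR    = (magmaDoubleComplex*) malloc( ldvr*n * sizeof(magmaDoubleComplex) );
    rwork = (double*)             malloc( 2*n    * sizeof(double) );
    for( magma_int_t j = 0; j < lda*n; ++j )   /* illustrative random matrix */
        A[j] = MAGMA_Z_MAKE( rand() / (double) RAND_MAX, rand() / (double) RAND_MAX );

    /* workspace query: LWORK = -1 returns the optimal size in work[0] */
    magma_zgeev( 'N', 'V', n, A, lda, W, vl_dummy, ldvl, VR, ldvr,
                 &work_query, -1, rwork, &info );
    magma_int_t lwork = (magma_int_t) MAGMA_Z_REAL( work_query );
    work = (magmaDoubleComplex*) malloc( lwork * sizeof(magmaDoubleComplex) );

    /* eigenvalues in W, right eigenvectors in the columns of VR */
    magma_zgeev( 'N', 'V', n, A, lda, W, vl_dummy, ldvl, VR, ldvr,
                 work, lwork, rwork, &info );
    if ( info != 0 )
        printf( "magma_zgeev returned info = %d\n", (int) info );

    free( A );  free( W );  free( VR );  free( work );  free( rwork );
    magma_finalize();
    return 0;
}
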
magma_int_t magma_zgeev_m ( char  jobvl,
char  jobvr,
magma_int_t  n,
magmaDoubleComplex *  A,
magma_int_t  lda,
magmaDoubleComplex *  W,
magmaDoubleComplex *  vl,
magma_int_t  ldvl,
magmaDoubleComplex *  vr,
magma_int_t  ldvr,
magmaDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t *  info 
)

Definition at line 27 of file zgeev_m.cpp.

References __func__, cblas_dznrm2(), cblas_idamax(), CBLAS_SADDR, cblas_zdscal(), cblas_zscal(), dT, lapackf77_dlabad, lapackf77_dlamch, lapackf77_lsame, lapackf77_zgebak, lapackf77_zgebal, lapackf77_zgehrd, lapackf77_zhseqr, lapackf77_zlacpy, lapackf77_zlange, lapackf77_zlascl, lapackf77_ztrevc, lapackf77_zunghr, magma_dsqrt, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_zgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_CNJG, MAGMA_Z_DSCALE, MAGMA_Z_IMAG, MAGMA_Z_MAKE, MAGMA_Z_REAL, MAGMA_Z_SET2REAL, magma_zgehrd(), magma_zgehrd2(), magma_zgehrd_m(), magma_zmalloc(), magma_zmalloc_cpu(), magma_zsetmatrix, magma_zunghr(), magma_zunghr_m(), MagmaLowerStr, max, T, vl, and vr.

35 {
36 /* -- MAGMA (version 1.4.0) --
37  Univ. of Tennessee, Knoxville
38  Univ. of California, Berkeley
39  Univ. of Colorado, Denver
40  August 2013
41 
42  Purpose
43  =======
44  ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the
45  eigenvalues and, optionally, the left and/or right eigenvectors.
46 
47  The right eigenvector v(j) of A satisfies
48  A * v(j) = lambda(j) * v(j)
49  where lambda(j) is its eigenvalue.
50  The left eigenvector u(j) of A satisfies
51  u(j)**H * A = lambda(j) * u(j)**H
52  where u(j)**H denotes the conjugate transpose of u(j).
53 
54  The computed eigenvectors are normalized to have Euclidean norm
55  equal to 1 and largest component real.
56 
57  Arguments
58  =========
59  JOBVL (input) CHARACTER*1
60  = 'N': left eigenvectors of A are not computed;
61  = 'V': left eigenvectors of A are computed.
62 
63  JOBVR (input) CHARACTER*1
64  = 'N': right eigenvectors of A are not computed;
65  = 'V': right eigenvectors of A are computed.
66 
67  N (input) INTEGER
68  The order of the matrix A. N >= 0.
69 
70  A (input/output) COMPLEX*16 array, dimension (LDA,N)
71  On entry, the N-by-N matrix A.
72  On exit, A has been overwritten.
73 
74  LDA (input) INTEGER
75  The leading dimension of the array A. LDA >= max(1,N).
76 
77  W (output) COMPLEX*16 array, dimension (N)
78  W contains the computed eigenvalues.
79 
80  VL (output) COMPLEX*16 array, dimension (LDVL,N)
81  If JOBVL = 'V', the left eigenvectors u(j) are stored one
82  after another in the columns of VL, in the same order
83  as their eigenvalues.
84  If JOBVL = 'N', VL is not referenced.
85  u(j) = VL(:,j), the j-th column of VL.
86 
87  LDVL (input) INTEGER
88  The leading dimension of the array VL. LDVL >= 1; if
89  JOBVL = 'V', LDVL >= N.
90 
91  VR (output) COMPLEX*16 array, dimension (LDVR,N)
92  If JOBVR = 'V', the right eigenvectors v(j) are stored one
93  after another in the columns of VR, in the same order
94  as their eigenvalues.
95  If JOBVR = 'N', VR is not referenced.
96  v(j) = VR(:,j), the j-th column of VR.
97 
98  LDVR (input) INTEGER
99  The leading dimension of the array VR. LDVR >= 1; if
100  JOBVR = 'V', LDVR >= N.
101 
102  WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
103  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
104 
105  LWORK (input) INTEGER
106  The dimension of the array WORK. LWORK >= (1+nb)*N.
107 
108  If LWORK = -1, then a workspace query is assumed; the routine
109  only calculates the optimal size of the WORK array, returns
110  this value as the first entry of the WORK array, and no error
111  message related to LWORK is issued by XERBLA.
112 
113  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
114 
115  INFO (output) INTEGER
116  = 0: successful exit
117  < 0: if INFO = -i, the i-th argument had an illegal value.
118  > 0: if INFO = i, the QR algorithm failed to compute all the
119  eigenvalues, and no eigenvectors have been computed;
120  elements i+1:N of W contain eigenvalues which have
121  converged.
122  ===================================================================== */
123 
124  #define vl(i,j) (vl + (i) + (j)*ldvl)
125  #define vr(i,j) (vr + (i) + (j)*ldvr)
126 
127  magma_int_t c_one = 1;
128  magma_int_t c_zero = 0;
129 
130  double d__1, d__2;
131  magmaDoubleComplex z__1, z__2;
132  magmaDoubleComplex tmp;
133  double scl;
134  double dum[1], eps;
135  double anrm, cscale, bignum, smlnum;
136  magma_int_t i, k, ilo, ihi;
137  magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
138  magma_int_t scalea, minwrk, irwork, lquery, wantvl, wantvr, select[1];
139 
140  char side[2] = {0, 0};
141  char jobvl_[2] = {jobvl, 0};
142  char jobvr_[2] = {jobvr, 0};
143 
144  irwork = 0;
145  *info = 0;
146  lquery = lwork == -1;
147  wantvl = lapackf77_lsame( jobvl_, "V" );
148  wantvr = lapackf77_lsame( jobvr_, "V" );
149  if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
150  *info = -1;
151  } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
152  *info = -2;
153  } else if (n < 0) {
154  *info = -3;
155  } else if (lda < max(1,n)) {
156  *info = -5;
157  } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
158  *info = -8;
159  } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
160  *info = -10;
161  }
162 
163  /* Compute workspace */
164  nb = magma_get_zgehrd_nb( n );
165  if (*info == 0) {
166  minwrk = (1+nb)*n;
167  work[0] = MAGMA_Z_MAKE( (double) minwrk, 0. );
168 
169  if (lwork < minwrk && ! lquery) {
170  *info = -12;
171  }
172  }
173 
174  if (*info != 0) {
175  magma_xerbla( __func__, -(*info) );
176  return *info;
177  }
178  else if (lquery) {
179  return *info;
180  }
181 
182  /* Quick return if possible */
183  if (n == 0) {
184  return *info;
185  }
186 
187  #if defined(Version3) || defined(Version4) || defined(Version5)
188  magmaDoubleComplex *dT;
189  if (MAGMA_SUCCESS != magma_zmalloc( &dT, nb*n )) {
190  *info = MAGMA_ERR_DEVICE_ALLOC;
191  return *info;
192  }
193  #endif
194  #if defined(Version4) || defined(Version5)
195  magmaDoubleComplex *T;
196  if (MAGMA_SUCCESS != magma_zmalloc_cpu( &T, nb*n )) {
197  magma_free( dT );
198  *info = MAGMA_ERR_HOST_ALLOC;
199  return *info;
200  }
201  #endif
202 
203  /* Get machine constants */
204  eps = lapackf77_dlamch( "P" );
205  smlnum = lapackf77_dlamch( "S" );
206  bignum = 1. / smlnum;
207  lapackf77_dlabad( &smlnum, &bignum );
208  smlnum = magma_dsqrt( smlnum ) / eps;
209  bignum = 1. / smlnum;
210 
211  /* Scale A if max element outside range [SMLNUM,BIGNUM] */
212  anrm = lapackf77_zlange( "M", &n, &n, A, &lda, dum );
213  scalea = 0;
214  if (anrm > 0. && anrm < smlnum) {
215  scalea = 1;
216  cscale = smlnum;
217  } else if (anrm > bignum) {
218  scalea = 1;
219  cscale = bignum;
220  }
221  if (scalea) {
222  lapackf77_zlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
223  }
224 
225  /* Balance the matrix
226  * (CWorkspace: none)
227  * (RWorkspace: need N) */
228  ibal = 0;
229  lapackf77_zgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr );
230 
231  /* Reduce to upper Hessenberg form
232  * (CWorkspace: need 2*N, prefer N + N*NB)
233  * (RWorkspace: none) */
234  itau = 0;
235  iwrk = itau + n;
236  liwrk = lwork - iwrk;
237 
238  #if defined(Version1)
239  // Version 1 - LAPACK
240  lapackf77_zgehrd( &n, &ilo, &ihi, A, &lda,
241  &work[itau], &work[iwrk], &liwrk, &ierr );
242  #elif defined(Version2)
243  // Version 2 - LAPACK consistent HRD
244  magma_zgehrd2( n, ilo, ihi, A, lda,
245  &work[itau], &work[iwrk], &liwrk, &ierr );
246  #elif defined(Version3)
247  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
248  magma_zgehrd( n, ilo, ihi, A, lda,
249  &work[itau], &work[iwrk], liwrk, dT, &ierr );
250  #elif defined(Version4) || defined(Version5)
251  // Version 4 - Multi-GPU, T on host
252  magma_zgehrd_m( n, ilo, ihi, A, lda,
253  &work[itau], &work[iwrk], liwrk, T, &ierr );
254  magma_zsetmatrix( nb, n, T, nb, dT, nb );
255  #endif
256 
257  if (wantvl) {
258  /* Want left eigenvectors
259  * Copy Householder vectors to VL */
260  side[0] = 'L';
261  lapackf77_zlacpy( MagmaLowerStr, &n, &n, A, &lda, vl, &ldvl );
262 
263  /* Generate unitary matrix in VL
264  * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
265  * (RWorkspace: none) */
266  #if defined(Version1) || defined(Version2)
267  // Version 1 & 2 - LAPACK
268  lapackf77_zunghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
269  &work[iwrk], &liwrk, &ierr );
270  #elif defined(Version3) || defined(Version4)
271  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
272  magma_zunghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
273  #elif defined(Version5)
274  // Version 5 - Multi-GPU, T on host
275  magma_zunghr_m( n, ilo, ihi, vl, ldvl, &work[itau], T, nb, &ierr );
276  #endif
277 
278  /* Perform QR iteration, accumulating Schur vectors in VL
279  * (CWorkspace: need 1, prefer HSWORK (see comments) )
280  * (RWorkspace: none) */
281  iwrk = itau;
282  liwrk = lwork - iwrk;
283  lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W,
284  vl, &ldvl, &work[iwrk], &liwrk, info );
285 
286  if (wantvr) {
287  /* Want left and right eigenvectors
288  * Copy Schur vectors to VR */
289  side[0] = 'B';
290  lapackf77_zlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
291  }
292  }
293  else if (wantvr) {
294  /* Want right eigenvectors
295  * Copy Householder vectors to VR */
296  side[0] = 'R';
297  lapackf77_zlacpy( "L", &n, &n, A, &lda, vr, &ldvr );
298 
299  /* Generate unitary matrix in VR
300  * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
301  * (RWorkspace: none) */
302  #if defined(Version1) || defined(Version2)
303  // Version 1 & 2 - LAPACK
304  lapackf77_zunghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
305  &work[iwrk], &liwrk, &ierr );
306  #elif defined(Version3) || defined(Version4)
307  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
308  magma_zunghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
309  #elif defined(Version5)
310  // Version 5 - Multi-GPU, T on host
311  magma_zunghr_m( n, ilo, ihi, vr, ldvr, &work[itau], T, nb, &ierr );
312  #endif
313 
314  /* Perform QR iteration, accumulating Schur vectors in VR
315  * (CWorkspace: need 1, prefer HSWORK (see comments) )
316  * (RWorkspace: none) */
317  iwrk = itau;
318  liwrk = lwork - iwrk;
319  lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W,
320  vr, &ldvr, &work[iwrk], &liwrk, info );
321  }
322  else {
323  /* Compute eigenvalues only
324  * (CWorkspace: need 1, prefer HSWORK (see comments) )
325  * (RWorkspace: none) */
326  iwrk = itau;
327  liwrk = lwork - iwrk;
328  lapackf77_zhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, W,
329  vr, &ldvr, &work[iwrk], &liwrk, info );
330  }
331 
332  /* If INFO > 0 from ZHSEQR, then quit */
333  if (*info > 0) {
334  goto CLEANUP;
335  }
336 
337  if (wantvl || wantvr) {
338  /* Compute left and/or right eigenvectors
339  * (CWorkspace: need 2*N)
340  * (RWorkspace: need 2*N) */
341  irwork = ibal + n;
342  lapackf77_ztrevc( side, "B", select, &n, A, &lda, vl, &ldvl,
343  vr, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr );
344  }
345 
346  if (wantvl) {
347  /* Undo balancing of left eigenvectors
348  * (CWorkspace: none)
349  * (RWorkspace: need N) */
350  lapackf77_zgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n,
351  vl, &ldvl, &ierr );
352 
353  /* Normalize left eigenvectors and make largest component real */
354  for (i = 0; i < n; ++i) {
355  scl = 1. / cblas_dznrm2( n, vl(0,i), 1 );
356  cblas_zdscal( n, scl, vl(0,i), 1 );
357  for (k = 0; k < n; ++k) {
358  /* Computing 2nd power */
359  d__1 = MAGMA_Z_REAL( *vl(k,i) );
360  d__2 = MAGMA_Z_IMAG( *vl(k,i) );
361  rwork[irwork + k] = d__1*d__1 + d__2*d__2;
362  }
363  k = cblas_idamax( n, &rwork[irwork], 1 );
364  z__2 = MAGMA_Z_CNJG( *vl(k,i) );
365  d__1 = magma_dsqrt( rwork[irwork + k] );
366  MAGMA_Z_DSCALE( z__1, z__2, d__1 );
367  tmp = z__1;
368  cblas_zscal( n, CBLAS_SADDR(tmp), vl(0,i), 1 );
369  d__1 = MAGMA_Z_REAL( *vl(k,i) );
370  MAGMA_Z_SET2REAL( z__1, d__1 );
371  *vl(k,i) = z__1;
372  }
373  }
374 
375  if (wantvr) {
376  /* Undo balancing of right eigenvectors
377  * (CWorkspace: none)
378  * (RWorkspace: need N) */
379  lapackf77_zgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n,
380  vr, &ldvr, &ierr );
381 
382  /* Normalize right eigenvectors and make largest component real */
383  for (i = 0; i < n; ++i) {
384  scl = 1. / cblas_dznrm2( n, vr(0,i), 1 );
385  cblas_zdscal( n, scl, vr(0,i), 1 );
386  for (k = 0; k < n; ++k) {
387  /* Computing 2nd power */
388  d__1 = MAGMA_Z_REAL( *vr(k,i) );
389  d__2 = MAGMA_Z_IMAG( *vr(k,i) );
390  rwork[irwork + k] = d__1*d__1 + d__2*d__2;
391  }
392  k = cblas_idamax( n, &rwork[irwork], 1 );
393  z__2 = MAGMA_Z_CNJG( *vr(k,i) );
394  d__1 = magma_dsqrt( rwork[irwork + k] );
395  MAGMA_Z_DSCALE( z__1, z__2, d__1 );
396  tmp = z__1;
397  cblas_zscal( n, CBLAS_SADDR(tmp), vr(0,i), 1 );
398  d__1 = MAGMA_Z_REAL( *vr(k,i) );
399  MAGMA_Z_SET2REAL( z__1, d__1 );
400  *vr(k,i) = z__1;
401  }
402  }
403 
404 CLEANUP:
405  /* Undo scaling if necessary */
406  if (scalea) {
407  i__1 = n - (*info);
408  i__2 = max( n - (*info), 1 );
409  lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
410  W + (*info), &i__2, &ierr );
411  if (*info > 0) {
412  i__1 = ilo - 1;
413  lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
414  W, &n, &ierr );
415  }
416  }
417 
418  #if defined(Version3) || defined(Version4) || defined(Version5)
419  magma_free( dT );
420  #endif
421  #if defined(Version4) || defined(Version5)
422  magma_free_cpu( T );
423  #endif
424 
425  return *info;
426 } /* magma_zgeev_m */

Here is the call graph for this function:

Here is the caller graph for this function:

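Both interfaces require LWORK >= (1+NB)*N, with NB taken from magma_get_zgehrd_nb, which is exactly the check performed in the listings above. A small sketch (hypothetical helper name) for sizing the workspace up front instead of issuing a query call:

#include "magma.h"

/* Hypothetical helper: documented minimum workspace for magma_zgeev / magma_zgeev_m. */
magma_int_t zgeev_min_lwork( magma_int_t n )
{
    magma_int_t nb = magma_get_zgehrd_nb( n );   /* Hessenberg blocking size */
    return (1 + nb) * n;
}
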
magma_int_t magma_zgehrd ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
magmaDoubleComplex *  A,
magma_int_t  lda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
magmaDoubleComplex *  dT,
magma_int_t *  info 
)

Definition at line 17 of file zgehrd.cpp.

References __func__, A, dA, dTi, dV, dwork, lapackf77_zgehd2, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_zgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_ONE, MAGMA_Z_SET2REAL, MAGMA_Z_ZERO, magma_zgetmatrix, magma_zlahr2(), magma_zlahru(), magma_zmalloc(), magma_zmalloc_cpu(), magma_zsetmatrix, magmablas_zlaset(), max, min, T, and zzero_nbxnb_block().

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  ZGEHRD reduces a COMPLEX_16 general matrix A to upper Hessenberg form H by
33  an orthogonal similarity transformation: Q' * A * Q = H . This version
34  stores the triangular matrices used in the factorization so that they can
35  be applied directly (i.e., without being recomputed) later. As a result,
36  the application of Q is much faster.
37 
38  Arguments
39  =========
40  N (input) INTEGER
41  The order of the matrix A. N >= 0.
42 
43  ILO (input) INTEGER
44  IHI (input) INTEGER
45  It is assumed that A is already upper triangular in rows
46  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
47  set by a previous call to ZGEBAL; otherwise they should be
48  set to 1 and N respectively. See Further Details.
49  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
50 
51  A (input/output) COMPLEX_16 array, dimension (LDA,N)
52  On entry, the N-by-N general matrix to be reduced.
53  On exit, the upper triangle and the first subdiagonal of A
54  are overwritten with the upper Hessenberg matrix H, and the
55  elements below the first subdiagonal, with the array TAU,
56  represent the orthogonal matrix Q as a product of elementary
57  reflectors. See Further Details.
58 
59  LDA (input) INTEGER
60  The leading dimension of the array A. LDA >= max(1,N).
61 
62  TAU (output) COMPLEX_16 array, dimension (N-1)
63  The scalar factors of the elementary reflectors (see Further
64  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
65  zero.
66 
67  WORK (workspace/output) COMPLEX_16 array, dimension (LWORK)
68  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The length of the array WORK. LWORK >= max(1,N).
72  For optimum performance LWORK >= N*NB, where NB is the
73  optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  dT (output) COMPLEX_16 array on the GPU, dimension NB*N,
81  where NB is the optimal blocksize. It stores the NB*NB blocks
82  of the triangular T matrices used in the reduction.
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of (ihi-ilo) elementary
91  reflectors
92 
93  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
94 
95  Each H(i) has the form
96 
97  H(i) = I - tau * v * v'
98 
99  where tau is a complex scalar, and v is a complex vector with
100  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
101  exit in A(i+2:ihi,i), and tau in TAU(i).
102 
103  The contents of A are illustrated by the following example, with
104  n = 7, ilo = 2 and ihi = 6:
105 
106  on entry, on exit,
107 
108  ( a a a a a a a ) ( a a h h h h a )
109  ( a a a a a a ) ( a h h h h a )
110  ( a a a a a a ) ( h h h h h h )
111  ( a a a a a a ) ( v2 h h h h h )
112  ( a a a a a a ) ( v2 v3 h h h h )
113  ( a a a a a a ) ( v2 v3 v4 h h h )
114  ( a ) ( a )
115 
116  where a denotes an element of the original matrix A, h denotes a
117  modified element of the upper Hessenberg matrix H, and vi denotes an
118  element of the vector defining H(i).
119 
120  This implementation follows the hybrid algorithm and notations described in
121 
122  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
123  form through hybrid GPU-based computing," University of Tennessee Computer
124  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
125  May 24, 2009.
126 
127  This version stores the T matrices in dT, for later use in magma_zunghr.
128 
129  ===================================================================== */
130 
131  #define A( i, j ) ( A + (i) + (j)*lda)
132  #define dA( i, j ) (dA + (i) + (j-ilo)*ldda)
133 
134  magmaDoubleComplex c_one = MAGMA_Z_ONE;
135  magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
136 
137  magma_int_t nb = magma_get_zgehrd_nb( n );
138  magma_int_t ldda = n; // assumed in zlahru
139 
140  magma_int_t nh, iws;
141  magma_int_t iinfo;
142  magma_int_t ldwork;
143  magma_int_t lquery;
144 
145  *info = 0;
146  iws = n*nb;
147  MAGMA_Z_SET2REAL( work[0], (double) iws );
148 
149  lquery = lwork == -1;
150  if (n < 0) {
151  *info = -1;
152  } else if (ilo < 1 || ilo > max(1,n)) {
153  *info = -2;
154  } else if (ihi < min(ilo,n) || ihi > n) {
155  *info = -3;
156  } else if (lda < max(1,n)) {
157  *info = -5;
158  } else if (lwork < max(1,n) && ! lquery) {
159  *info = -8;
160  }
161  if (*info != 0) {
162  magma_xerbla( __func__, -(*info) );
163  return *info;
164  }
165  else if (lquery)
166  return *info;
167 
168  // Adjust from 1-based indexing
169  ilo -= 1;
170 
171  // Quick return if possible
172  nh = ihi - ilo;
173  if (nh <= 1) {
174  work[0] = c_one;
175  return *info;
176  }
177 
178  // GPU workspace is:
179  // nb*ldda for dwork for zlahru
180  // nb*ldda for dV
181  // n*ldda for dA
182  magmaDoubleComplex *dwork;
183  if (MAGMA_SUCCESS != magma_zmalloc( &dwork, 2*nb*ldda + n*ldda )) {
184  *info = MAGMA_ERR_DEVICE_ALLOC;
185  return *info;
186  }
187  magmaDoubleComplex *dV = dwork + nb*ldda;
188  magmaDoubleComplex *dA = dwork + nb*ldda*2;
189  ldwork = n;
190 
191  magma_int_t i;
192 
193  magmaDoubleComplex *T, *dTi;
194  magma_zmalloc_cpu( &T, nb*nb );
195  if ( T == NULL ) {
196  magma_free( dwork );
197  *info = MAGMA_ERR_HOST_ALLOC;
198  return *info;
199  }
200 
201  // zero first block of V, which is lower triangular
202  zzero_nbxnb_block(nb, dV, ldda);
203 
204  // Set elements 0:ILO-1 and IHI-1:N-2 of TAU to zero
205  for(i = 0; i < ilo; ++i)
206  tau[i] = c_zero;
207 
208  for(i = max(0,ihi-1); i < n-1; ++i)
209  tau[i] = c_zero;
210 
211  for(i=0; i < nb*nb; i += 4)
212  T[i] = T[i+1] = T[i+2] = T[i+3] = c_zero;
213  magmablas_zlaset( 'F', nb, n, dT, nb );
214 
215  // If not enough workspace, use unblocked code
216  if ( lwork < iws ) {
217  nb = 1;
218  }
219 
220  if (nb == 1 || nb > nh) {
221  // Use unblocked code below
222  i = ilo;
223  }
224  else {
225  // Use blocked code
226  // Copy the matrix to the GPU
227  magma_zsetmatrix( n, n-ilo, A(0,ilo), lda, dA, ldda );
228 
229  for (i = ilo; i < ihi-1 - nb; i += nb) {
230  // Reduce columns i:i+nb-1 to Hessenberg form, returning the
231  // matrices V and T of the block reflector H = I - V*T*V'
232  // which performs the reduction, and also the matrix Y = A*V*T
233 
234  // Get the current panel (no need for the 1st iteration)
235  magma_zgetmatrix( ihi-i, nb,
236  dA(i,i), ldda,
237  A (i,i), lda );
238 
239  // add 1 to i for 1-based index
240  magma_zlahr2( ihi, i+1, nb,
241  dA(0,i),
242  dV,
243  A (0,i), lda,
244  &tau[i], T, nb, work, ldwork);
245 
246  // Copy T from the CPU to dT on the GPU
247  dTi = dT + (i - ilo)*nb;
248  magma_zsetmatrix( nb, nb, T, nb, dTi, nb );
249 
250  magma_zlahru( n, ihi, i, nb,
251  A (0,i), lda,
252  dA(0,i), // dA
253  dA(i,i), // dY, stored over current panel
254  dV, dTi, dwork );
255  }
256 
257  // Copy remainder to host
258  magma_zgetmatrix( n, n-i,
259  dA(0,i), ldda,
260  A (0,i), lda );
261  }
262 
263  // Use unblocked code to reduce the rest of the matrix
264  // add 1 to i for 1-based index
265  i += 1;
266  lapackf77_zgehd2(&n, &i, &ihi, A, &lda, tau, work, &iinfo);
267  MAGMA_Z_SET2REAL( work[0], (double) iws );
268 
269  magma_free( dwork );
270  magma_free_cpu( T );
271 
272  return *info;
273 } /* magma_zgehrd */

Here is the call graph for this function:

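Because this version keeps the block-reflector T factors in dT, the unitary factor Q can be formed afterwards with magma_zunghr without recomputing them, which is what magma_zgeev does under VERSION3. A hedged sketch (hypothetical wrapper, error handling trimmed; ilo = 1, ihi = n assumes the matrix was not balanced first):

#include "magma.h"

/* Sketch: reduce A to Hessenberg form, then form Q from the stored reflectors. */
magma_int_t hessenberg_with_q( magma_int_t n, magmaDoubleComplex *A, magma_int_t lda,
                               magmaDoubleComplex *Q, magma_int_t ldq )
{
    magma_int_t info, ilo = 1, ihi = n;
    magma_int_t nb    = magma_get_zgehrd_nb( n );
    magma_int_t lwork = n*nb;                          /* optimal LWORK per the docs */

    magmaDoubleComplex *tau, *work, *dT;
    magma_zmalloc_cpu( &tau,  n-1   );
    magma_zmalloc_cpu( &work, lwork );
    if ( MAGMA_SUCCESS != magma_zmalloc( &dT, nb*n ) ) /* T factors on the GPU */
        return MAGMA_ERR_DEVICE_ALLOC;

    /* A is overwritten: H above the subdiagonal, reflectors below it */
    magma_zgehrd( n, ilo, ihi, A, lda, tau, work, lwork, dT, &info );

    /* copy the reflectors into Q and accumulate the unitary matrix */
    lapackf77_zlacpy( MagmaLowerStr, &n, &n, A, &lda, Q, &ldq );
    magma_zunghr( n, ilo, ihi, Q, ldq, tau, dT, nb, &info );

    magma_free( dT );
    magma_free_cpu( work );
    magma_free_cpu( tau );
    return info;
}
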
magma_int_t magma_zgehrd2 ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
magmaDoubleComplex *  A,
magma_int_t  lda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file zgehrd2.cpp.

References __func__, lapackf77_zgehd2, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_zgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_ONE, MAGMA_Z_SET2REAL, MAGMA_Z_ZERO, magma_zgetmatrix, magma_zlahr2(), magma_zlahru(), magma_zmalloc(), magma_zmalloc_cpu(), magma_zsetmatrix, max, min, and zzero_nbxnb_block().

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  ZGEHRD2 reduces a COMPLEX_16 general matrix A to upper Hessenberg form H by
28  an orthogonal similarity transformation: Q' * A * Q = H .
29 
30  Arguments
31  =========
32  N (input) INTEGER
33  The order of the matrix A. N >= 0.
34 
35  ILO (input) INTEGER
36  IHI (input) INTEGER
37  It is assumed that A is already upper triangular in rows
38  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
39  set by a previous call to ZGEBAL; otherwise they should be
40  set to 1 and N respectively. See Further Details.
41  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
42 
43  A (input/output) COMPLEX_16 array, dimension (LDA,N)
44  On entry, the N-by-N general matrix to be reduced.
45  On exit, the upper triangle and the first subdiagonal of A
46  are overwritten with the upper Hessenberg matrix H, and the
47  elements below the first subdiagonal, with the array TAU,
48  represent the orthogonal matrix Q as a product of elementary
49  reflectors. See Further Details.
50 
51  LDA (input) INTEGER
52  The leading dimension of the array A. LDA >= max(1,N).
53 
54  TAU (output) COMPLEX_16 array, dimension (N-1)
55  The scalar factors of the elementary reflectors (see Further
56  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
57  zero.
58 
59  WORK (workspace/output) COMPLEX_16 array, dimension (LWORK)
60  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
61 
62  LWORK (input) INTEGER
63  The length of the array WORK. LWORK >= max(1,N).
64  For optimum performance LWORK >= N*NB, where NB is the
65  optimal blocksize.
66 
67  If LWORK = -1, then a workspace query is assumed; the routine
68  only calculates the optimal size of the WORK array, returns
69  this value as the first entry of the WORK array, and no error
70  message related to LWORK is issued by XERBLA.
71 
72  INFO (output) INTEGER
73  = 0: successful exit
74  < 0: if INFO = -i, the i-th argument had an illegal value.
75 
76  Further Details
77  ===============
78  The matrix Q is represented as a product of (ihi-ilo) elementary
79  reflectors
80 
81  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
82 
83  Each H(i) has the form
84 
85  H(i) = I - tau * v * v'
86 
87  where tau is a complex scalar, and v is a complex vector with
88  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
89  exit in A(i+2:ihi,i), and tau in TAU(i).
90 
91  The contents of A are illustrated by the following example, with
92  n = 7, ilo = 2 and ihi = 6:
93 
94  on entry, on exit,
95 
96  ( a a a a a a a ) ( a a h h h h a )
97  ( a a a a a a ) ( a h h h h a )
98  ( a a a a a a ) ( h h h h h h )
99  ( a a a a a a ) ( v2 h h h h h )
100  ( a a a a a a ) ( v2 v3 h h h h )
101  ( a a a a a a ) ( v2 v3 v4 h h h )
102  ( a ) ( a )
103 
104  where a denotes an element of the original matrix A, h denotes a
105  modified element of the upper Hessenberg matrix H, and vi denotes an
106  element of the vector defining H(i).
107 
108  This implementation follows the hybrid algorithm and notations described in
109 
110  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
111  form through hybrid GPU-based computing," University of Tennessee Computer
112  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
113  May 24, 2009.
114  ===================================================================== */
115 
116 
117  magmaDoubleComplex c_one = MAGMA_Z_ONE;
118  magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
119 
120  magma_int_t nb = magma_get_zgehrd_nb( n );
121  magma_int_t N = n, ldda = n;
122 
123  magma_int_t ib;
124  magma_int_t nh, iws;
125  magma_int_t nbmin, iinfo;
126  magma_int_t ldwork;
127  magma_int_t lquery;
128 
129  --tau;
130 
131  *info = 0;
132  MAGMA_Z_SET2REAL( work[0], (double) n * nb );
133 
134  lquery = lwork == -1;
135  if (n < 0) {
136  *info = -1;
137  } else if (ilo < 1 || ilo > max(1,n)) {
138  *info = -2;
139  } else if (ihi < min(ilo,n) || ihi > n) {
140  *info = -3;
141  } else if (lda < max(1,n)) {
142  *info = -5;
143  } else if (lwork < max(1,n) && ! lquery) {
144  *info = -8;
145  }
146  if (*info != 0) {
147  magma_xerbla( __func__, -(*info) );
148  return *info;
149  }
150  else if (lquery)
151  return *info;
152 
153  /* Quick return if possible */
154  nh = ihi - ilo + 1;
155  if (nh <= 1) {
156  work[0] = c_one;
157  return *info;
158  }
159 
160  magmaDoubleComplex *da;
161  if (MAGMA_SUCCESS != magma_zmalloc( &da, N*ldda + 2*N*nb + nb*nb )) {
162  *info = MAGMA_ERR_DEVICE_ALLOC;
163  return *info;
164  }
165 
166  magmaDoubleComplex *d_A = da;
167  magmaDoubleComplex *d_work = da + (N+nb)*ldda;
168 
169  magma_int_t i__;
170 
171  magmaDoubleComplex *t, *d_t;
172  magma_zmalloc_cpu( &t, nb*nb );
173  if ( t == NULL ) {
174  magma_free( da );
175  *info = MAGMA_ERR_HOST_ALLOC;
176  return *info;
177  }
178  d_t = d_work + nb * ldda;
179 
180  zzero_nbxnb_block(nb, d_A+N*ldda, ldda);
181 
182  /* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
183  for (i__ = 1; i__ < ilo; ++i__)
184  tau[i__] = c_zero;
185 
186  for (i__ = max(1,ihi); i__ < n; ++i__)
187  tau[i__] = c_zero;
188 
189  for(i__=0; i__< nb*nb; i__+=4)
190  t[i__] = t[i__+1] = t[i__+2] = t[i__+3] = c_zero;
191 
192  nbmin = 2;
193  iws = 1;
194  if (nb > 1 && nb < nh) {
195  /* Determine when to cross over from blocked to unblocked code
196  (last block is always handled by unblocked code) */
197  if (nb < nh) {
198  /* Determine if workspace is large enough for blocked code */
199  iws = n * nb;
200  if (lwork < iws) {
201  /* Not enough workspace to use optimal NB: determine the
202  minimum value of NB, and reduce NB or force use of
203  unblocked code */
204  nbmin = nb;
205  if (lwork >= n * nbmin)
206  nb = lwork / n;
207  else
208  nb = 1;
209  }
210  }
211  }
212  ldwork = n;
213 
214  if (nb < nbmin || nb >= nh) {
215  /* Use unblocked code below */
216  i__ = ilo;
217  }
218  else {
219  /* Use blocked code */
220  /* Copy the matrix to the GPU */
221  magma_zsetmatrix( N, N-ilo+1, a+(ilo-1)*(lda), lda, d_A, ldda );
222 
223  for (i__ = ilo; i__ < ihi - nb; i__ += nb) {
224  /* Computing MIN */
225  ib = min(nb, ihi - i__);
226 
227  /* Reduce columns i:i+ib-1 to Hessenberg form, returning the
228  matrices V and T of the block reflector H = I - V*T*V'
229  which performs the reduction, and also the matrix Y = A*V*T */
230 
231  /* Get the current panel (no need for the 1st iteration) */
232  magma_zgetmatrix( ihi-i__+1, ib,
233  d_A + (i__ - ilo)*ldda + i__ - 1, ldda,
234  a + (i__ - 1 )*lda + i__ - 1, lda );
235 
236  magma_zlahr2(ihi, i__, ib,
237  d_A + (i__ - ilo)*ldda,
238  d_A + N*ldda + 1,
239  a + (i__ - 1 )*(lda) , lda,
240  &tau[i__], t, nb, work, ldwork);
241 
242  /* Copy T from the CPU to D_T on the GPU */
243  magma_zsetmatrix( nb, nb, t, nb, d_t, nb );
244 
245  magma_zlahru(n, ihi, i__ - 1, ib,
246  a + (i__ - 1 )*(lda), lda,
247  d_A + (i__ - ilo)*ldda,
248  d_A + (i__ - ilo)*ldda + i__ - 1,
249  d_A + N*ldda, d_t, d_work);
250  }
251  }
252 
253  /* Use unblocked code to reduce the rest of the matrix */
254  if (!(nb < nbmin || nb >= nh)) {
255  magma_zgetmatrix( n, n-i__+1,
256  d_A+ (i__-ilo)*ldda, ldda,
257  a + (i__-1)*(lda), lda );
258  }
259  lapackf77_zgehd2(&n, &i__, &ihi, a, &lda, &tau[1], work, &iinfo);
260  MAGMA_Z_SET2REAL( work[0], (double) iws );
261 
262  magma_free( da );
263  magma_free_cpu(t);
264 
265  return *info;
266 } /* magma_zgehrd2 */

Here is the call graph for this function:

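ILO and IHI normally come from a prior balancing step, exactly as in magma_zgeev above. A brief sketch of that pairing (hypothetical wrapper; the caller supplies TAU of length N-1, WORK of length >= N*NB, and SCALE of length N):

#include "magma.h"

/* Sketch: balance A, then reduce its active ILO:IHI block to Hessenberg form. */
void balance_and_reduce( magma_int_t n, magmaDoubleComplex *A, magma_int_t lda,
                         magmaDoubleComplex *tau, magmaDoubleComplex *work,
                         magma_int_t lwork, double *scale, magma_int_t *info )
{
    magma_int_t ilo, ihi, ierr;
    lapackf77_zgebal( "B", &n, A, &lda, &ilo, &ihi, scale, &ierr );
    magma_zgehrd2( n, ilo, ihi, A, lda, tau, work, lwork, info );
}
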
magma_int_t magma_zgehrd_m ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
magmaDoubleComplex *  A,
magma_int_t  lda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
magmaDoubleComplex *  T,
magma_int_t *  info 
)

Definition at line 16 of file zgehrd_m.cpp.

References __func__, zgehrd_data::A, A, dA, lapackf77_zgehd2, lapackf77_zlaset, zgehrd_data::ldda, zgehrd_data::ldv, zgehrd_data::ldvd, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgehrd_nb(), magma_getdevice(), magma_num_gpus(), magma_queue_create, magma_queue_destroy, magma_setdevice(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_ONE, MAGMA_Z_SET2REAL, MAGMA_Z_ZERO, magma_zgetmatrix, magma_zgetmatrix_async, magma_zlahr2_m(), magma_zlahru_m(), magma_zmalloc(), magma_zsetmatrix_1D_col_bcyclic(), magmablasSetKernelStream(), max, min, zgehrd_data::ngpu, zgehrd_data::streams, zgehrd_data::Ti, zgehrd_data::V, zgehrd_data::Vd, zgehrd_data::W, and zgehrd_data::Y.

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  ZGEHRD reduces a COMPLEX_16 general matrix A to upper Hessenberg form H by
33  an orthogonal similarity transformation: Q' * A * Q = H . This version
34  stores the triangular matrices used in the factorization so that they can
35  be applied directly (i.e., without being recomputed) later. As a result,
36  the application of Q is much faster.
37 
38  Arguments
39  =========
40  N (input) INTEGER
41  The order of the matrix A. N >= 0.
42 
43  ILO (input) INTEGER
44  IHI (input) INTEGER
45  It is assumed that A is already upper triangular in rows
46  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
47  set by a previous call to ZGEBAL; otherwise they should be
48  set to 1 and N respectively. See Further Details.
49  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
50 
51  A (input/output) COMPLEX_16 array, dimension (LDA,N)
52  On entry, the N-by-N general matrix to be reduced.
53  On exit, the upper triangle and the first subdiagonal of A
54  are overwritten with the upper Hessenberg matrix H, and the
55  elements below the first subdiagonal, with the array TAU,
56  represent the orthogonal matrix Q as a product of elementary
57  reflectors. See Further Details.
58 
59  LDA (input) INTEGER
60  The leading dimension of the array A. LDA >= max(1,N).
61 
62  TAU (output) COMPLEX_16 array, dimension (N-1)
63  The scalar factors of the elementary reflectors (see Further
64  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
65  zero.
66 
67  WORK (workspace/output) COMPLEX_16 array, dimension (LWORK)
68  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The length of the array WORK. LWORK >= max(1,N).
72  For optimum performance LWORK >= N*NB, where NB is the
73  optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  T (output) COMPLEX_16 array, dimension NB*N,
81  where NB is the optimal blocksize. It stores the NB*NB blocks
82  of the triangular T matrices used in the reduction.
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of (ihi-ilo) elementary
91  reflectors
92 
93  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
94 
95  Each H(i) has the form
96 
97  H(i) = I - tau * v * v'
98 
99  where tau is a complex scalar, and v is a complex vector with
100  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
101  exit in A(i+2:ihi,i), and tau in TAU(i).
102 
103  The contents of A are illustrated by the following example, with
104  n = 7, ilo = 2 and ihi = 6:
105 
106  on entry, on exit,
107 
108  ( a a a a a a a ) ( a a h h h h a )
109  ( a a a a a a ) ( a h h h h a )
110  ( a a a a a a ) ( h h h h h h )
111  ( a a a a a a ) ( v2 h h h h h )
112  ( a a a a a a ) ( v2 v3 h h h h )
113  ( a a a a a a ) ( v2 v3 v4 h h h )
114  ( a ) ( a )
115 
116  where a denotes an element of the original matrix A, h denotes a
117  modified element of the upper Hessenberg matrix H, and vi denotes an
118  element of the vector defining H(i).
119 
120  This implementation follows the hybrid algorithm and notations described in
121 
122  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
123  form through hybrid GPU-based computing," University of Tennessee Computer
124  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
125  May 24, 2009.
126 
127  This version stores the T matrices, for later use in magma_zunghr.
128 
129  ===================================================================== */
130 
131  #define A( i, j ) (A + (i) + (j)*lda)
132  #define dA( d, i, j ) (data.A[d] + (i) + (j)*ldda)
133 
134  magmaDoubleComplex c_one = MAGMA_Z_ONE;
135  magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
136 
137  magma_int_t nb = magma_get_zgehrd_nb( n );
138 
139  magma_int_t nh, iws, ldda, min_lblocks, max_lblocks, last_dev, d;
140  magma_int_t dpanel, di, nlocal, i, i2, ib, ldwork;
141  magma_int_t iinfo;
142  magma_int_t lquery;
143  struct zgehrd_data data;
144 
145  int ngpu = magma_num_gpus();
146 
147  *info = 0;
148  iws = n*(nb + nb*ngpu);
149  MAGMA_Z_SET2REAL( work[0], (double) iws );
150 
151  lquery = lwork == -1;
152  if (n < 0) {
153  *info = -1;
154  } else if (ilo < 1 || ilo > max(1,n)) {
155  *info = -2;
156  } else if (ihi < min(ilo,n) || ihi > n) {
157  *info = -3;
158  } else if (lda < max(1,n)) {
159  *info = -5;
160  } else if (lwork < max(1,n) && ! lquery) {
161  *info = -8;
162  }
163  if (*info != 0) {
164  magma_xerbla( __func__, -(*info) );
165  return *info;
166  }
167  else if (lquery)
168  return *info;
169 
170  magma_device_t cdevice;
171  magma_getdevice( &cdevice );
172 
173  // Adjust from 1-based indexing
174  ilo -= 1;
175 
176  // Quick return if possible
177  nh = ihi - ilo;
178  if (nh <= 1) {
179  work[0] = c_one;
180  return *info;
181  }
182 
183  // Set elements 0:ILO-1 and IHI-1:N-2 of TAU to zero
184  for(i = 0; i < ilo; ++i)
185  tau[i] = c_zero;
186 
187  for(i = max(0,ihi-1); i < n-1; ++i)
188  tau[i] = c_zero;
189 
190  // set T to zero
191  lapackf77_zlaset( "Full", &nb, &n, &c_zero, &c_zero, T, &nb );
192 
193  // set to null, to simplify cleanup code
194  for( d = 0; d < ngpu; ++d ) {
195  data.A[d] = NULL;
196  data.streams[d] = NULL;
197  }
198 
199  // If not enough workspace, use unblocked code
200  if ( lwork < iws ) {
201  nb = 1;
202  }
203 
204  if (nb == 1 || nb >= nh) {
205  // Use unblocked code below
206  i = ilo;
207  }
208  else {
209  // Use blocked code
210  // allocate memory on GPUs for A and workspaces
211  ldda = ((n+31)/32)*32;
212  min_lblocks = (n / nb) / ngpu;
213  max_lblocks = ((n-1) / nb) / ngpu + 1;
214  last_dev = (n / nb) % ngpu;
215 
216  // V and Vd need to be padded for copying in mzlahr2
217  data.ngpu = ngpu;
218  data.ldda = ldda;
219  data.ldv = nb*max_lblocks*ngpu;
220  data.ldvd = nb*max_lblocks;
221 
222  for( d = 0; d < ngpu; ++d ) {
223  magma_setdevice( d );
224  nlocal = min_lblocks*nb;
225  if ( d < last_dev ) {
226  nlocal += nb;
227  }
228  else if ( d == last_dev ) {
229  nlocal += (n % nb);
230  }
231 
232  ldwork = nlocal*ldda // A
233  + nb*data.ldv // V
234  + nb*data.ldvd // Vd
235  + nb*ldda // Y
236  + nb*ldda // W
237  + nb*nb; // Ti
238  if ( MAGMA_SUCCESS != magma_zmalloc( &data.A[d], ldwork )) {
239  *info = MAGMA_ERR_DEVICE_ALLOC;
240  goto CLEANUP;
241  }
242  data.V [d] = data.A [d] + nlocal*ldda;
243  data.Vd[d] = data.V [d] + nb*data.ldv;
244  data.Y [d] = data.Vd[d] + nb*data.ldvd;
245  data.W [d] = data.Y [d] + nb*ldda;
246  data.Ti[d] = data.W [d] + nb*ldda;
247 
248  magma_queue_create( &data.streams[d] );
249  }
250 
251  // Copy the matrix to GPUs
252  magma_zsetmatrix_1D_col_bcyclic( n, n, A, lda, data.A, ldda, ngpu, nb );
253 
254  // round ilo down to block boundary
255  ilo = (ilo/nb)*nb;
256  for (i = ilo; i < ihi - 1 - nb; i += nb) {
257  // Reduce columns i:i+nb-1 to Hessenberg form, returning the
258  // matrices V and T of the block reflector H = I - V*T*V'
259  // which performs the reduction, and also the matrix Y = A*V*T
260 
261  // Get the current panel (no need for the 1st iteration)
262  dpanel = (i / nb) % ngpu;
263  di = ((i / nb) / ngpu) * nb;
264  if ( i > ilo ) {
265  magma_setdevice( dpanel );
266  magma_zgetmatrix( ihi-i, nb,
267  dA(dpanel, i, di), ldda,
268  A(i,i), lda );
269  }
270 
271  // add 1 to i for 1-based index
272  magma_zlahr2_m( ihi, i+1, nb, A(0,i), lda,
273  &tau[i], &T[i*nb], nb, work, n, &data );
274 
275  magma_zlahru_m( n, ihi, i, nb, A, lda, &data );
276 
277  // copy first i rows above panel to host
278  magma_setdevice( dpanel );
279  magma_zgetmatrix_async( i, nb,
280  dA(dpanel, 0, di), ldda,
281  A(0,i), lda, data.streams[dpanel] );
282  }
283 
284  // Copy remainder to host, block-by-block
285  for( i2 = i; i2 < n; i2 += nb ) {
286  ib = min( nb, n-i2 );
287  d = (i2 / nb) % ngpu;
288  di = (i2 / nb) / ngpu * nb;
289  magma_setdevice( d );
290  magma_zgetmatrix( n, ib,
291  dA(d, 0, di), ldda,
292  A(0,i2), lda );
293  }
294  }
295 
296  // Use unblocked code to reduce the rest of the matrix
297  // add 1 to i for 1-based index
298  i += 1;
299  lapackf77_zgehd2(&n, &i, &ihi, A, &lda, tau, work, &iinfo);
300  MAGMA_Z_SET2REAL( work[0], (double) iws );
301 
302 CLEANUP:
303  for( d = 0; d < ngpu; ++d ) {
304  magma_setdevice( d );
305  magmablasSetKernelStream( NULL );
306  magma_free( data.A[d] );
307  data.A[d] = NULL;
308  if ( data.streams[d] != NULL ) {
309  magma_queue_destroy( data.streams[d] );
310  }
311  }
312  magma_setdevice( cdevice );
313 
314  return *info;
315 } // magma_zgehrd

Here is the call graph for this function:

Here is the caller graph for this function:
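The Further Details above describe the reduction entirely in terms of elementary reflectors H(i) = I - tau * v * v'. As a stand-alone illustration of that formula (not a MAGMA routine; the name apply_reflector and the use of C99 complex arithmetic are choices made here for the sketch only), the following applies one reflector to a vector without ever forming H explicitly:

    #include <complex.h>

    /* Apply the elementary reflector H = I - tau*v*v' to a vector x of
       length n, overwriting x.  v' denotes the conjugate transpose of v. */
    static void apply_reflector( int n, const double complex *v,
                                 double complex tau, double complex *x )
    {
        double complex vx = 0.0;
        for (int k = 0; k < n; ++k)        /* vx = v' * x                  */
            vx += conj(v[k]) * x[k];
        for (int k = 0; k < n; ++k)        /* x  = x - tau * v * (v' * x)  */
            x[k] -= tau * v[k] * vx;
    }

This is exactly the operation that the blocked code above accumulates into the V and T factors of the block reflector H = I - V*T*V'.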

magma_int_t magma_zgelqf ( magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex *  A,
magma_int_t  lda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file zgelqf.cpp.

References __func__, dA, dAT, zgehrd_data::ldda, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgelqf_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgeqrf2_gpu(), magma_zgetmatrix, magma_zmalloc(), magma_zsetmatrix, magmablas_ztranspose2(), magmablas_ztranspose_inplace(), max, and min.

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  ZGELQF computes an LQ factorization of a COMPLEX_16 M-by-N matrix A:
27  A = L * Q.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  A (input/output) COMPLEX_16 array, dimension (LDA,N)
38  On entry, the M-by-N matrix A.
39  On exit, the elements on and below the diagonal of the array
40  contain the m-by-min(m,n) lower trapezoidal matrix L (L is
41  lower triangular if m <= n); the elements above the diagonal,
 42  with the array TAU, represent the unitary matrix Q as a
43  product of elementary reflectors (see Further Details).
44 
45  Higher performance is achieved if A is in pinned memory, e.g.
46  allocated using magma_malloc_pinned.
47 
48  LDA (input) INTEGER
49  The leading dimension of the array A. LDA >= max(1,M).
50 
51  TAU (output) COMPLEX_16 array, dimension (min(M,N))
52  The scalar factors of the elementary reflectors (see Further
53  Details).
54 
55  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
56  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
57 
58  Higher performance is achieved if WORK is in pinned memory, e.g.
59  allocated using magma_malloc_pinned.
60 
61  LWORK (input) INTEGER
62  The dimension of the array WORK. LWORK >= max(1,M).
63  For optimum performance LWORK >= M*NB, where NB is the
64  optimal blocksize.
65 
66  If LWORK = -1, then a workspace query is assumed; the routine
67  only calculates the optimal size of the WORK array, returns
68  this value as the first entry of the WORK array, and no error
69  message related to LWORK is issued.
70 
71  INFO (output) INTEGER
72  = 0: successful exit
73  < 0: if INFO = -i, the i-th argument had an illegal value
 74  if INFO = -10, internal GPU memory allocation failed.
75 
76  Further Details
77  ===============
78  The matrix Q is represented as a product of elementary reflectors
79 
80  Q = H(k) . . . H(2) H(1), where k = min(m,n).
81 
82  Each H(i) has the form
83 
84  H(i) = I - tau * v * v'
85 
86  where tau is a complex scalar, and v is a complex vector with
87  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
88  and tau in TAU(i).
89  ===================================================================== */
90 
91  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
92 
93  magmaDoubleComplex *dA, *dAT;
94  magmaDoubleComplex c_one = MAGMA_Z_ONE;
95  magma_int_t maxm, maxn, maxdim, nb;
96  magma_int_t iinfo, ldda;
97  int lquery;
98 
99  /* Function Body */
100  *info = 0;
101  nb = magma_get_zgelqf_nb(m);
102 
103  work[0] = MAGMA_Z_MAKE( (double)(m*nb), 0 );
104  lquery = (lwork == -1);
105  if (m < 0) {
106  *info = -1;
107  } else if (n < 0) {
108  *info = -2;
109  } else if (lda < max(1,m)) {
110  *info = -4;
111  } else if (lwork < max(1,m) && ! lquery) {
112  *info = -7;
113  }
114  if (*info != 0) {
115  magma_xerbla( __func__, -(*info) );
116  return *info;
117  }
118  else if (lquery) {
119  return *info;
120  }
121 
122  /* Quick return if possible */
123  if (min(m, n) == 0) {
124  work[0] = c_one;
125  return *info;
126  }
127 
128  maxm = ((m + 31)/32)*32;
129  maxn = ((n + 31)/32)*32;
130  maxdim = max(maxm, maxn);
131 
132  if (maxdim*maxdim < 2*maxm*maxn)
133  {
134  ldda = maxdim;
135 
136  if (MAGMA_SUCCESS != magma_zmalloc( &dA, maxdim*maxdim )) {
137  *info = MAGMA_ERR_DEVICE_ALLOC;
138  return *info;
139  }
140 
141  magma_zsetmatrix( m, n, a, lda, dA, ldda );
142  dAT = dA;
143  magmablas_ztranspose_inplace( ldda, dAT, ldda );
144  }
145  else
146  {
147  ldda = maxn;
148 
149  if (MAGMA_SUCCESS != magma_zmalloc( &dA, 2*maxn*maxm )) {
150  *info = MAGMA_ERR_DEVICE_ALLOC;
151  return *info;
152  }
153 
154  magma_zsetmatrix( m, n, a, lda, dA, maxm );
155 
156  dAT = dA + maxn * maxm;
157  magmablas_ztranspose2( dAT, ldda, dA, maxm, m, n );
158  }
159 
160  magma_zgeqrf2_gpu(n, m, dAT, ldda, tau, &iinfo);
161 
162  if (maxdim*maxdim < 2*maxm*maxn) {
163  magmablas_ztranspose_inplace( ldda, dAT, ldda );
164  magma_zgetmatrix( m, n, dA, ldda, a, lda );
165  } else {
166  magmablas_ztranspose2( dA, maxm, dAT, ldda, n, m );
167  magma_zgetmatrix( m, n, dA, maxm, a, lda );
168  }
169 
170  magma_free( dA );
171 
172  return *info;
173 } /* magma_zgelqf */

Here is the call graph for this function:
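As a usage illustration, the sketch below runs the LWORK = -1 workspace query described above and then computes the LQ factorization of a host matrix. It assumes magma.h has been included, that MAGMA_Z_REAL extracts the real part of a magmaDoubleComplex (the optimal size is returned in WORK(1)), and it omits MAGMA initialization, data setup, and error checks:

    magma_int_t m = 2048, n = 1024, lda = m, info;
    magma_int_t k = (m < n ? m : n);
    magmaDoubleComplex *A, *tau, *work, query;

    magma_zmalloc_cpu( &A,   lda*n );   /* pinned memory (magma_malloc_pinned) is faster */
    magma_zmalloc_cpu( &tau, k );
    /* ... fill A with the matrix to factor ... */

    magma_zgelqf( m, n, A, lda, tau, &query, -1, &info );      /* workspace query */
    magma_int_t lwork = (magma_int_t) MAGMA_Z_REAL( query );   /* optimal LWORK   */
    magma_zmalloc_cpu( &work, lwork );

    magma_zgelqf( m, n, A, lda, tau, work, lwork, &info );     /* A = L * Q       */

On exit A holds L in its lower trapezoid and the reflectors of Q above the diagonal, as documented above.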

magma_int_t magma_zgelqf_gpu ( magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file zgelqf_gpu.cpp.

References __func__, dA, dAT, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgelqf_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgeqrf2_gpu(), magma_zmalloc(), magmablas_ztranspose2(), magmablas_ztranspose_inplace(), max, and min.

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  ZGELQF computes an LQ factorization of a COMPLEX_16 M-by-N matrix dA:
27  dA = L * Q.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  dA (input/output) COMPLEX_16 array on the GPU, dimension (LDA,N)
38  On entry, the M-by-N matrix dA.
39  On exit, the elements on and below the diagonal of the array
40  contain the m-by-min(m,n) lower trapezoidal matrix L (L is
41  lower triangular if m <= n); the elements above the diagonal,
 42  with the array TAU, represent the unitary matrix Q as a
43  product of elementary reflectors (see Further Details).
44 
45  LDA (input) INTEGER
46  The leading dimension of the array dA. LDA >= max(1,M).
47 
48  TAU (output) COMPLEX_16 array, dimension (min(M,N))
49  The scalar factors of the elementary reflectors (see Further
50  Details).
51 
52  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
53  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
54 
55  Higher performance is achieved if WORK is in pinned memory, e.g.
56  allocated using magma_malloc_pinned.
57 
58  LWORK (input) INTEGER
59  The dimension of the array WORK. LWORK >= max(1,M).
60  For optimum performance LWORK >= M*NB, where NB is the
61  optimal blocksize.
62 
63  If LWORK = -1, then a workspace query is assumed; the routine
64  only calculates the optimal size of the WORK array, returns
65  this value as the first entry of the WORK array, and no error
66  message related to LWORK is issued.
67 
68  INFO (output) INTEGER
69  = 0: successful exit
70  < 0: if INFO = -i, the i-th argument had an illegal value
 71  if INFO = -10, internal GPU memory allocation failed.
72 
73  Further Details
74  ===============
75  The matrix Q is represented as a product of elementary reflectors
76 
77  Q = H(k) . . . H(2) H(1), where k = min(m,n).
78 
79  Each H(i) has the form
80 
81  H(i) = I - tau * v * v'
82 
83  where tau is a complex scalar, and v is a complex vector with
84  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
85  and tau in TAU(i).
86  ===================================================================== */
87 
88  #define a_ref(a_1,a_2) ( dA+(a_2)*(lda) + (a_1))
89 
90  magmaDoubleComplex *dAT;
91  magmaDoubleComplex c_one = MAGMA_Z_ONE;
92  magma_int_t maxm, maxn, maxdim, nb;
93  magma_int_t iinfo;
94  int lquery;
95 
96  *info = 0;
97  nb = magma_get_zgelqf_nb(m);
98 
99  work[0] = MAGMA_Z_MAKE( (double)(m*nb), 0 );
100  lquery = (lwork == -1);
101  if (m < 0) {
102  *info = -1;
103  } else if (n < 0) {
104  *info = -2;
105  } else if (lda < max(1,m)) {
106  *info = -4;
107  } else if (lwork < max(1,m) && ! lquery) {
108  *info = -7;
109  }
110  if (*info != 0) {
111  magma_xerbla( __func__, -(*info) );
112  return *info;
113  }
114  else if (lquery) {
115  return *info;
116  }
117 
118  /* Quick return if possible */
119  if (min(m, n) == 0) {
120  work[0] = c_one;
121  return *info;
122  }
123 
124  maxm = ((m + 31)/32)*32;
125  maxn = ((n + 31)/32)*32;
126  maxdim = max(maxm, maxn);
127 
128  int ldat = maxn;
129 
130  dAT = dA;
131 
132  if ( m == n ) {
133  ldat = lda;
134  magmablas_ztranspose_inplace( m, dAT, lda );
135  }
136  else {
137  if (MAGMA_SUCCESS != magma_zmalloc( &dAT, maxm*maxn ) ){
138  *info = MAGMA_ERR_DEVICE_ALLOC;
139  return *info;
140  }
141 
142  magmablas_ztranspose2( dAT, ldat, dA, lda, m, n );
143  }
144 
145  magma_zgeqrf2_gpu(n, m, dAT, ldat, tau, &iinfo);
146 
147  if ( m == n ) {
148  magmablas_ztranspose_inplace( m, dAT, ldat );
149  }
150  else {
151  magmablas_ztranspose2( dA, lda, dAT, ldat, n, m );
152  magma_free( dAT );
153  }
154 
155  return *info;
156 } /* magma_zgelqf_gpu */

Here is the call graph for this function:
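The GPU interface expects the matrix already resident in device memory. A minimal sketch, assuming the matrix is generated on the host and moved with the magma_zsetmatrix/magma_zgetmatrix copy routines referenced above (workspace query and error handling omitted):

    magma_int_t m = 2048, n = 1024, lda = m, ldda = ((m+31)/32)*32, info;
    magma_int_t k = (m < n ? m : n);
    magma_int_t lwork = m * magma_get_zgelqf_nb(m);    /* LWORK >= M*NB for best performance */
    magmaDoubleComplex *hA, *tau, *work, *dA;

    magma_zmalloc_cpu( &hA,   lda*n );
    magma_zmalloc_cpu( &tau,  k );
    magma_zmalloc_cpu( &work, lwork );
    magma_zmalloc( &dA, ldda*n );                      /* device storage, padded leading dim */
    /* ... fill hA ... */

    magma_zsetmatrix( m, n, hA, lda, dA, ldda );       /* host -> device                     */
    magma_zgelqf_gpu( m, n, dA, ldda, tau, work, lwork, &info );
    magma_zgetmatrix( m, n, dA, ldda, hA, lda );       /* device -> host: L and reflectors   */

Padding the leading dimension to a multiple of 32, as in the listings above, keeps the device accesses aligned.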

magma_int_t magma_zgels3_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  dB,
magma_int_t  lddb,
magmaDoubleComplex *  hwork,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file zgels3_gpu.cpp.

References __func__, dT, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_zgeqrf_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgeqrf3_gpu(), magma_zgeqrs3_gpu(), magma_zmalloc(), magma_zmalloc_cpu(), max, and min.

19 {
20 /* -- MAGMA (version 1.4.0) --
21  Univ. of Tennessee, Knoxville
22  Univ. of California, Berkeley
23  Univ. of Colorado, Denver
24  August 2013
25 
26  Purpose
27  =======
28  Solves the overdetermined, least squares problem
29  min || A*X - C ||
30  using the QR factorization A.
31  The underdetermined problem (m < n) is not currently handled.
32 
33 
34  Arguments
35  =========
36  TRANS (input) CHARACTER*1
37  = 'N': the linear system involves A.
38  Only trans='N' is currently handled.
39 
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. M >= N >= 0.
45 
46  NRHS (input) INTEGER
47  The number of columns of the matrix C. NRHS >= 0.
48 
 49  DA (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
50  On entry, the M-by-N matrix A.
51  On exit, A is overwritten by details of its QR
52  factorization as returned by ZGEQRF3.
53 
54  LDDA (input) INTEGER
55  The leading dimension of the array A, LDDA >= M.
56 
57  DB (input/output) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS)
58  On entry, the M-by-NRHS matrix C.
59  On exit, the N-by-NRHS solution matrix X.
60 
61  LDDB (input) INTEGER
62  The leading dimension of the array DB. LDDB >= M.
63 
64  HWORK (workspace/output) COMPLEX_16 array, dimension MAX(1,LWORK).
65  On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.
66 
67  LWORK (input) INTEGER
68  The dimension of the array HWORK,
69  LWORK >= (M - N + NB)*(NRHS + NB) + NRHS*NB,
70  where NB is the blocksize given by magma_get_zgeqrf_nb( M ).
71 
72  If LWORK = -1, then a workspace query is assumed; the routine
73  only calculates the optimal size of the HWORK array, returns
74  this value as the first entry of the HWORK array.
75 
76  INFO (output) INTEGER
77  = 0: successful exit
78  < 0: if INFO = -i, the i-th argument had an illegal value
79  ===================================================================== */
80 
81  #define a_ref(a_1,a_2) (dA + (a_2)*(ldda) + (a_1))
82 
83  magmaDoubleComplex *dT, *tau;
84  magma_int_t k;
85 
 86  magma_int_t nb = magma_get_zgeqrf_nb(m);
 87  magma_int_t lwkopt = (m - n + nb)*(nrhs + nb) + nrhs*nb;
88  int lquery = (lwork == -1);
89 
90  hwork[0] = MAGMA_Z_MAKE( (double)lwkopt, 0. );
91 
92  *info = 0;
93  /* For now, N is the only case working */
94  if ( (trans != 'N') && (trans != 'n' ) )
95  *info = -1;
96  else if (m < 0)
97  *info = -2;
 98  else if (n < 0 || m < n) /* LQ is not handled for now */
99  *info = -3;
100  else if (nrhs < 0)
101  *info = -4;
102  else if (ldda < max(1,m))
103  *info = -6;
104  else if (lddb < max(1,m))
105  *info = -8;
106  else if (lwork < lwkopt && ! lquery)
107  *info = -10;
108 
109  if (*info != 0) {
110  magma_xerbla( __func__, -(*info) );
111  return *info;
112  }
113  else if (lquery)
114  return *info;
115 
116  k = min(m,n);
117  if (k == 0) {
118  hwork[0] = MAGMA_Z_ONE;
119  return *info;
120  }
121 
122  /*
123  * Allocate temporary buffers
124  */
125  int ldtwork = ( 2*k + ((n+31)/32)*32 )*nb;
126  if (nb < nrhs)
127  ldtwork = ( 2*k + ((n+31)/32)*32 )*nrhs;
128  if (MAGMA_SUCCESS != magma_zmalloc( &dT, ldtwork )) {
129  *info = MAGMA_ERR_DEVICE_ALLOC;
130  return *info;
131  }
132 
133  magma_zmalloc_cpu( &tau, k );
134  if ( tau == NULL ) {
135  magma_free( dT );
136  *info = MAGMA_ERR_HOST_ALLOC;
137  return *info;
138  }
139 
140  magma_zgeqrf3_gpu( m, n, dA, ldda, tau, dT, info );
141  if ( *info == 0 ) {
142  magma_zgeqrs3_gpu( m, n, nrhs,
143  dA, ldda, tau, dT,
144  dB, lddb, hwork, lwork, info );
145  }
146 
147  magma_free( dT );
148  magma_free_cpu(tau);
149  return *info;
150 }

Here is the call graph for this function:
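A least-squares driver sketch under the restrictions stated above (m >= n, trans = 'N'): the right-hand sides are overwritten in place by the solution, and HWORK is sized with the formula given for LWORK. Device allocation and host-to-device transfers follow the same magma_zsetmatrix pattern as before; data setup and error checks are omitted:

    magma_int_t m = 4096, n = 1024, nrhs = 4, info;
    magma_int_t ldda = ((m+31)/32)*32, lddb = ldda;
    magma_int_t nb    = magma_get_zgeqrf_nb( m );
    magma_int_t lwork = (m - n + nb)*(nrhs + nb) + nrhs*nb;
    magmaDoubleComplex *dA, *dB, *hwork;

    magma_zmalloc( &dA, ldda*n );
    magma_zmalloc( &dB, lddb*nrhs );
    magma_zmalloc_cpu( &hwork, lwork );
    /* ... copy A (m-by-n) into dA and C (m-by-nrhs) into dB ... */

    magma_zgels3_gpu( 'N', m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, &info );
    /* on exit the leading n-by-nrhs block of dB holds the solution X */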

magma_int_t magma_zgels_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
magmaDoubleComplex *  dA,
magma_int_t  ldda,
magmaDoubleComplex *  dB,
magma_int_t  lddb,
magmaDoubleComplex *  hwork,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file zgels_gpu.cpp.

References __func__, dT, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_zgeqrf_nb(), MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgeqrf_gpu(), magma_zgeqrs_gpu(), magma_zmalloc(), magma_zmalloc_cpu(), max, and min.

19 {
20 /* -- MAGMA (version 1.4.0) --
21  Univ. of Tennessee, Knoxville
22  Univ. of California, Berkeley
23  Univ. of Colorado, Denver
24  August 2013
25 
26  Purpose
27  =======
28  Solves the overdetermined, least squares problem
29  min || A*X - C ||
30  using the QR factorization A.
31  The underdetermined problem (m < n) is not currently handled.
32 
33 
34  Arguments
35  =========
36  TRANS (input) CHARACTER*1
37  = 'N': the linear system involves A.
38  Only trans='N' is currently handled.
39 
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. M >= N >= 0.
45 
46  NRHS (input) INTEGER
47  The number of columns of the matrix C. NRHS >= 0.
48 
49  DA (input/output) COMPLEX_16 array on the GPU, dimension (LDA,N)
50  On entry, the M-by-N matrix A.
51  On exit, A is overwritten by details of its QR
52  factorization as returned by ZGEQRF.
53 
54  LDDA (input) INTEGER
55  The leading dimension of the array A, LDDA >= M.
56 
57  DB (input/output) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS)
58  On entry, the M-by-NRHS matrix C.
59  On exit, the N-by-NRHS solution matrix X.
60 
61  LDDB (input) INTEGER
62  The leading dimension of the array DB. LDDB >= M.
63 
64  HWORK (workspace/output) COMPLEX_16 array, dimension MAX(1,LWORK).
65  On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.
66 
67  LWORK (input) INTEGER
68  The dimension of the array HWORK,
69  LWORK >= (M - N + NB)*(NRHS + NB) + NRHS*NB,
70  where NB is the blocksize given by magma_get_zgeqrf_nb( M ).
71 
72  If LWORK = -1, then a workspace query is assumed; the routine
73  only calculates the optimal size of the HWORK array, returns
74  this value as the first entry of the HWORK array.
75 
76  INFO (output) INTEGER
77  = 0: successful exit
78  < 0: if INFO = -i, the i-th argument had an illegal value
79  ===================================================================== */
80 
81  magmaDoubleComplex *dT, *tau;
82  magma_int_t k;
83 
 84  magma_int_t nb = magma_get_zgeqrf_nb(m);
 85  magma_int_t lwkopt = (m - n + nb)*(nrhs + nb) + nrhs*nb;
86  int lquery = (lwork == -1);
87 
88  hwork[0] = MAGMA_Z_MAKE( (double)lwkopt, 0. );
89 
90  *info = 0;
91  /* For now, N is the only case working */
92  if ( (trans != 'N') && (trans != 'n' ) )
93  *info = -1;
94  else if (m < 0)
95  *info = -2;
 96  else if (n < 0 || m < n) /* LQ is not handled for now */
97  *info = -3;
98  else if (nrhs < 0)
99  *info = -4;
100  else if (ldda < max(1,m))
101  *info = -6;
102  else if (lddb < max(1,m))
103  *info = -8;
104  else if (lwork < lwkopt && ! lquery)
105  *info = -10;
106 
107  if (*info != 0) {
108  magma_xerbla( __func__, -(*info) );
109  return *info;
110  }
111  else if (lquery)
112  return *info;
113 
114  k = min(m,n);
115  if (k == 0) {
116  hwork[0] = MAGMA_Z_ONE;
117  return *info;
118  }
119 
120  /*
121  * Allocate temporary buffers
122  */
123  int ldtwork = ( 2*k + ((n+31)/32)*32 )*nb;
124  if (nb < nrhs)
125  ldtwork = ( 2*k + ((n+31)/32)*32 )*nrhs;
126  if (MAGMA_SUCCESS != magma_zmalloc( &dT, ldtwork )) {
127  *info = MAGMA_ERR_DEVICE_ALLOC;
128  return *info;
129  }
130 
131  magma_zmalloc_cpu( &tau, k );
132  if ( tau == NULL ) {
133  magma_free( dT );
134  *info = MAGMA_ERR_HOST_ALLOC;
135  return *info;
136  }
137 
138  magma_zgeqrf_gpu( m, n, dA, ldda, tau, dT, info );
139 
140  if ( *info == 0 ) {
141  magma_zgeqrs_gpu( m, n, nrhs,
142  dA, ldda, tau, dT,
143  dB, lddb, hwork, lwork, info );
144  }
145 
146  magma_free( dT );
147  magma_free_cpu(tau);
148  return *info;
149 }

Here is the call graph for this function:
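magma_zgels_gpu follows the same calling sequence as magma_zgels3_gpu. Its LWORK = -1 query path can be used instead of computing the workspace size by hand, as in this sketch, which reuses m, n, nrhs, dA, ldda, dB, lddb from the previous sketch and again assumes MAGMA_Z_REAL extracts the real part of the returned HWORK(1):

    magmaDoubleComplex query;
    magma_int_t info;

    /* workspace query: only HWORK(1) is set, nothing is factored */
    magma_zgels_gpu( 'N', m, n, nrhs, dA, ldda, dB, lddb, &query, -1, &info );

    magma_int_t lwork = (magma_int_t) MAGMA_Z_REAL( query );
    magmaDoubleComplex *hwork;
    magma_zmalloc_cpu( &hwork, lwork );

    magma_zgels_gpu( 'N', m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, &info );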

magma_int_t magma_zgeqlf ( magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex *  A,
magma_int_t  lda,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file zgeqlf.cpp.

References __func__, a_ref, da_ref, dwork, lapackf77_zgeqlf, lapackf77_zlarft, zgehrd_data::ldda, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgeqlf_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, MAGMA_Z_ONE, magma_zgetmatrix, magma_zgetmatrix_async, magma_zlarfb_gpu(), magma_zmalloc(), magma_zsetmatrix, magma_zsetmatrix_async, MagmaBackward, MagmaBackwardStr, MagmaColumnwise, MagmaColumnwiseStr, MagmaConjTrans, MagmaLeft, MagmaLower, max, min, zpanel_to_q(), and zq_to_panel().

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
 26  ZGEQLF computes a QL factorization of a COMPLEX_16 M-by-N matrix A:
27  A = Q * L.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  A (input/output) COMPLEX_16 array, dimension (LDA,N)
38  On entry, the M-by-N matrix A.
39  On exit, if m >= n, the lower triangle of the subarray
40  A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L;
41  if m <= n, the elements on and below the (n-m)-th
42  superdiagonal contain the M-by-N lower trapezoidal matrix L;
43  the remaining elements, with the array TAU, represent the
 44  unitary matrix Q as a product of elementary reflectors
45  (see Further Details).
46 
47  Higher performance is achieved if A is in pinned memory, e.g.
48  allocated using magma_malloc_pinned.
49 
50  LDA (input) INTEGER
51  The leading dimension of the array A. LDA >= max(1,M).
52 
53  TAU (output) COMPLEX_16 array, dimension (min(M,N))
54  The scalar factors of the elementary reflectors (see Further
55  Details).
56 
57  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
58  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
59 
60  Higher performance is achieved if WORK is in pinned memory, e.g.
61  allocated using magma_malloc_pinned.
62 
63  LWORK (input) INTEGER
64  The dimension of the array WORK. LWORK >= max(1,N).
65  For optimum performance LWORK >= N*NB, where NB can be obtained
66  through magma_get_zgeqlf_nb(M).
67 
68  If LWORK = -1, then a workspace query is assumed; the routine
69  only calculates the optimal size of the WORK array, returns
70  this value as the first entry of the WORK array, and no error
71  message related to LWORK is issued by XERBLA.
72 
73  INFO (output) INTEGER
74  = 0: successful exit
75  < 0: if INFO = -i, the i-th argument had an illegal value
 76  or another error occurred, such as memory allocation failed.
77 
78  Further Details
79  ===============
80  The matrix Q is represented as a product of elementary reflectors
81 
82  Q = H(k) . . . H(2) H(1), where k = min(m,n).
83 
84  Each H(i) has the form
85 
86  H(i) = I - tau * v * v'
87 
88  where tau is a complex scalar, and v is a complex vector with
89  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
90  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
91  ===================================================================== */
92 
93  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
94  #define da_ref(a_1,a_2) (da+(a_2)*ldda + (a_1))
95 
96  magmaDoubleComplex *da, *dwork;
97  magmaDoubleComplex c_one = MAGMA_Z_ONE;
98  magma_int_t i, k, lddwork, old_i, old_ib, nb;
99  magma_int_t rows, cols;
100  magma_int_t ib, ki, kk, mu, nu, iinfo, ldda;
101  int lquery;
102 
103  nb = magma_get_zgeqlf_nb(m);
104  *info = 0;
105  lquery = (lwork == -1);
106 
107  // silence "uninitialized" warnings
108  old_ib = nb;
109  old_i = 0;
110 
111  if (m < 0) {
112  *info = -1;
113  } else if (n < 0) {
114  *info = -2;
115  } else if (lda < max(1,m)) {
116  *info = -4;
117  }
118 
119  if (*info == 0) {
120  k = min(m,n);
121  if (k == 0)
122  work[0] = c_one;
123  else {
124  work[0] = MAGMA_Z_MAKE( n*nb, 0 );
125  }
126 
127  if (lwork < max(1,n) && ! lquery)
128  *info = -7;
129  }
130 
131  if (*info != 0) {
132  magma_xerbla( __func__, -(*info) );
133  return *info;
134  }
135  else if (lquery)
136  return *info;
137 
138  /* Quick return if possible */
139  if (k == 0)
140  return *info;
141 
142  lddwork = ((n+31)/32)*32;
143  ldda = ((m+31)/32)*32;
144 
145  if (MAGMA_SUCCESS != magma_zmalloc( &da, (n)*ldda + nb*lddwork )) {
146  *info = MAGMA_ERR_DEVICE_ALLOC;
147  return *info;
148  }
149  dwork = da + ldda*(n);
150 
151  magma_queue_t stream[2];
152  magma_queue_create( &stream[0] );
153  magma_queue_create( &stream[1] );
154 
155  if ( (nb > 1) && (nb < k) ) {
156  /* Use blocked code initially.
157  The last kk columns are handled by the block method.
158  First, copy the matrix on the GPU except the last kk columns */
159  magma_zsetmatrix_async( (m), (n-nb),
160  a_ref(0, 0), lda,
161  da_ref(0, 0), ldda, stream[0] );
162 
163  ki = ((k - nb - 1) / nb) * nb;
164  kk = min(k, ki + nb);
165  for (i = k - kk + ki; i >= k -kk; i -= nb) {
166  ib = min(k-i,nb);
167 
168  if (i < k - kk + ki) {
169  /* 1. Copy asynchronously the current panel to the CPU.
170  2. Copy asynchronously the submatrix below the panel
 171  to the CPU. */
172  rows = m - k + i + ib;
173  magma_zgetmatrix_async( rows, ib,
174  da_ref(0, n-k+i), ldda,
175  a_ref(0, n-k+i), lda, stream[1] );
176 
177  magma_zgetmatrix_async( (m-rows), ib,
178  da_ref(rows, n-k+i), ldda,
179  a_ref(rows, n-k+i), lda, stream[0] );
180 
181  /* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left in
182  two steps - implementing the lookahead techniques.
183  This is the main update from the lookahead techniques. */
184  rows = m - k + old_i + old_ib;
185  cols = n - k + old_i - old_ib;
 186  magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaBackward, MagmaColumnwise,
 187  rows, cols, old_ib,
188  da_ref(0, cols+old_ib), ldda, dwork, lddwork,
189  da_ref(0, 0 ), ldda, dwork+old_ib, lddwork);
190  }
191 
192  magma_queue_sync( stream[1] );
193  /* Compute the QL factorization of the current block
194  A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */
195  rows = m - k + i + ib;
196  cols = n - k + i;
197  lapackf77_zgeqlf(&rows,&ib, a_ref(0,cols), &lda, tau+i, work, &lwork, &iinfo);
198 
199  if (cols > 0) {
200  /* Form the triangular factor of the block reflector
201  H = H(i+ib-1) . . . H(i+1) H(i) */
 202  lapackf77_zlarft( MagmaBackwardStr, MagmaColumnwiseStr,
 203  &rows, &ib,
204  a_ref(0, cols), &lda, tau + i, work, &ib);
205 
206  zpanel_to_q( MagmaLower, ib, a_ref(rows-ib,cols), lda, work+ib*ib);
207  magma_zsetmatrix( rows, ib,
208  a_ref(0,cols), lda,
209  da_ref(0,cols), ldda );
210  zq_to_panel( MagmaLower, ib, a_ref(rows-ib,cols), lda, work+ib*ib);
211 
212  // Send the triangular part on the GPU
213  magma_zsetmatrix( ib, ib, work, ib, dwork, lddwork );
214 
215  /* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left in
216  two steps - implementing the lookahead techniques.
217  This is the update of first ib columns. */
218  if (i-ib >= k -kk)
 219  magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaBackward, MagmaColumnwise,
 220  rows, ib, ib,
221  da_ref(0, cols), ldda, dwork, lddwork,
222  da_ref(0,cols-ib), ldda, dwork+ib, lddwork);
223  else{
 224  magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaBackward, MagmaColumnwise,
 225  rows, cols, ib,
226  da_ref(0, cols), ldda, dwork, lddwork,
227  da_ref(0, 0 ), ldda, dwork+ib, lddwork);
228  }
229 
230  old_i = i;
231  old_ib = ib;
232  }
233  }
234  mu = m - k + i + nb;
235  nu = n - k + i + nb;
236 
237  magma_zgetmatrix( m, nu, da_ref(0,0), ldda, a_ref(0,0), lda );
238  } else {
239  mu = m;
240  nu = n;
241  }
242 
243  /* Use unblocked code to factor the last or only block */
244  if (mu > 0 && nu > 0)
245  lapackf77_zgeqlf(&mu, &nu, a_ref(0,0), &lda, tau, work, &lwork, &iinfo);
246 
247  magma_queue_destroy( stream[0] );
248  magma_queue_destroy( stream[1] );
249  magma_free( da );
250  return *info;
251 } /* magma_zgeqlf */

Here is the call graph for this function:
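A QL factorization sketch following the LWORK guidance above (LWORK >= N*NB with NB obtained from magma_get_zgeqlf_nb); matrix initialization and error handling are omitted:

    magma_int_t m = 2048, n = 1024, lda = m, info;
    magma_int_t k  = (m < n ? m : n);
    magma_int_t nb = magma_get_zgeqlf_nb( m );
    magma_int_t lwork = n * nb;
    magmaDoubleComplex *A, *tau, *work;

    magma_zmalloc_cpu( &A,    lda*n );
    magma_zmalloc_cpu( &tau,  k );
    magma_zmalloc_cpu( &work, lwork );
    /* ... fill A ... */

    magma_zgeqlf( m, n, A, lda, tau, work, lwork, &info );   /* A = Q * L */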

magma_int_t magma_zgeqp3 ( magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex *  a,
magma_int_t  lda,
magma_int_t *  jpvt,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t *  info 
)

Definition at line 18 of file zgeqp3.cpp.

References __func__, A, blasf77_zswap, cblas_dznrm2(), dA, dwork, lapackf77_zgeqrf, lapackf77_zlaqp2(), lapackf77_zunmqr, zgehrd_data::ldda, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgeqp3_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), MAGMA_Z_MAKE, magma_zgetmatrix, magma_zlaqps(), magma_zmalloc(), magma_zsetmatrix_async, MagmaConjTransStr, MagmaLeftStr, max, and min.

26 {
27 /* -- MAGMA (version 1.4.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  August 2013
32 
33  Purpose
34  =======
35  ZGEQP3 computes a QR factorization with column pivoting of a
36  matrix A: A*P = Q*R using Level 3 BLAS.
37 
38  Arguments
39  =========
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. N >= 0.
45 
46  A (input/output) COMPLEX_16 array, dimension (LDA,N)
47  On entry, the M-by-N matrix A.
48  On exit, the upper triangle of the array contains the
49  min(M,N)-by-N upper trapezoidal matrix R; the elements below
50  the diagonal, together with the array TAU, represent the
51  unitary matrix Q as a product of min(M,N) elementary
52  reflectors.
53 
54  LDA (input) INTEGER
55  The leading dimension of the array A. LDA >= max(1,M).
56 
57  JPVT (input/output) INTEGER array, dimension (N)
58  On entry, if JPVT(J).ne.0, the J-th column of A is permuted
59  to the front of A*P (a leading column); if JPVT(J)=0,
60  the J-th column of A is a free column.
 61  On exit, if JPVT(J)=K, then the J-th column of A*P was
 62  the K-th column of A.
63 
64  TAU (output) COMPLEX_16 array, dimension (min(M,N))
65  The scalar factors of the elementary reflectors.
66 
67  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
68  On exit, if INFO=0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The dimension of the array WORK.
72  For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
73  for [cz]geqp3, LWORK >= (N+1)*NB,
74  where NB is the optimal blocksize.
75 
76  If LWORK = -1, then a workspace query is assumed; the routine
77  only calculates the optimal size of the WORK array, returns
78  this value as the first entry of the WORK array, and no error
79  message related to LWORK is issued by XERBLA.
80 
81  For [cz]geqp3 only:
82  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
83 
84  INFO (output) INTEGER
85  = 0: successful exit.
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of elementary reflectors
91 
92  Q = H(1) H(2) . . . H(k), where k = min(m,n).
93 
94  Each H(i) has the form
95 
96  H(i) = I - tau * v * v'
97 
98  where tau is a complex scalar, and v is a complex vector
99  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
100  A(i+1:m,i), and tau in TAU(i).
101  ===================================================================== */
102 
103 #define A(i, j) (A + (i) + (j)*(lda ))
104 #define dA(i, j) (dwork + (i) + (j)*(ldda))
105 
106  magmaDoubleComplex *dwork, *df;
107 
108  magma_int_t ione = 1;
109 
110  magma_int_t n_j, ldda, ldwork;
111  magma_int_t j, jb, na, nb, sm, sn, fjb, nfxd, minmn;
112  magma_int_t topbmn, sminmn, lwkopt, lquery;
113 
114  *info = 0;
115  lquery = (lwork == -1);
116  if (m < 0) {
117  *info = -1;
118  } else if (n < 0) {
119  *info = -2;
120  } else if (lda < max(1,m)) {
121  *info = -4;
122  }
123 
124  nb = magma_get_zgeqp3_nb(min(m, n));
125  if (*info == 0) {
126  minmn = min(m,n);
127  if (minmn == 0) {
128  lwkopt = 1;
129  } else {
130  lwkopt = (n + 1)*nb;
131 #if defined(PRECISION_d) || defined(PRECISION_s)
132  lwkopt += 2*n;
133 #endif
134  }
135  work[0] = MAGMA_Z_MAKE( lwkopt, 0. );
136 
137  if (lwork < lwkopt && ! lquery) {
138  *info = -8;
139  }
140  }
141 
142  if (*info != 0) {
143  magma_xerbla( __func__, -(*info) );
144  return *info;
145  } else if (lquery) {
146  return *info;
147  }
148 
149  if (minmn == 0)
150  return *info;
151 
152 #if defined(PRECISION_d) || defined(PRECISION_s)
153  double *rwork = work + (n + 1)*nb;
154 #endif
155 
156  ldda = ((m+31)/32)*32;
157  ldwork = n*ldda + (n+1)*nb;
158  if (MAGMA_SUCCESS != magma_zmalloc( &dwork, ldwork )) {
159  *info = MAGMA_ERR_DEVICE_ALLOC;
160  return *info;
161  }
162  df = dwork + n*ldda;
163  // dwork used for dA
164 
165  magma_queue_t stream;
166  magma_queue_create( &stream );
167 
168  /* Move initial columns up front.
169  * Note jpvt uses 1-based indices for historical compatibility. */
170  nfxd = 0;
171  for (j = 0; j < n; ++j) {
172  if (jpvt[j] != 0) {
173  if (j != nfxd) {
174  blasf77_zswap(&m, A(0, j), &ione, A(0, nfxd), &ione);
175  jpvt[j] = jpvt[nfxd];
176  jpvt[nfxd] = j + 1;
177  }
178  else {
179  jpvt[j] = j + 1;
180  }
181  ++nfxd;
182  }
183  else {
184  jpvt[j] = j + 1;
185  }
186  }
187 
188  /* Factorize fixed columns
189  =======================
190  Compute the QR factorization of fixed columns and update
191  remaining columns. */
192  if (nfxd > 0) {
193  na = min(m,nfxd);
194  lapackf77_zgeqrf(&m, &na, A, &lda, tau, work, &lwork, info);
195  if (na < n) {
196  n_j = n - na;
 197  lapackf77_zunmqr( MagmaLeftStr, MagmaConjTransStr, &m, &n_j, &na,
 198  A, &lda, tau, A(0, na), &lda,
199  work, &lwork, info );
200  }
201  }
202 
203  /* Factorize free columns */
204  if (nfxd < minmn) {
205  sm = m - nfxd;
206  sn = n - nfxd;
207  sminmn = minmn - nfxd;
208 
209  if (nb < sminmn) {
210  j = nfxd;
211 
212  // Set the original matrix to the GPU
213  magma_zsetmatrix_async( m, sn,
214  A (0,j), lda,
215  dA(0,j), ldda, stream );
216  }
217 
218  /* Initialize partial column norms. */
219  for (j = nfxd; j < n; ++j) {
220  rwork[j] = cblas_dznrm2(sm, A(nfxd, j), ione);
221  rwork[n + j] = rwork[j];
222  }
223 
224  j = nfxd;
225  if (nb < sminmn) {
226  /* Use blocked code initially. */
227  magma_queue_sync( stream );
228 
229  /* Compute factorization: while loop. */
230  topbmn = minmn - nb;
231  while(j < topbmn) {
232  jb = min(nb, topbmn - j);
233 
234  /* Factorize JB columns among columns J:N. */
235  n_j = n - j;
236 
237  if (j>nfxd) {
238  // Get panel to the CPU
239  magma_zgetmatrix( m-j, jb,
240  dA(j,j), ldda,
241  A (j,j), lda );
242 
243  // Get the rows
244  magma_zgetmatrix( jb, n_j - jb,
245  dA(j,j + jb), ldda,
246  A (j,j + jb), lda );
247  }
248 
249  magma_zlaqps( m, n_j, j, jb, &fjb,
250  A (0, j), lda,
251  dA(0, j), ldda,
252  &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
253  work,
254  &work[jb], n_j,
255  &df[jb], n_j );
256 
257  j += fjb; /* fjb is actual number of columns factored */
258  }
259  }
260 
261  /* Use unblocked code to factor the last or only block. */
262  if (j < minmn) {
263  n_j = n - j;
264  if (j > nfxd) {
265  magma_zgetmatrix( m-j, n_j,
266  dA(j,j), ldda,
267  A (j,j), lda );
268  }
269  lapackf77_zlaqp2(&m, &n_j, &j, A(0, j), &lda, &jpvt[j],
270  &tau[j], &rwork[j], &rwork[n+j], work );
271  }
272  }
273 
274  work[0] = MAGMA_Z_MAKE( lwkopt, 0. );
275  magma_free( dwork );
276 
277  magma_queue_destroy( stream );
278 
279  return *info;
280 } /* zgeqp3 */

Here is the call graph for this function:

Here is the caller graph for this function:
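A column-pivoted QR sketch: jpvt entries set to 0 mark free columns (a nonzero entry pins that column to the front, as described above), RWORK has length 2*N for the complex precisions, and LWORK >= (N+1)*NB. Plain stdlib malloc is used here for the integer and real work arrays; setup and error handling are omitted:

    magma_int_t m = 2048, n = 1024, lda = m, info;
    magma_int_t k  = (m < n ? m : n);
    magma_int_t nb = magma_get_zgeqp3_nb( k );
    magma_int_t lwork = (n + 1) * nb;
    magma_int_t *jpvt;
    double *rwork;
    magmaDoubleComplex *A, *tau, *work;

    magma_zmalloc_cpu( &A,    lda*n );
    magma_zmalloc_cpu( &tau,  k );
    magma_zmalloc_cpu( &work, lwork );
    jpvt  = (magma_int_t*) malloc( n   * sizeof(magma_int_t) );
    rwork = (double*)      malloc( 2*n * sizeof(double) );

    for (magma_int_t j = 0; j < n; ++j)
        jpvt[j] = 0;                       /* all columns are free columns */
    /* ... fill A ... */

    magma_zgeqp3( m, n, A, lda, jpvt, tau, work, lwork, rwork, &info );
    /* on exit jpvt[j] = K (1-based) means column j of A*P was column K of A */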

magma_int_t magma_zgeqp3_gpu ( magma_int_t  m,
magma_int_t  n,
magmaDoubleComplex *  A,
magma_int_t  lda,
magma_int_t *  jpvt,
magmaDoubleComplex *  tau,
magmaDoubleComplex *  work,
magma_int_t  lwork,
double *  rwork,
magma_int_t *  info 
)

Definition at line 17 of file zgeqp32_gpu.cpp.

References __func__, A, blasf77_zswap, magma_dcopymatrix, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_zgeqp3_nb(), magma_scopymatrix, MAGMA_SUCCESS, magma_xerbla(), magma_zlaqps2_gpu(), magma_zlaqps3_gpu(), magma_zmalloc(), magmablas_dznrm2_cols(), max, and min.

25 {
26 /* -- MAGMA (version 1.4.0) --
27  Univ. of Tennessee, Knoxville
28  Univ. of California, Berkeley
29  Univ. of Colorado, Denver
30  August 2013
31 
32  Purpose
33  =======
34  ZGEQP3 computes a QR factorization with column pivoting of a
35  matrix A: A*P = Q*R using Level 3 BLAS.
36 
37  Arguments
38  =========
39  M (input) INTEGER
40  The number of rows of the matrix A. M >= 0.
41 
42  N (input) INTEGER
43  The number of columns of the matrix A. N >= 0.
44 
45  A (input/output) COMPLEX_16 array, dimension (LDA,N)
46  On entry, the M-by-N matrix A.
47  On exit, the upper triangle of the array contains the
48  min(M,N)-by-N upper trapezoidal matrix R; the elements below
49  the diagonal, together with the array TAU, represent the
50  unitary matrix Q as a product of min(M,N) elementary
51  reflectors.
52 
53  LDA (input) INTEGER
54  The leading dimension of the array A. LDA >= max(1,M).
55 
56  JPVT (input/output) INTEGER array, dimension (N)
57  On entry, if JPVT(J).ne.0, the J-th column of A is permuted
58  to the front of A*P (a leading column); if JPVT(J)=0,
59  the J-th column of A is a free column.
 60  On exit, if JPVT(J)=K, then the J-th column of A*P was
 61  the K-th column of A.
62 
63  TAU (output) COMPLEX_16 array, dimension (min(M,N))
64  The scalar factors of the elementary reflectors.
65 
66  WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
67  On exit, if INFO=0, WORK(1) returns the optimal LWORK.
68 
69  LWORK (input) INTEGER
70  The dimension of the array WORK.
71  For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
72  for [cz]geqp3, LWORK >= (N+1)*NB,
73  where NB is the optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  For [cz]geqp3 only:
81  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
82 
83  INFO (output) INTEGER
84  = 0: successful exit.
85  < 0: if INFO = -i, the i-th argument had an illegal value.
86 
87  Further Details
88  ===============
89  The matrix Q is represented as a product of elementary reflectors
90 
91  Q = H(1) H(2) . . . H(k), where k = min(m,n).
92 
93  Each H(i) has the form
94 
95  H(i) = I - tau * v * v'
96 
97  where tau is a complex scalar, and v is a complex vector
98  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
99  A(i+1:m,i), and tau in TAU(i).
100  ===================================================================== */
101 
102 #define A(i, j) (A + (i) + (j)*(lda ))
103 
104  magma_int_t ione = 1;
105 
106  magma_int_t n_j;
107  magma_int_t j, jb, na, nb, sm, sn, fjb, nfxd, minmn;
108  magma_int_t topbmn, sminmn, lwkopt, lquery;
109 
110  *info = 0;
111  lquery = (lwork == -1);
112  if (m < 0) {
113  *info = -1;
114  } else if (n < 0) {
115  *info = -2;
116  } else if (lda < max(1,m)) {
117  *info = -4;
118  }
119 
120  if (*info == 0) {
121  minmn = min(m,n);
122  if (minmn == 0) {
123  lwkopt = 1;
124  } else {
125  nb = magma_get_zgeqp3_nb(min(m, n));
126  lwkopt = (n + 1)*nb;
127 #if defined(PRECISION_d) || defined(PRECISION_s)
128  lwkopt += 2*n;
129 #endif
130  }
131  //work[0] = MAGMA_Z_MAKE( lwkopt, 0. );
132 
133  if (lwork < lwkopt && ! lquery) {
134  *info = -8;
135  }
136  }
137 
138  if (*info != 0) {
139  magma_xerbla( __func__, -(*info) );
140  return *info;
141  } else if (lquery) {
142  return *info;
143  }
144 
145  if (minmn == 0)
146  return *info;
147 
148 #if defined(PRECISION_d) || defined(PRECISION_s)
149  double *rwork = work + (n + 1)*nb;
150 #endif
151  magmaDoubleComplex *df;
152  if (MAGMA_SUCCESS != magma_zmalloc( &df, (n+1)*(nb+32) )) {
153  *info = MAGMA_ERR_DEVICE_ALLOC;
154  return *info;
155  }
156  cudaMemset( df, 0, (n+1)*(nb+32)*sizeof(magmaDoubleComplex) );
157 
158  nfxd = 0;
159  /* Move initial columns up front.
160  * Note jpvt uses 1-based indices for historical compatibility. */
161  for (j = 0; j < n; ++j) {
162  if (jpvt[j] != 0) {
163  if (j != nfxd) {
164  blasf77_zswap(&m, A(0, j), &ione, A(0, nfxd), &ione);
165  jpvt[j] = jpvt[nfxd];
166  jpvt[nfxd] = j + 1;
167  }
168  else {
169  jpvt[j] = j + 1;
170  }
171  ++nfxd;
172  }
173  else {
174  jpvt[j] = j + 1;
175  }
176  }
177 
178  /* Factorize fixed columns
179  =======================
180  Compute the QR factorization of fixed columns and update
181  remaining columns.
182  if (nfxd > 0) {
183  na = min(m,nfxd);
184  lapackf77_zgeqrf(&m, &na, A, &lda, tau, work, &lwork, info);
185  if (na < n) {
186  n_j = n - na;
187  lapackf77_zunmqr( MagmaLeftStr, MagmaConjTransStr, &m, &n_j, &na,
188  A, &lda, tau, A(0, na), &lda,
189  work, &lwork, info );
190  }
191  }*/
192 
193  /* Factorize free columns */
194  if (nfxd < minmn) {
195  sm = m - nfxd;
196  sn = n - nfxd;
197  sminmn = minmn - nfxd;
198 
199  /*if (nb < sminmn) {
200  j = nfxd;
201 
202  // Set the original matrix to the GPU
203  magma_zsetmatrix_async( m, sn,
204  A (0,j), lda,
205  dA(0,j), ldda, stream[0] );
206  }*/
207 
208  /* Initialize partial column norms. */
209  magmablas_dznrm2_cols(sm, sn, A(nfxd,nfxd), lda, &rwork[nfxd]);
210 #if defined(PRECISION_d) || defined(PRECISION_z)
211  magma_dcopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
212 #else
213  magma_scopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
214 #endif
215  /*for (j = nfxd; j < n; ++j) {
216  rwork[j] = cblas_dznrm2(sm, A(nfxd, j), ione);
217  rwork[n + j] = rwork[j];
218  }*/
219 
220  j = nfxd;
221  //if (nb < sminmn)
222  {
223  /* Use blocked code initially. */
224  //magma_queue_sync( stream[0] );
225 
226  /* Compute factorization: while loop. */
227  topbmn = minmn;// - nb;
228  while(j < topbmn) {
229  jb = min(nb, topbmn - j);
230  if (jb+16>topbmn-j)
231  jb = topbmn - j;
232  /* Factorize JB columns among columns J:N. */
233  n_j = n - j;
234 
235  /*if (j>nfxd) {
236  // Get panel to the CPU
237  magma_zgetmatrix( m-j, jb,
238  dA(j,j), ldda,
239  A (j,j), lda );
240 
241  // Get the rows
242  magma_zgetmatrix( jb, n_j - jb,
243  dA(j,j + jb), ldda,
244  A (j,j + jb), lda );
245  }*/
246 
247  //magma_zlaqps_gpu
248  //magma_zlaqps2_gpu
249  // if (j!=nfxd)
250  //magmablas_dznrm2_cols(sm-j, jb, A(nfxd+j,nfxd+j), lda, &rwork[nfxd+j]);
251  //magmablas_dznrm2_cols(sm-j, sn-j, A(nfxd+j,nfxd+j), lda, &rwork[nfxd+j]);
252 
253  magma_zlaqps3_gpu( m, n_j, j, jb, &fjb,
254  A (0, j), lda,
255  &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
256  work,
257  &df[jb], n_j );
258 
259  j += fjb; /* fjb is actual number of columns factored */
260  //printf(" out of %d; jb was %d\n", j, jb);
261  }
262  }
263 
264  /* Use unblocked code to factor the last or only block.
265  if (j < minmn) {
266  n_j = n - j;
267  if (j > nfxd) {
268  magma_zgetmatrix( m-j, n_j,
269  dA(j,j), ldda,
270  A (j,j), lda );
271  }
272  lapackf77_zlaqp2(&m, &n_j, &j, A(0, j), &lda, &jpvt[j],
273  &tau[j], &rwork[j], &rwork[n+j], work );
274  }*/
275  }
276  //work[0] = MAGMA_Z_MAKE( lwkopt, 0. );
277  magma_free(df);
278 
279  return *info;
280 } /* zgeqp3 */

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgeqr2_gpu (magma_int_t m, magma_int_t n, magmaDoubleComplex *dA, magma_int_t lda, magmaDoubleComplex *tau, double *work, magma_int_t *info)

Here is the caller graph for this function:

magma_int_t magma_zgeqr2x2_gpu (magma_int_t *m, magma_int_t *n, magmaDoubleComplex *dA, magma_int_t *ldda, magmaDoubleComplex *dtau, magmaDoubleComplex *dT, magmaDoubleComplex *ddA, double *dwork, magma_int_t *info)

Definition at line 14 of file zgeqr2x_gpu-v2.cpp.

References __func__, da_ref, magma_xerbla(), magma_zlarfbx_gpu(), magma_zlarfgtx_gpu(), magmablas_dznrm2_adjust(), magmablas_dznrm2_cols(), max, and min.

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  ZGEQR2 computes a QR factorization of a complex m by n matrix A:
28  A = Q * R.
29 
30  This expert routine requires two more arguments than the standard
31  zgeqr2, namely, dT and ddA, explained below. The storage for A is
32  also not as in the LAPACK's zgeqr2 routine (see below).
33 
34  The first is used to output the triangular
35  n x n factor T of the block reflector used in the factorization.
36  The second holds the diagonal nxn blocks of A, i.e., the diagonal
37  submatrices of R. This routine implements the left looking QR.
38 
39  Arguments
40  =========
41  M (input) INTEGER
42  The number of rows of the matrix A. M >= 0.
43 
44  N (input) INTEGER
45  The number of columns of the matrix A. N >= 0.
46 
47  A (input/output) COMPLEX_16 array, dimension (LDA,N)
48  On entry, the m by n matrix A.
49  On exit, the unitary matrix Q as a
50  product of elementary reflectors (see Further Details).
51 
52  the elements on and above the diagonal of the array
53  contain the min(m,n) by n upper trapezoidal matrix R (R is
54  upper triangular if m >= n); the elements below the diagonal,
55  with the array TAU, represent the unitary matrix Q as a
56  product of elementary reflectors (see Further Details).
57 
58  LDA (input) INTEGER
59  The leading dimension of the array A. LDA >= max(1,M).
60 
61  TAU (output) COMPLEX_16 array, dimension (min(M,N))
62  The scalar factors of the elementary reflectors (see Further
63  Details).
64 
65  dT (output) COMPLEX_16 array, dimension N x N.
66  Stores the triangular N x N factor T of the block reflector
67  used in the factorization. The lower triangular part is 0.
68 
69  ddA (output) COMPLEX_16 array, dimension N x N.
70  Stores the elements of the upper N x N diagonal block of A.
71  LAPACK stores this array in A. There are 0s below the diagonal.
72 
73  RWORK (workspace) DOUBLE_PRECISION array, dimension (3 N)
74 
75  INFO (output) INTEGER
76  = 0: successful exit
77  < 0: if INFO = -i, the i-th argument had an illegal value
78 
79  Further Details
80  ===============
81  The matrix Q is represented as a product of elementary reflectors
82 
83  Q = H(1) H(2) . . . H(k), where k = min(m,n).
84 
85  Each H(i) has the form
86 
87  H(i) = I - tau * v * v'
88 
89  where tau is a complex scalar, and v is a complex vector with
90  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
91  and tau in TAU(i).
92  ===================================================================== */
93 
94  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
95 
96  magma_int_t i, k;
97 
98  magmaDoubleComplex *work = (magmaDoubleComplex *)dwork;
99  double *dnorm = dwork + 4*(*n);
100 
101 
102  *info = 0;
103  if (*m < 0) {
104  *info = -1;
105  } else if (*n < 0) {
106  *info = -2;
107  } else if (*ldda < max(1,*m)) {
108  *info = -4;
109  }
110  if (*info != 0) {
111  magma_xerbla( __func__, -(*info) );
112  return *info;
113  }
114 
115  /* Compute the norms of the trailing columns */
116  k = min(*m,*n);
117  magmablas_dznrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
118 
119  for (i = 0; i < k; ++i) {
120  /* 1. Apply H' to A(:,i) from the left
121  2. Adjust the dnorm[i] to hold the norm of A(i:m,i) */
122  if (i>0) {
123  magma_zlarfbx_gpu(*m, i, da_ref(0, 0), *ldda,
124  dT, k, da_ref(0, i), work);
125  magmablas_dznrm2_adjust(i, dnorm+i, da_ref(0, i));
126  }
127 
128  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
129  1. 1 is not yet put on the diagonal of A
130  2. Elements above the diagonal are copied in ddA and the ones
131  in A are set to zero
132  3. update T */
133  magma_zlarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i,
134  dnorm+i, ddA + i + i*(*n), i,
135  da_ref(i,0), *ldda, dT, k, work);
136  }
137 
138  return *info;
139 } /* magma_zgeqr2 */
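As a usage note, the sketch below shows how a caller might allocate the device-resident arguments described above and invoke the routine. It is illustrative only and not taken from the MAGMA testers: the pointer-argument (Fortran-style) interface is the one visible in the listing, and the dwork size is an assumption based solely on the offsets used above (complex scratch in the first 4*n doubles, column norms starting at dwork + 4*n); a real caller should size it according to the MAGMA testers.

/* zgeqr2x2_usage.c -- illustrative caller-side sketch, not from the MAGMA testers. */
#include <stdlib.h>
#include "magma.h"

int main( void )
{
    magma_int_t m = 512, n = 128, lda = m, ldda = ((m + 31)/32)*32, info;
    magma_int_t k = (m < n) ? m : n;
    magmaDoubleComplex *A, *dA, *dtau, *dT, *ddA;
    double *dwork;

    magma_init();
    magma_zmalloc_cpu( &A, lda*n );         /* host copy of the matrix              */
    magma_zmalloc( &dA,   ldda*n );         /* the matrix on the GPU                */
    magma_zmalloc( &dtau, k );              /* scalar factors of the reflectors     */
    magma_zmalloc( &dT,   n*n );            /* triangular factor T of the reflector */
    magma_zmalloc( &ddA,  n*n );            /* diagonal block of R                  */
    magma_dmalloc( &dwork, 4*n + k + 32 );  /* ASSUMED size: 4*n doubles of complex
                                               scratch plus the column norms (see
                                               the offsets in the listing above)    */

    /* ... fill A with the matrix to be factored, then upload it ... */
    magma_zsetmatrix( m, n, A, lda, dA, ldda );

    /* Pointer-argument (Fortran-style) interface, as shown in the listing. */
    magma_zgeqr2x2_gpu( &m, &n, dA, &ldda, dtau, dT, ddA, dwork, &info );

    /* On exit: reflectors below the diagonal of dA, the diagonal block of R in
       ddA, the block-reflector factor T in dT, and the tau values in dtau.    */

    magma_free( dA );  magma_free( dtau );  magma_free( dT );
    magma_free( ddA ); magma_free( dwork ); magma_free_cpu( A );
    magma_finalize();
    return 0;
}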

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgeqr2x3_gpu (magma_int_t *m, magma_int_t *n, magmaDoubleComplex *dA, magma_int_t *ldda, magmaDoubleComplex *dtau, magmaDoubleComplex *dT, magmaDoubleComplex *ddA, double *dwork, magma_int_t *info)

Definition at line 54 of file zgeqr2x_gpu-v3.cpp.

References __func__, BLOCK_SIZE, da_ref, dwork, magma_xerbla(), magma_zlarfb2_gpu(), magma_zlarfbx_gpu(), magma_zlarfgtx_gpu(), magmablas_dznrm2_adjust(), magmablas_dznrm2_cols(), max, and min.

58 {
59 /* -- MAGMA (version 1.4.0) --
60  Univ. of Tennessee, Knoxville
61  Univ. of California, Berkeley
62  Univ. of Colorado, Denver
63  August 2013
64 
65  Purpose
66  =======
67  ZGEQR2 computes a QR factorization of a complex m by n matrix A:
68  A = Q * R.
69 
70  This expert routine requires two more arguments than the standard
71  zgeqr2, namely, dT and ddA, explained below. The storage for A is
72  also not as in the LAPACK's zgeqr2 routine (see below).
73 
74  The first is used to output the triangular
75  n x n factor T of the block reflector used in the factorization.
76  The second holds the diagonal nxn blocks of A, i.e., the diagonal
77  submatrices of R. This routine implements the left looking QR.
78 
79  This version adds internal blocking.
80 
81  Arguments
82  =========
83  M (input) INTEGER
84  The number of rows of the matrix A. M >= 0.
85 
86  N (input) INTEGER
87  The number of columns of the matrix A. N >= 0.
88 
89  A (input/output) COMPLEX_16 array, dimension (LDA,N)
90  On entry, the m by n matrix A.
91  On exit, the unitary matrix Q as a
92  product of elementary reflectors (see Further Details).
93 
94  the elements on and above the diagonal of the array
95  contain the min(m,n) by n upper trapezoidal matrix R (R is
96  upper triangular if m >= n); the elements below the diagonal,
97  with the array TAU, represent the unitary matrix Q as a
98  product of elementary reflectors (see Further Details).
99 
100  LDA (input) INTEGER
101  The leading dimension of the array A. LDA >= max(1,M).
102 
103  TAU (output) COMPLEX_16 array, dimension (min(M,N))
104  The scalar factors of the elementary reflectors (see Further
105  Details).
106 
107  dT (output) COMPLEX_16 array, dimension N x N.
108  Stores the triangular N x N factor T of the block reflector
109  used in the factorization. The lower triangular part is 0.
110 
111  ddA (output) COMPLEX_16 array, dimension N x N.
112  Stores the elements of the upper N x N diagonal block of A.
113  LAPACK stores this array in A. There are 0s below the diagonal.
114 
115  RWORK (workspace) DOUBLE_PRECISION array, dimension (3 N)
116 
117  INFO (output) INTEGER
118  = 0: successful exit
119  < 0: if INFO = -i, the i-th argument had an illegal value
120 
121  Further Details
122  ===============
123  The matrix Q is represented as a product of elementary reflectors
124 
125  Q = H(1) H(2) . . . H(k), where k = min(m,n).
126 
127  Each H(i) has the form
128 
129  H(i) = I - tau * v * v'
130 
131  where tau is a complex scalar, and v is a complex vector with
132  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
133  and tau in TAU(i).
134  ===================================================================== */
135 
136  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
137  #define BLOCK_SIZE 32
138 
139  magma_int_t i, k;
140 
141  double *dnorm = dwork;
142  magmaDoubleComplex *work = (magmaDoubleComplex *)(dwork+2*(*n));
143 
144  *info = 0;
145  if (*m < 0) {
146  *info = -1;
147  } else if (*n < 0) {
148  *info = -2;
149  } else if (*ldda < max(1,*m)) {
150  *info = -4;
151  }
152  if (*info != 0) {
153  magma_xerbla( __func__, -(*info) );
154  return *info;
155  }
156 
157  /* Compute the norms of the trailing columns */
158  k = min(*m,*n);
159  magmablas_dznrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
160 
161  for (int b=0; b < k; b += BLOCK_SIZE) {
162  for (i = b; i < min(k, b+BLOCK_SIZE); ++i) {
163 
164  /* Apply H' to A(:,i) from the left */
165  if ( i-b > 0)
166  magma_zlarfbx_gpu(*m-b, i-b, da_ref(b, b), *ldda,
167  dT+b+b*k, k, da_ref(b, i), work);
168 
169  /* Adjust the dnorm[i] to hold the norm of A(i:m,i) */
170  if ( i > 0 )
171  magmablas_dznrm2_adjust(i, dnorm+i, da_ref(0, i));
172 
173  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
174  1. 1 is not yet put on the diagonal of A
175  2. Elements above the diagonal are copied in ddA and
176  the ones in A are set to zero
177  3. update T */
178  magma_zlarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i,
179  dnorm+i, ddA + i + i*(*n), i,
180  da_ref(i,0), *ldda, dT, k, work);
181  }
182 
183  /* Apply the transformations to the trailing matrix. */
184  //magma_zlarfb2_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
185  magma_zlarfb2_gpu(
186  *m-b, k-i, BLOCK_SIZE,
187  da_ref(b, b), *ldda, dT+b+b*k, k,
188  da_ref(b, i), *ldda, work, k-i);
189  }
190 
191  return *info;
192 } /* magma_zgeqr2 */
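The internal blocking mentioned in the Purpose section rests on the compact WY identity: a product of elementary reflectors H(1) H(2) . . . H(b) equals a single block reflector I - V*T*V', where T is the upper triangular factor accumulated in dT. This is what allows the trailing matrix to be updated once per BLOCK_SIZE panel (the magma_zlarfb2_gpu call) instead of once per column. The self-contained C99 sketch below (plain CPU code, no MAGMA calls; sizes and helper names are made up for illustration) builds T with the standard forward recurrence and checks numerically that the blocked application matches the column-by-column one.

/* compact_wy_demo.c -- illustrative only; compile with: cc -std=c99 compact_wy_demo.c -lm */
#include <stdio.h>
#include <stdlib.h>
#include <complex.h>
#include <math.h>

#define M 8   /* rows of the panel                  */
#define B 3   /* number of reflectors (panel width) */

/* y <- H(i)*y with H(i) = I - tau[i]*v_i*v_i', v_i = V(:,i) */
static void apply_reflector( double complex V[M][B], double complex tau[B],
                             int i, double complex y[M] )
{
    double complex dot = 0.0;
    for (int r = 0; r < M; ++r) dot += conj( V[r][i] ) * y[r];
    for (int r = 0; r < M; ++r) y[r] -= tau[i] * V[r][i] * dot;
}

static double complex rnd( void )
{
    return (rand()/(double)RAND_MAX - 0.5) + (rand()/(double)RAND_MAX - 0.5)*I;
}

int main( void )
{
    double complex V[M][B], tau[B], T[B][B] = {{0}};
    double complex x[M], y_seq[M], y_blk[M];

    /* Reflectors in LAPACK storage: v_i(1:i-1) = 0, v_i(i) = 1. */
    for (int i = 0; i < B; ++i) {
        tau[i] = rnd();
        for (int r = 0; r < M; ++r)
            V[r][i] = (r < i) ? 0.0 : (r == i) ? 1.0 : rnd();
    }
    for (int r = 0; r < M; ++r) { x[r] = rnd(); y_seq[r] = y_blk[r] = x[r]; }

    /* Forward recurrence for T (what dT stores):
       T(i,i) = tau_i,  T(0:i-1,i) = -tau_i * T(0:i-1,0:i-1) * (V' * v_i). */
    for (int i = 0; i < B; ++i) {
        double complex w[B];
        T[i][i] = tau[i];
        for (int j = 0; j < i; ++j) {                 /* w = V(:,0:i-1)' * v_i */
            w[j] = 0.0;
            for (int r = 0; r < M; ++r) w[j] += conj( V[r][j] ) * V[r][i];
        }
        for (int j = 0; j < i; ++j) {                 /* T(j,i) = -tau_i * sum_k T(j,k)*w(k) */
            double complex s = 0.0;
            for (int k = j; k < i; ++k) s += T[j][k] * w[k];
            T[j][i] = -tau[i] * s;
        }
    }

    /* (a) column by column: y <- H(1) H(2) ... H(B) * x (apply H(B) first). */
    for (int i = B - 1; i >= 0; --i) apply_reflector( V, tau, i, y_seq );

    /* (b) blocked, as in the trailing update: y <- (I - V*T*V') * x. */
    double complex z[B], Tz[B];
    for (int j = 0; j < B; ++j) {                     /* z = V' * x */
        z[j] = 0.0;
        for (int r = 0; r < M; ++r) z[j] += conj( V[r][j] ) * x[r];
    }
    for (int j = 0; j < B; ++j) {                     /* Tz = T*z, T upper triangular */
        Tz[j] = 0.0;
        for (int k = j; k < B; ++k) Tz[j] += T[j][k] * z[k];
    }
    for (int r = 0; r < M; ++r)                       /* y = x - V*Tz */
        for (int j = 0; j < B; ++j) y_blk[r] -= V[r][j] * Tz[j];

    double err = 0.0;
    for (int r = 0; r < M; ++r) err = fmax( err, cabs( y_seq[r] - y_blk[r] ) );
    printf( "max difference between the two applications: %.2e\n", err );
    return 0;
}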

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_zgeqr2x4_gpu ( magma_int_t m,
magma_int_t n,
magmaDoubleComplex *  dA,
magma_int_t ldda,
magmaDoubleComplex *  dtau,
magmaDoubleComplex *  dT,
magmaDoubleComplex *  ddA,
double *  dwork,
magma_int_t info,