MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
magma_d.h File Reference
#include "magma_types.h"
#include "magma_dgehrd_m.h"
Include dependency graph for magma_d.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define PRECISION_d
 

Functions

magma_int_t magma_get_dpotrf_nb (magma_int_t m)
 
magma_int_t magma_get_dgetrf_nb (magma_int_t m)
 
magma_int_t magma_get_dgetri_nb (magma_int_t m)
 
magma_int_t magma_get_dgeqp3_nb (magma_int_t m)
 
magma_int_t magma_get_dgeqrf_nb (magma_int_t m)
 
magma_int_t magma_get_dgeqlf_nb (magma_int_t m)
 
magma_int_t magma_get_dgehrd_nb (magma_int_t m)
 
magma_int_t magma_get_dsytrd_nb (magma_int_t m)
 
magma_int_t magma_get_dgelqf_nb (magma_int_t m)
 
magma_int_t magma_get_dgebrd_nb (magma_int_t m)
 
magma_int_t magma_get_dsygst_nb (magma_int_t m)
 
magma_int_t magma_get_dgesvd_nb (magma_int_t m)
 
magma_int_t magma_get_dsygst_nb_m (magma_int_t m)
 
magma_int_t magma_get_dbulge_nb (magma_int_t m, magma_int_t nbthreads)
 
magma_int_t magma_get_dbulge_nb_mgpu (magma_int_t m)
 
magma_int_t magma_dbulge_get_Vblksiz (magma_int_t m, magma_int_t nb, magma_int_t nbthreads)
 
magma_int_t magma_get_dbulge_gcperf ()
 
magma_int_t magma_get_smlsize_divideconquer ()
 
void magma_dmove_eig (char range, magma_int_t n, double *w, magma_int_t *il, magma_int_t *iu, double vl, double vu, magma_int_t *m)
 
magma_int_t magma_dgebrd (magma_int_t m, magma_int_t n, double *A, magma_int_t lda, double *d, double *e, double *tauq, double *taup, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgehrd2 (magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgehrd (magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *dT, magma_int_t *info)
 
magma_int_t magma_dgelqf (magma_int_t m, magma_int_t n, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgeqlf (magma_int_t m, magma_int_t n, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgeqrf (magma_int_t m, magma_int_t n, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgeqrf4 (magma_int_t num_gpus, magma_int_t m, magma_int_t n, double *a, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgeqrf_ooc (magma_int_t m, magma_int_t n, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgesv (magma_int_t n, magma_int_t nrhs, double *A, magma_int_t lda, magma_int_t *ipiv, double *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_dgetrf (magma_int_t m, magma_int_t n, double *A, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_dgetrf2 (magma_int_t m, magma_int_t n, double *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_dlaqps (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, double *A, magma_int_t lda, double *dA, magma_int_t ldda, magma_int_t *jpvt, double *tau, double *vn1, double *vn2, double *auxv, double *F, magma_int_t ldf, double *dF, magma_int_t lddf)
 
void magma_dlarfg (magma_int_t n, double *alpha, double *x, magma_int_t incx, double *tau)
 
magma_int_t magma_dlatrd (char uplo, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *e, double *tau, double *w, magma_int_t ldw, double *da, magma_int_t ldda, double *dw, magma_int_t lddw)
 
magma_int_t magma_dlatrd2 (char uplo, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *e, double *tau, double *w, magma_int_t ldw, double *da, magma_int_t ldda, double *dw, magma_int_t lddw, double *dwork, magma_int_t ldwork)
 
magma_int_t magma_dlahr2 (magma_int_t m, magma_int_t n, magma_int_t nb, double *da, double *dv, double *a, magma_int_t lda, double *tau, double *t, magma_int_t ldt, double *y, magma_int_t ldy)
 
magma_int_t magma_dlahru (magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, double *a, magma_int_t lda, double *da, double *y, double *v, double *t, double *dwork)
 
magma_int_t magma_dposv (char uplo, magma_int_t n, magma_int_t nrhs, double *A, magma_int_t lda, double *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_dpotrf (char uplo, magma_int_t n, double *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_dpotri (char uplo, magma_int_t n, double *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_dlauum (char uplo, magma_int_t n, double *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_dtrtri (char uplo, char diag, magma_int_t n, double *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_dsytrd (char uplo, magma_int_t n, double *A, magma_int_t lda, double *d, double *e, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dorgqr (magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, double *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_dorgqr2 (magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, magma_int_t *info)
 
magma_int_t magma_dormql (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dormqr (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dormtr (char side, char uplo, char trans, magma_int_t m, magma_int_t n, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dorghr (magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *a, magma_int_t lda, double *tau, double *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_dgeev (char jobvl, char jobvr, magma_int_t n, double *a, magma_int_t lda, double *wr, double *wi, double *vl, magma_int_t ldvl, double *vr, magma_int_t ldvr, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgeqp3 (magma_int_t m, magma_int_t n, double *a, magma_int_t lda, magma_int_t *jpvt, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgesvd (char jobu, char jobvt, magma_int_t m, magma_int_t n, double *a, magma_int_t lda, double *s, double *u, magma_int_t ldu, double *vt, magma_int_t ldvt, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dsyevd (char jobz, char uplo, magma_int_t n, double *a, magma_int_t lda, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsyevdx (char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsyevdx_2stage (char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsygvd (magma_int_t itype, char jobz, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsygvdx (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsygvdx_2stage (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dstedx (char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *d, double *e, double *z, magma_int_t ldz, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, double *dwork, magma_int_t *info)
 
magma_int_t magma_dlaex0 (magma_int_t n, double *d, double *e, double *q, magma_int_t ldq, double *work, magma_int_t *iwork, double *dwork, char range, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
 
magma_int_t magma_dlaex1 (magma_int_t n, double *d, double *q, magma_int_t ldq, magma_int_t *indxq, double rho, magma_int_t cutpnt, double *work, magma_int_t *iwork, double *dwork, char range, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
 
magma_int_t magma_dlaex3 (magma_int_t k, magma_int_t n, magma_int_t n1, double *d, double *q, magma_int_t ldq, double rho, double *dlamda, double *q2, magma_int_t *indx, magma_int_t *ctot, double *w, double *s, magma_int_t *indxq, double *dwork, char range, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
 
magma_int_t magma_dsygst (magma_int_t itype, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_dlahr2_m (magma_int_t n, magma_int_t k, magma_int_t nb, double *A, magma_int_t lda, double *tau, double *T, magma_int_t ldt, double *Y, magma_int_t ldy, struct dgehrd_data *data)
 
magma_int_t magma_dlahru_m (magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, double *A, magma_int_t lda, struct dgehrd_data *data)
 
magma_int_t magma_dgeev_m (char jobvl, char jobvr, magma_int_t n, double *A, magma_int_t lda, double *WR, double *WI, double *vl, magma_int_t ldvl, double *vr, magma_int_t ldvr, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgehrd_m (magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *T, magma_int_t *info)
 
magma_int_t magma_dorghr_m (magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *T, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_dorgqr_m (magma_int_t m, magma_int_t n, magma_int_t k, double *A, magma_int_t lda, double *tau, double *T, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_dpotrf_m (magma_int_t num_gpus, char uplo, magma_int_t n, double *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_dstedx_m (magma_int_t nrgpu, char range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *D, double *E, double *Z, magma_int_t ldz, double *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dtrsm_m (magma_int_t nrgpu, char side, char uplo, char transa, char diag, magma_int_t m, magma_int_t n, double alpha, double *a, magma_int_t lda, double *b, magma_int_t ldb)
 
magma_int_t magma_dormqr_m (magma_int_t nrgpu, char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dormtr_m (magma_int_t nrgpu, char side, char uplo, char trans, magma_int_t m, magma_int_t n, double *a, magma_int_t lda, double *tau, double *c, magma_int_t ldc, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dsygst_m (magma_int_t nrgpu, magma_int_t itype, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_dsyevd_m (magma_int_t nrgpu, char jobz, char uplo, magma_int_t n, double *a, magma_int_t lda, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsygvd_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsyevdx_m (magma_int_t nrgpu, char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsygvdx_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsyevdx_2stage_m (magma_int_t nrgpu, char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsygvdx_2stage_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, double *a, magma_int_t lda, double *b, magma_int_t ldb, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dgels_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *dB, magma_int_t lddb, double *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgels3_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *dB, magma_int_t lddb, double *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgelqf_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgeqr2x_gpu (magma_int_t *m, magma_int_t *n, double *dA, magma_int_t *ldda, double *dtau, double *dT, double *ddA, double *dwork, magma_int_t *info)
 
magma_int_t magma_dgeqr2x2_gpu (magma_int_t *m, magma_int_t *n, double *dA, magma_int_t *ldda, double *dtau, double *dT, double *ddA, double *dwork, magma_int_t *info)
 
magma_int_t magma_dgeqr2x3_gpu (magma_int_t *m, magma_int_t *n, double *dA, magma_int_t *ldda, double *dtau, double *dT, double *ddA, double *dwork, magma_int_t *info)
 
magma_int_t magma_dgeqr2x4_gpu (magma_int_t *m, magma_int_t *n, double *dA, magma_int_t *ldda, double *dtau, double *dT, double *ddA, double *dwork, magma_int_t *info, magma_queue_t stream)
 
magma_int_t magma_dgeqrf_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, double *dT, magma_int_t *info)
 
magma_int_t magma_dgeqrf2_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, magma_int_t *info)
 
magma_int_t magma_dgeqrf2_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, double **dlA, magma_int_t ldda, double *tau, magma_int_t *info)
 
magma_int_t magma_dgeqrf3_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, double *dT, magma_int_t *info)
 
magma_int_t magma_dgeqr2_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t lda, double *tau, double *work, magma_int_t *info)
 
magma_int_t magma_dgeqrs_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *tau, double *dT, double *dB, magma_int_t lddb, double *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_dgeqrs3_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *tau, double *dT, double *dB, magma_int_t lddb, double *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_dgessm_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, double *dL1, magma_int_t lddl1, double *dL, magma_int_t lddl, double *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_dgesv_gpu (magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, magma_int_t *ipiv, double *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_dgetf2_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_dgetrf_incpiv_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, double *hA, magma_int_t ldha, double *dA, magma_int_t ldda, double *hL, magma_int_t ldhl, double *dL, magma_int_t lddl, magma_int_t *ipiv, double *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_dgetrf_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_dgetrf_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, double **d_lA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_dgetrf_m (magma_int_t num_gpus0, magma_int_t m, magma_int_t n, double *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_dgetrf_piv (magma_int_t m, magma_int_t n, magma_int_t NB, double *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_dgetrf2_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t offset, double *d_lAT[], magma_int_t lddat, magma_int_t *ipiv, double *d_lAP[], double *a, magma_int_t lda, magma_queue_t streaml[][2], magma_int_t *info)
 
magma_int_t magma_dgetrf_nopiv_gpu (magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_dgetri_gpu (magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *ipiv, double *dwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dgetrs_gpu (char trans, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, magma_int_t *ipiv, double *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_dlabrd_gpu (magma_int_t m, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *da, magma_int_t ldda, double *d, double *e, double *tauq, double *taup, double *x, magma_int_t ldx, double *dx, magma_int_t lddx, double *y, magma_int_t ldy, double *dy, magma_int_t lddy)
 
magma_int_t magma_dlaqps_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, double *A, magma_int_t lda, magma_int_t *jpvt, double *tau, double *vn1, double *vn2, double *auxv, double *dF, magma_int_t lddf)
 
magma_int_t magma_dlaqps2_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, double *A, magma_int_t lda, magma_int_t *jpvt, double *tau, double *vn1, double *vn2, double *auxv, double *dF, magma_int_t lddf)
 
magma_int_t magma_dlaqps3_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, double *A, magma_int_t lda, magma_int_t *jpvt, double *tau, double *vn1, double *vn2, double *auxv, double *dF, magma_int_t lddf)
 
magma_int_t magma_dlarf_gpu (magma_int_t m, magma_int_t n, double *v, double *tau, double *c, magma_int_t ldc, double *xnorm)
 
magma_int_t magma_dlarfb_gpu (char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const double *dv, magma_int_t ldv, const double *dt, magma_int_t ldt, double *dc, magma_int_t ldc, double *dwork, magma_int_t ldwork)
 
magma_int_t magma_dlarfb2_gpu (magma_int_t m, magma_int_t n, magma_int_t k, const double *dV, magma_int_t ldv, const double *dT, magma_int_t ldt, double *dC, magma_int_t ldc, double *dwork, magma_int_t ldwork)
 
magma_int_t magma_dlarfb_gpu_gemm (char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const double *dv, magma_int_t ldv, const double *dt, magma_int_t ldt, double *dc, magma_int_t ldc, double *dwork, magma_int_t ldwork, double *dworkvt, magma_int_t ldworkvt)
 
magma_int_t magma_dlarfg_gpu (magma_int_t n, double *dx0, double *dx, double *dtau, double *dxnorm, double *dAkk)
 
magma_int_t magma_dposv_gpu (char uplo, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_dpotf2_gpu (magma_uplo_t uplo, magma_int_t n, double *dA, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_dpotrf_gpu (char uplo, magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_dpotrf_mgpu (magma_int_t ngpu, char uplo, magma_int_t n, double **d_lA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_dpotrf3_mgpu (magma_int_t num_gpus, char uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, double *d_lA[], magma_int_t ldda, double *d_lP[], magma_int_t lddp, double *a, magma_int_t lda, magma_int_t h, magma_queue_t stream[][3], magma_event_t event[][5], magma_int_t *info)
 
magma_int_t magma_dpotri_gpu (char uplo, magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_dlauum_gpu (char uplo, magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_dtrtri_gpu (char uplo, char diag, magma_int_t n, double *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_dsytrd_gpu (char uplo, magma_int_t n, double *da, magma_int_t ldda, double *d, double *e, double *tau, double *wa, magma_int_t ldwa, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dsytrd2_gpu (char uplo, magma_int_t n, double *da, magma_int_t ldda, double *d, double *e, double *tau, double *wa, magma_int_t ldwa, double *work, magma_int_t lwork, double *dwork, magma_int_t ldwork, magma_int_t *info)
 
double magma_dlatrd_mgpu (magma_int_t num_gpus, char uplo, magma_int_t n0, magma_int_t n, magma_int_t nb, magma_int_t nb0, double *a, magma_int_t lda, double *e, double *tau, double *w, magma_int_t ldw, double **da, magma_int_t ldda, magma_int_t offset, double **dw, magma_int_t lddw, double *dwork[MagmaMaxGPUs], magma_int_t ldwork, magma_int_t k, double *dx[MagmaMaxGPUs], double *dy[MagmaMaxGPUs], double *work, magma_queue_t stream[][10], double *times)
 
magma_int_t magma_dsytrd_mgpu (magma_int_t num_gpus, magma_int_t k, char uplo, magma_int_t n, double *a, magma_int_t lda, double *d, double *e, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dsytrd_sb2st (magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz, double *A, magma_int_t lda, double *D, double *E, double *V, magma_int_t ldv, double *TAU, magma_int_t compT, double *T, magma_int_t ldt)
 
magma_int_t magma_dsytrd_sy2sb (char uplo, magma_int_t n, magma_int_t NB, double *a, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *dT, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_dsytrd_sy2sb_mgpu (char uplo, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *dAmgpu[], magma_int_t ldda, double *dTmgpu[], magma_int_t lddt, magma_int_t ngpu, magma_int_t distblk, magma_queue_t streams[][20], magma_int_t nstream, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_dsytrd_sy2sb_mgpu_spec (char uplo, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *dAmgpu[], magma_int_t ldda, double *dTmgpu[], magma_int_t lddt, magma_int_t ngpu, magma_int_t distblk, magma_queue_t streams[][20], magma_int_t nstream, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_dpotrs_gpu (char uplo, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_dssssm_gpu (char storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, double *dA1, magma_int_t ldda1, double *dA2, magma_int_t ldda2, double *dL1, magma_int_t lddl1, double *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info)
 
magma_int_t magma_dtstrf_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, double *hU, magma_int_t ldhu, double *dU, magma_int_t lddu, double *hA, magma_int_t ldha, double *dA, magma_int_t ldda, double *hL, magma_int_t ldhl, double *dL, magma_int_t lddl, magma_int_t *ipiv, double *hwork, magma_int_t ldhwork, double *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_dorgqr_gpu (magma_int_t m, magma_int_t n, magma_int_t k, double *da, magma_int_t ldda, double *tau, double *dwork, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_dormql2_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *da, magma_int_t ldda, double *tau, double *dc, magma_int_t lddc, double *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_dormqr_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *dA, magma_int_t ldda, double *tau, double *dC, magma_int_t lddc, double *hwork, magma_int_t lwork, double *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_dormqr2_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, double *da, magma_int_t ldda, double *tau, double *dc, magma_int_t lddc, double *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_dormtr_gpu (char side, char uplo, char trans, magma_int_t m, magma_int_t n, double *da, magma_int_t ldda, double *tau, double *dc, magma_int_t lddc, double *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_dgeqp3_gpu (magma_int_t m, magma_int_t n, double *A, magma_int_t lda, magma_int_t *jpvt, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_dsyevd_gpu (char jobz, char uplo, magma_int_t n, double *da, magma_int_t ldda, double *w, double *wa, magma_int_t ldwa, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsyevdx_gpu (char jobz, char range, char uplo, magma_int_t n, double *da, magma_int_t ldda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *wa, magma_int_t ldwa, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_dsygst_gpu (magma_int_t itype, char uplo, magma_int_t n, double *da, magma_int_t ldda, double *db, magma_int_t lddb, magma_int_t *info)
 
void magma_dprint (magma_int_t m, magma_int_t n, const double *A, magma_int_t lda)
 
void magma_dprint_gpu (magma_int_t m, magma_int_t n, const double *dA, magma_int_t ldda)
 
void dpanel_to_q (magma_uplo_t uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
 
void dq_to_panel (magma_uplo_t uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
 

Macro Definition Documentation

#define PRECISION_d

Definition at line 17 of file magma_d.h.

Function Documentation

/**
 * Save the triangular part of the ib-by-ib panel of A into work and
 * overwrite it in A with the identity pattern (ones on the diagonal,
 * zeros in the triangle), so the panel holds only the elementary
 * reflector vectors.
 *
 * @param uplo  'U'/'u' for the upper triangle, anything else for lower.
 * @param ib    Panel width (number of columns processed).
 * @param A     Panel, column-major with leading dimension lda; modified.
 * @param lda   Leading dimension of A.
 * @param work  Output buffer receiving the saved triangle entries, in
 *              column order; must be large enough for ib*(ib+1)/2 values.
 */
void dpanel_to_q(magma_uplo_t uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
{
    int idx = 0;

    if (uplo == 'U' || uplo == 'u') {
        for (int i = 0; i < ib; ++i) {
            double *col = A + i*lda;
            /* save the strict upper part of this column, then zero it */
            for (int j = 0; j < i; ++j) {
                work[idx++] = col[j];
                col[j] = MAGMA_D_ZERO;
            }
            /* save the diagonal entry, then set it to one */
            work[idx++] = col[i];
            col[i] = MAGMA_D_ONE;
        }
    }
    else {
        for (int i = 0; i < ib; ++i) {
            double *col = A + i*lda;
            /* save the diagonal entry, then set it to one */
            work[idx++] = col[i];
            col[i] = MAGMA_D_ONE;
            /* save the strict lower part of this column, then zero it */
            for (int j = i + 1; j < ib; ++j) {
                work[idx++] = col[j];
                col[j] = MAGMA_D_ZERO;
            }
        }
    }
}
#define MAGMA_D_ONE
Definition: magma.h:176
#define A(i, j)
Definition: cprint.cpp:16
#define MAGMA_D_ZERO
Definition: magma.h:175

Here is the caller graph for this function:

/**
 * Inverse of dpanel_to_q: restore the triangular part of the ib-by-ib
 * panel of A from the entries previously saved in work.
 *
 * @param uplo  'U'/'u' for the upper triangle, anything else for lower.
 * @param ib    Panel width (number of columns processed).
 * @param A     Panel, column-major with leading dimension lda; modified.
 * @param lda   Leading dimension of A.
 * @param work  Buffer holding the saved triangle entries, in the same
 *              column order that dpanel_to_q wrote them.
 */
void dq_to_panel(magma_uplo_t uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
{
    int idx = 0;

    if (uplo == 'U' || uplo == 'u') {
        /* restore column i's entries up to and including the diagonal */
        for (int i = 0; i < ib; ++i) {
            double *col = A + i*lda;
            for (int j = 0; j <= i; ++j) {
                col[j] = work[idx++];
            }
        }
    }
    else {
        /* restore column i's entries from the diagonal downward */
        for (int i = 0; i < ib; ++i) {
            double *col = A + i*lda;
            for (int j = i; j < ib; ++j) {
                col[j] = work[idx++];
            }
        }
    }
}
#define A(i, j)
Definition: cprint.cpp:16

Here is the caller graph for this function:

/**
 * Return the V block size used by the two-stage (bulge-chasing)
 * tridiagonal reduction.
 *
 * Currently both device generations return the same value, min(nb, 64);
 * the branch is kept so the Kepler value can be tuned independently.
 *
 * @param m          Matrix dimension (currently unused in the selection).
 * @param nb         Band/block size; the result is capped at 64.
 * @param nbthreads  Number of worker threads (currently unused).
 * @return           The V block size, min(nb, 64).
 */
magma_int_t magma_dbulge_get_Vblksiz(magma_int_t m, magma_int_t nb, magma_int_t nbthreads)
{
    /* Restored: the generated listing dropped this declaration (source
       line 857), leaving `arch` undeclared below. */
    magma_int_t arch = magma_getdevice_arch();
    if ( arch >= 300 ) { // 3.x Kepler + SB
        return min(nb, 64);
    }
    else {               // 2.x Fermi or 1.x
        return min(nb, 64);
    }
}
#define min(a, b)
Definition: common_magma.h:86
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_getdevice_arch()

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgebrd ( magma_int_t  m,
magma_int_t  n,
double *  A,
magma_int_t  lda,
double *  d,
double *  e,
double *  tauq,
double *  taup,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 17 of file dgebrd.cpp.

References __func__, A, dA, dwork, lapackf77_dgebrd(), MAGMA_D_MAKE, MAGMA_D_NEG_ONE, MAGMA_D_ONE, magma_dgemm(), magma_dgetmatrix, magma_dlabrd_gpu(), magma_dmalloc(), magma_dsetmatrix, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgebrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MagmaNoTrans, MagmaTrans, max, and min.

22 {
23 /* -- MAGMA (version 1.4.0) --
24  Univ. of Tennessee, Knoxville
25  Univ. of California, Berkeley
26  Univ. of Colorado, Denver
27  August 2013
28 
29  Purpose
30  =======
31  DGEBRD reduces a general real M-by-N matrix A to upper or lower
32  bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
33 
34  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
35 
36  Arguments
37  =========
38  M (input) INTEGER
39  The number of rows in the matrix A. M >= 0.
40 
41  N (input) INTEGER
42  The number of columns in the matrix A. N >= 0.
43 
44  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
45  On entry, the M-by-N general matrix to be reduced.
46  On exit,
47  if m >= n, the diagonal and the first superdiagonal are
48  overwritten with the upper bidiagonal matrix B; the
49  elements below the diagonal, with the array TAUQ, represent
50  the orthogonal matrix Q as a product of elementary
51  reflectors, and the elements above the first superdiagonal,
52  with the array TAUP, represent the orthogonal matrix P as
53  a product of elementary reflectors;
54  if m < n, the diagonal and the first subdiagonal are
55  overwritten with the lower bidiagonal matrix B; the
56  elements below the first subdiagonal, with the array TAUQ,
57  represent the orthogonal matrix Q as a product of
58  elementary reflectors, and the elements above the diagonal,
59  with the array TAUP, represent the orthogonal matrix P as
60  a product of elementary reflectors.
61  See Further Details.
62 
63  LDA (input) INTEGER
64  The leading dimension of the array A. LDA >= max(1,M).
65 
66  D (output) double precision array, dimension (min(M,N))
67  The diagonal elements of the bidiagonal matrix B:
68  D(i) = A(i,i).
69 
70  E (output) double precision array, dimension (min(M,N)-1)
71  The off-diagonal elements of the bidiagonal matrix B:
72  if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
73  if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
74 
75  TAUQ (output) DOUBLE_PRECISION array dimension (min(M,N))
76  The scalar factors of the elementary reflectors which
77  represent the orthogonal matrix Q. See Further Details.
78 
79  TAUP (output) DOUBLE_PRECISION array, dimension (min(M,N))
80  The scalar factors of the elementary reflectors which
81  represent the orthogonal matrix P. See Further Details.
82 
83  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
84  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
85 
86  LWORK (input) INTEGER
87  The length of the array WORK. LWORK >= (M+N)*NB, where NB
88  is the optimal blocksize.
89 
90  If LWORK = -1, then a workspace query is assumed; the routine
91  only calculates the optimal size of the WORK array, returns
92  this value as the first entry of the WORK array, and no error
93  message related to LWORK is issued by XERBLA.
94 
95  INFO (output) INTEGER
96  = 0: successful exit
97  < 0: if INFO = -i, the i-th argument had an illegal value.
98 
99  Further Details
100  ===============
101  The matrices Q and P are represented as products of elementary
102  reflectors:
103 
104  If m >= n,
105  Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
106  Each H(i) and G(i) has the form:
107  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
108  where tauq and taup are real scalars, and v and u are real vectors;
109  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
110  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
111  tauq is stored in TAUQ(i) and taup in TAUP(i).
112 
113  If m < n,
114  Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
115  Each H(i) and G(i) has the form:
116  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
117  where tauq and taup are real scalars, and v and u are real vectors;
118  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
119  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
120  tauq is stored in TAUQ(i) and taup in TAUP(i).
121 
122  The contents of A on exit are illustrated by the following examples:
123 
124  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
125 
126  ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
127  ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
128  ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
129  ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
130  ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
131  ( v1 v2 v3 v4 v5 )
132 
133  where d and e denote diagonal and off-diagonal elements of B, vi
134  denotes an element of the vector defining H(i), and ui an element of
135  the vector defining G(i).
136  ===================================================================== */
137 
138  double c_neg_one = MAGMA_D_NEG_ONE;
139  double c_one = MAGMA_D_ONE;
140  double *da, *dwork;
141 
142  magma_int_t ncol, nrow, jmax, nb, ldda;
143 
144  magma_int_t i, j, nx;
145  magma_int_t iinfo;
146 
147  magma_int_t minmn;
148  magma_int_t ldwrkx, ldwrky, lwkopt;
149  magma_int_t lquery;
150 
151  nb = magma_get_dgebrd_nb(n);
152  ldda = m;
153 
154  lwkopt = (m + n) * nb;
155  work[0] = MAGMA_D_MAKE( lwkopt, 0. );
156  lquery = (lwork == -1);
157 
158  /* Check arguments */
159  *info = 0;
160  if (m < 0) {
161  *info = -1;
162  } else if (n < 0) {
163  *info = -2;
164  } else if (lda < max(1,m)) {
165  *info = -4;
166  } else if (lwork < lwkopt && (! lquery) ) {
167  *info = -10;
168  }
169  if (*info < 0) {
170  magma_xerbla( __func__, -(*info) );
171  return *info;
172  }
173  else if (lquery)
174  return *info;
175 
176  /* Quick return if possible */
177  minmn = min(m,n);
178  if (minmn == 0) {
179  work[0] = c_one;
180  return *info;
181  }
182 
183  if (MAGMA_SUCCESS != magma_dmalloc( &da, n*ldda + (m + n)*nb )) {
184  fprintf (stderr, "!!!! device memory allocation error in dgebrd\n" );
185  *info = MAGMA_ERR_DEVICE_ALLOC;
186  return *info;
187  }
188  dwork = da + (n)*ldda;
189 
190  ldwrkx = m;
191  ldwrky = n;
192 
193  /* Set the block/unblock crossover point NX. */
194  nx = 128;
195 
196  /* Copy the matrix to the GPU */
197  if (minmn - nx >= 1) {
198  magma_dsetmatrix( m, n, a, lda, da, ldda );
199  }
200 
201  for (i=0; i< (minmn - nx); i += nb) {
202 
203  /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
204  the matrices X and Y which are needed to update the unreduced
205  part of the matrix */
206  nrow = m - i;
207  ncol = n - i;
208 
209  /* Get the current panel (no need for the 1st iteration) */
210  if ( i > 0 ) {
211  magma_dgetmatrix( nrow, nb, dA(i, i), ldda, A( i, i), lda );
212  magma_dgetmatrix( nb, ncol - nb,
213  dA(i, i+nb), ldda,
214  A( i, i+nb), lda );
215  }
216 
217  magma_dlabrd_gpu(nrow, ncol, nb,
218  A(i, i), lda, dA(i, i), ldda,
219  d+i, e+i, tauq+i, taup+i,
220  work, ldwrkx, dwork, ldwrkx, // x, dx
221  work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky); // y, dy
222 
223  /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
224  of the form A := A - V*Y' - X*U' */
225  nrow = m - i - nb;
226  ncol = n - i - nb;
227 
228  // Send Y back to the GPU
229  magma_dsetmatrix( nrow, nb, work + nb, ldwrkx, dwork + nb, ldwrkx );
230  magma_dsetmatrix( ncol, nb,
231  work + (ldwrkx+1)*nb, ldwrky,
232  dwork + (ldwrkx+1)*nb, ldwrky );
233 
235  nrow, ncol, nb,
236  c_neg_one, dA(i+nb, i ), ldda,
237  dwork+(ldwrkx+1)*nb, ldwrky,
238  c_one, dA(i+nb, i+nb), ldda);
239 
241  nrow, ncol, nb,
242  c_neg_one, dwork+nb, ldwrkx,
243  dA( i, i+nb ), ldda,
244  c_one, dA( i+nb, i+nb ), ldda);
245 
246  /* Copy diagonal and off-diagonal elements of B back into A */
247  if (m >= n) {
248  jmax = i + nb;
249  for (j = i; j < jmax; ++j) {
250  *A(j, j ) = MAGMA_D_MAKE( d[j], 0. );
251  *A(j, j+1) = MAGMA_D_MAKE( e[j], 0. );
252  }
253  } else {
254  jmax = i + nb;
255  for (j = i; j < jmax; ++j) {
256  *A(j, j ) = MAGMA_D_MAKE( d[j], 0. );
257  *A(j+1, j ) = MAGMA_D_MAKE( e[j], 0. );
258  }
259  }
260  }
261 
262  /* Use unblocked code to reduce the remainder of the matrix */
263  nrow = m - i;
264  ncol = n - i;
265 
266  if ( 0 < minmn - nx ) {
267  magma_dgetmatrix( nrow, ncol, dA(i, i), ldda, A(i, i), lda );
268  }
269 
270  lapackf77_dgebrd( &nrow, &ncol,
271  A(i, i), &lda, d+i, e+i,
272  tauq+i, taup+i, work, &lwork, &iinfo);
273  work[0] = MAGMA_D_MAKE( lwkopt, 0. );
274 
275  magma_free( da );
276  return *info;
277 } /* dgebrd */
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define A(i, j)
Definition: dgebrd.cpp:13
magma_int_t magma_dlabrd_gpu(magma_int_t m, magma_int_t n, magma_int_t nb, double *a, magma_int_t lda, double *da, magma_int_t ldda, double *d, double *e, double *tauq, double *taup, double *x, magma_int_t ldx, double *dx, magma_int_t lddx, double *y, magma_int_t ldy, double *dy, magma_int_t lddy)
Definition: dlabrd_gpu.cpp:18
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define dwork(dev, i, j)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaTrans
Definition: magma.h:58
void lapackf77_dgebrd(const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, double *d, double *e, double *tauq, double *taup, double *work, const magma_int_t *lwork, magma_int_t *info)
magma_int_t ldda
#define dA(i, j)
Definition: dgebrd.cpp:14
#define MAGMA_SUCCESS
Definition: magma.h:106
void magma_dgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc)
magma_int_t magma_get_dgebrd_nb(magma_int_t m)
Definition: get_nb.cpp:458
#define MAGMA_D_NEG_ONE
Definition: magma.h:178
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeev ( char  jobvl,
char  jobvr,
magma_int_t  n,
double *  a,
magma_int_t  lda,
double *  wr,
double *  wi,
double *  vl,
magma_int_t  ldvl,
double *  vr,
magma_int_t  ldvr,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 25 of file dgeev.cpp.

References __func__, cblas_dnrm2(), cblas_drot(), cblas_dscal(), cblas_idamax(), dT, lapackf77_dgebak(), lapackf77_dgebal(), lapackf77_dgehrd(), lapackf77_dhseqr(), lapackf77_dlabad, lapackf77_dlacpy(), lapackf77_dlamch, lapackf77_dlange(), lapackf77_dlapy2, lapackf77_dlartg(), lapackf77_dlascl(), lapackf77_dorghr(), lapackf77_dtrevc(), lapackf77_lsame, MAGMA_D_MAKE, magma_dgehrd(), magma_dgehrd2(), magma_dmalloc(), magma_dorghr(), magma_dsqrt, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MagmaLowerStr, max, vl, and vr.

33 {
34 /* -- MAGMA (version 1.4.0) --
35  Univ. of Tennessee, Knoxville
36  Univ. of California, Berkeley
37  Univ. of Colorado, Denver
38  August 2013
39 
40  Purpose
41  =======
42  DGEEV computes for an N-by-N real nonsymmetric matrix A, the
43  eigenvalues and, optionally, the left and/or right eigenvectors.
44 
45  The right eigenvector v(j) of A satisfies
46  A * v(j) = lambda(j) * v(j)
47  where lambda(j) is its eigenvalue.
48  The left eigenvector u(j) of A satisfies
49  u(j)**T * A = lambda(j) * u(j)**T
50  where u(j)**T denotes the transpose of u(j).
51 
52  The computed eigenvectors are normalized to have Euclidean norm
53  equal to 1 and largest component real.
54 
55  Arguments
56  =========
57  JOBVL (input) CHARACTER*1
58  = 'N': left eigenvectors of A are not computed;
59  = 'V': left eigenvectors of are computed.
60 
61  JOBVR (input) CHARACTER*1
62  = 'N': right eigenvectors of A are not computed;
63  = 'V': right eigenvectors of A are computed.
64 
65  N (input) INTEGER
66  The order of the matrix A. N >= 0.
67 
68  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
69  On entry, the N-by-N matrix A.
70  On exit, A has been overwritten.
71 
72  LDA (input) INTEGER
73  The leading dimension of the array A. LDA >= max(1,N).
74 
75  WR (output) DOUBLE PRECISION array, dimension (N)
76  WI (output) DOUBLE PRECISION array, dimension (N)
77  WR and WI contain the real and imaginary parts,
78  respectively, of the computed eigenvalues. Complex
79  conjugate pairs of eigenvalues appear consecutively
80  with the eigenvalue having the positive imaginary part
81  first.
82 
83  VL (output) DOUBLE PRECISION array, dimension (LDVL,N)
84  If JOBVL = 'V', the left eigenvectors u(j) are stored one
85  after another in the columns of VL, in the same order
86  as their eigenvalues.
87  If JOBVL = 'N', VL is not referenced.
88  u(j) = VL(:,j), the j-th column of VL.
89 
90  LDVL (input) INTEGER
91  The leading dimension of the array VL. LDVL >= 1; if
92  JOBVL = 'V', LDVL >= N.
93 
94  VR (output) DOUBLE PRECISION array, dimension (LDVR,N)
95  If JOBVR = 'V', the right eigenvectors v(j) are stored one
96  after another in the columns of VR, in the same order
97  as their eigenvalues.
98  If JOBVR = 'N', VR is not referenced.
99  v(j) = VR(:,j), the j-th column of VR.
100 
101  LDVR (input) INTEGER
102  The leading dimension of the array VR. LDVR >= 1; if
103  JOBVR = 'V', LDVR >= N.
104 
105  WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
106  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
107 
108  LWORK (input) INTEGER
109  The dimension of the array WORK. LWORK >= (1+nb)*N.
110 
111  If LWORK = -1, then a workspace query is assumed; the routine
112  only calculates the optimal size of the WORK array, returns
113  this value as the first entry of the WORK array, and no error
114  message related to LWORK is issued by XERBLA.
115 
116  INFO (output) INTEGER
117  = 0: successful exit
118  < 0: if INFO = -i, the i-th argument had an illegal value.
119  > 0: if INFO = i, the QR algorithm failed to compute all the
120  eigenvalues, and no eigenvectors have been computed;
121  elements and i+1:N of W contain eigenvalues which have
122  converged.
123  ===================================================================== */
124 
125  #define vl(i,j) (vl + (i) + (j)*ldvl)
126  #define vr(i,j) (vr + (i) + (j)*ldvr)
127 
128  magma_int_t c_one = 1;
129  magma_int_t c_zero = 0;
130 
131  double d__1, d__2;
132  double r, cs, sn, scl;
133  double dum[1], eps;
134  double anrm, cscale, bignum, smlnum;
135  magma_int_t i, k, ilo, ihi;
136  magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
137  magma_int_t scalea, minwrk, lquery, wantvl, wantvr, select[1];
138 
139  char side[2] = {0, 0};
140  char jobvl_[2] = {jobvl, 0};
141  char jobvr_[2] = {jobvr, 0};
142 
143  *info = 0;
144  lquery = lwork == -1;
145  wantvl = lapackf77_lsame( jobvl_, "V" );
146  wantvr = lapackf77_lsame( jobvr_, "V" );
147  if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
148  *info = -1;
149  } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
150  *info = -2;
151  } else if (n < 0) {
152  *info = -3;
153  } else if (lda < max(1,n)) {
154  *info = -5;
155  } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
156  *info = -9;
157  } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
158  *info = -11;
159  }
160 
161  /* Compute workspace */
162  nb = magma_get_dgehrd_nb( n );
163  if (*info == 0) {
164  minwrk = (2+nb)*n;
165  work[0] = MAGMA_D_MAKE( (double) minwrk, 0. );
166 
167  if (lwork < minwrk && ! lquery) {
168  *info = -13;
169  }
170  }
171 
172  if (*info != 0) {
173  magma_xerbla( __func__, -(*info) );
174  return *info;
175  }
176  else if (lquery) {
177  return *info;
178  }
179 
180  /* Quick return if possible */
181  if (n == 0) {
182  return *info;
183  }
184 
185  #if defined(VERSION3)
186  double *dT;
187  if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) {
188  *info = MAGMA_ERR_DEVICE_ALLOC;
189  return *info;
190  }
191  #endif
192 
193  /* Get machine constants */
194  eps = lapackf77_dlamch( "P" );
195  smlnum = lapackf77_dlamch( "S" );
196  bignum = 1. / smlnum;
197  lapackf77_dlabad( &smlnum, &bignum );
198  smlnum = magma_dsqrt( smlnum ) / eps;
199  bignum = 1. / smlnum;
200 
201  /* Scale A if max element outside range [SMLNUM,BIGNUM] */
202  anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum );
203  scalea = 0;
204  if (anrm > 0. && anrm < smlnum) {
205  scalea = 1;
206  cscale = smlnum;
207  } else if (anrm > bignum) {
208  scalea = 1;
209  cscale = bignum;
210  }
211  if (scalea) {
212  lapackf77_dlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
213  }
214 
215  /* Balance the matrix
216  * (Workspace: need N) */
217  ibal = 0;
218  lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );
219 
220  /* Reduce to upper Hessenberg form
221  * (Workspace: need 3*N, prefer 2*N + N*NB) */
222  itau = ibal + n;
223  iwrk = itau + n;
224  liwrk = lwork - iwrk;
225 
226  #if defined(VERSION1)
227  // Version 1 - LAPACK
228  lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda,
229  &work[itau], &work[iwrk], &liwrk, &ierr );
230  #elif defined(VERSION2)
231  // Version 2 - LAPACK consistent HRD
232  magma_dgehrd2( n, ilo, ihi, A, lda,
233  &work[itau], &work[iwrk], liwrk, &ierr );
234  #elif defined(VERSION3)
235  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
236  magma_dgehrd( n, ilo, ihi, A, lda,
237  &work[itau], &work[iwrk], liwrk, dT, &ierr );
238  #endif
239 
240  if (wantvl) {
241  /* Want left eigenvectors
242  * Copy Householder vectors to VL */
243  side[0] = 'L';
245  A, &lda, vl, &ldvl );
246 
247  /* Generate orthogonal matrix in VL
248  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
249  #if defined(VERSION1) || defined(VERSION2)
250  // Version 1 & 2 - LAPACK
251  lapackf77_dorghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
252  &work[iwrk], &liwrk, &ierr );
253  #elif defined(VERSION3)
254  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
255  magma_dorghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
256  #endif
257 
258  /* Perform QR iteration, accumulating Schur vectors in VL
259  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
260  iwrk = itau;
261  liwrk = lwork - iwrk;
262  lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
263  vl, &ldvl, &work[iwrk], &liwrk, info );
264 
265  if (wantvr) {
266  /* Want left and right eigenvectors
267  * Copy Schur vectors to VR */
268  side[0] = 'B';
269  lapackf77_dlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
270  }
271  }
272  else if (wantvr) {
273  /* Want right eigenvectors
274  * Copy Householder vectors to VR */
275  side[0] = 'R';
276  lapackf77_dlacpy( "L", &n, &n, A, &lda, vr, &ldvr );
277 
278  /* Generate orthogonal matrix in VR
279  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
280  #if defined(VERSION1) || defined(VERSION2)
281  // Version 1 & 2 - LAPACK
282  lapackf77_dorghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
283  &work[iwrk], &liwrk, &ierr );
284  #elif defined(VERSION3)
285  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
286  magma_dorghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
287  #endif
288 
289  /* Perform QR iteration, accumulating Schur vectors in VR
290  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
291  iwrk = itau;
292  liwrk = lwork - iwrk;
293  lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
294  vr, &ldvr, &work[iwrk], &liwrk, info );
295  }
296  else {
297  /* Compute eigenvalues only
298  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
299  iwrk = itau;
300  liwrk = lwork - iwrk;
301  lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, WR, WI,
302  vr, &ldvr, &work[iwrk], &liwrk, info );
303  }
304 
305  /* If INFO > 0 from DHSEQR, then quit */
306  if (*info > 0) {
307  goto CLEANUP;
308  }
309 
310  if (wantvl || wantvr) {
311  /* Compute left and/or right eigenvectors
312  * (Workspace: need 4*N) */
313  lapackf77_dtrevc( side, "B", select, &n, A, &lda, vl, &ldvl,
314  vr, &ldvr, &n, &nout, &work[iwrk], &ierr );
315  }
316 
317  if (wantvl) {
318  /* Undo balancing of left eigenvectors
319  * (Workspace: need N) */
320  lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
321  vl, &ldvl, &ierr );
322 
323  /* Normalize left eigenvectors and make largest component real */
324  for (i = 0; i < n; ++i) {
325  if ( WI[i] == 0. ) {
326  scl = 1. / cblas_dnrm2( n, vl(0,i), 1 );
327  cblas_dscal( n, scl, vl(0,i), 1 );
328  }
329  else if ( WI[i] > 0. ) {
330  d__1 = cblas_dnrm2( n, vl(0,i), 1 );
331  d__2 = cblas_dnrm2( n, vl(0,i+1), 1 );
332  scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
333  cblas_dscal( n, scl, vl(0,i), 1 );
334  cblas_dscal( n, scl, vl(0,i+1), 1 );
335  for (k = 0; k < n; ++k) {
336  /* Computing 2nd power */
337  d__1 = *vl(k,i);
338  d__2 = *vl(k,i+1);
339  work[iwrk + k] = d__1*d__1 + d__2*d__2;
340  }
341  k = cblas_idamax( n, &work[iwrk], 1 );
342  lapackf77_dlartg( vl(k,i), vl(k,i+1), &cs, &sn, &r );
343  cblas_drot( n, vl(0,i), 1, vl(0,i+1), 1, cs, sn );
344  *vl(k,i+1) = 0.;
345  }
346  }
347  }
348 
349  if (wantvr) {
350  /* Undo balancing of right eigenvectors
351  * (Workspace: need N) */
352  lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
353  vr, &ldvr, &ierr );
354 
355  /* Normalize right eigenvectors and make largest component real */
356  for (i = 0; i < n; ++i) {
357  if ( WI[i] == 0. ) {
358  scl = 1. / cblas_dnrm2( n, vr(0,i), 1 );
359  cblas_dscal( n, scl, vr(0,i), 1 );
360  }
361  else if ( WI[i] > 0. ) {
362  d__1 = cblas_dnrm2( n, vr(0,i), 1 );
363  d__2 = cblas_dnrm2( n, vr(0,i+1), 1 );
364  scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
365  cblas_dscal( n, scl, vr(0,i), 1 );
366  cblas_dscal( n, scl, vr(0,i+1), 1 );
367  for (k = 0; k < n; ++k) {
368  /* Computing 2nd power */
369  d__1 = *vr(k,i);
370  d__2 = *vr(k,i+1);
371  work[iwrk + k] = d__1*d__1 + d__2*d__2;
372  }
373  k = cblas_idamax( n, &work[iwrk], 1 );
374  lapackf77_dlartg( vr(k,i), vr(k,i+1), &cs, &sn, &r );
375  cblas_drot( n, vr(0,i), 1, vr(0,i+1), 1, cs, sn );
376  *vr(k,i+1) = 0.;
377  }
378  }
379  }
380 
381 CLEANUP:
382  /* Undo scaling if necessary */
383  if (scalea) {
384  i__1 = n - (*info);
385  i__2 = max( n - (*info), 1 );
386  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
387  WR + (*info), &i__2, &ierr );
388  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
389  WI + (*info), &i__2, &ierr );
390  if (*info > 0) {
391  i__1 = ilo - 1;
392  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
393  WR, &n, &ierr );
394  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
395  WI, &n, &ierr );
396  }
397  }
398 
399  #if defined(VERSION3)
400  magma_free( dT );
401  #endif
402 
403  return *info;
404 } /* magma_dgeev */
#define magma_dsqrt
Definition: common_magma.h:98
void lapackf77_dhseqr(const char *job, const char *compz, const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *H, const magma_int_t *ldh, WSPLIT, double *Z, const magma_int_t *ldz, double *work, const magma_int_t *lwork, magma_int_t *info)
void lapackf77_dlacpy(const char *uplo, const magma_int_t *m, const magma_int_t *n, const double *A, const magma_int_t *lda, double *B, const magma_int_t *ldb)
void lapackf77_dgehrd(const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *A, const magma_int_t *lda, double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)
#define __func__
Definition: common_magma.h:65
void lapackf77_dgebal(const char *job, const magma_int_t *n, double *A, const magma_int_t *lda, magma_int_t *ilo, magma_int_t *ihi, double *scale, magma_int_t *info)
void lapackf77_dgebak(const char *job, const char *side, const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, const double *scale, const magma_int_t *m, double *V, const magma_int_t *ldv, magma_int_t *info)
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define lapackf77_dlabad
Definition: magma_lapack.h:29
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
double lapackf77_dlange(const char *norm, const magma_int_t *m, const magma_int_t *n, const double *A, const magma_int_t *lda, double *work)
magma_int_t magma_dorghr(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *a, magma_int_t lda, double *tau, double *dT, magma_int_t nb, magma_int_t *info)
Definition: dorghr.cpp:14
int magma_int_t
Definition: magmablas.h:12
#define vr(i, j)
#define lapackf77_dlapy2
Definition: magma_lapack.h:41
#define lapackf77_dlamch
Definition: magma_lapack.h:27
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX)
void cblas_dscal(const int N, const double alpha, double *X, const int incX)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
magma_int_t magma_get_dgehrd_nb(magma_int_t m)
Definition: get_nb.cpp:355
magma_int_t magma_dgehrd2(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
Definition: dgehrd2.cpp:14
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
void lapackf77_dlartg(double *F, double *G, double *cs, double *SN, double *R)
double cblas_dnrm2(const int N, const double *X, const int incX)
#define MagmaLowerStr
Definition: magma.h:85
void lapackf77_dlascl(const char *type, const magma_int_t *kl, const magma_int_t *ku, double *cfrom, double *cto, const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, magma_int_t *info)
void cblas_drot(const int N, double *X, const int incX, double *Y, const int incY, const double c, const double s)
#define A(i, j)
Definition: cprint.cpp:16
#define MAGMA_SUCCESS
Definition: magma.h:106
#define dT(m)
magma_int_t magma_dgehrd(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *dT, magma_int_t *info)
Definition: dgehrd.cpp:17
#define max(a, b)
Definition: common_magma.h:82
#define vl(i, j)
void lapackf77_dtrevc(const char *side, const char *howmny, magma_int_t *select, const magma_int_t *n, double *T, const magma_int_t *ldt, double *Vl, const magma_int_t *ldvl, double *Vr, const magma_int_t *ldvr, const magma_int_t *mm, magma_int_t *m, double *work, DWORKFORZ magma_int_t *info)
void lapackf77_dorghr(const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *A, const magma_int_t *lda, const double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeev_m ( char  jobvl,
char  jobvr,
magma_int_t  n,
double *  A,
magma_int_t  lda,
double *  WR,
double *  WI,
double *  vl,
magma_int_t  ldvl,
double *  vr,
magma_int_t  ldvr,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 27 of file dgeev_m.cpp.

References __func__, cblas_dnrm2(), cblas_drot(), cblas_dscal(), cblas_idamax(), dT, lapackf77_dgebak(), lapackf77_dgebal(), lapackf77_dgehrd(), lapackf77_dhseqr(), lapackf77_dlabad, lapackf77_dlacpy(), lapackf77_dlamch, lapackf77_dlange(), lapackf77_dlapy2, lapackf77_dlartg(), lapackf77_dlascl(), lapackf77_dorghr(), lapackf77_dtrevc(), lapackf77_lsame, MAGMA_D_MAKE, magma_dgehrd(), magma_dgehrd2(), magma_dgehrd_m(), magma_dmalloc(), magma_dmalloc_cpu(), magma_dorghr(), magma_dorghr_m(), magma_dsetmatrix, magma_dsqrt, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_dgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), MagmaLowerStr, max, T, vl, and vr.

35 {
36 /* -- MAGMA (version 1.4.0) --
37  Univ. of Tennessee, Knoxville
38  Univ. of California, Berkeley
39  Univ. of Colorado, Denver
40  August 2013
41 
42  Purpose
43  =======
44  DGEEV computes for an N-by-N real nonsymmetric matrix A, the
45  eigenvalues and, optionally, the left and/or right eigenvectors.
46 
47  The right eigenvector v(j) of A satisfies
48  A * v(j) = lambda(j) * v(j)
49  where lambda(j) is its eigenvalue.
50  The left eigenvector u(j) of A satisfies
51  u(j)**T * A = lambda(j) * u(j)**T
52  where u(j)**T denotes the transpose of u(j).
53 
54  The computed eigenvectors are normalized to have Euclidean norm
55  equal to 1 and largest component real.
56 
57  Arguments
58  =========
59  JOBVL (input) CHARACTER*1
60  = 'N': left eigenvectors of A are not computed;
61  = 'V': left eigenvectors of are computed.
62 
63  JOBVR (input) CHARACTER*1
64  = 'N': right eigenvectors of A are not computed;
65  = 'V': right eigenvectors of A are computed.
66 
67  N (input) INTEGER
68  The order of the matrix A. N >= 0.
69 
70  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
71  On entry, the N-by-N matrix A.
72  On exit, A has been overwritten.
73 
74  LDA (input) INTEGER
75  The leading dimension of the array A. LDA >= max(1,N).
76 
77  WR (output) DOUBLE PRECISION array, dimension (N)
78  WI (output) DOUBLE PRECISION array, dimension (N)
79  WR and WI contain the real and imaginary parts,
80  respectively, of the computed eigenvalues. Complex
81  conjugate pairs of eigenvalues appear consecutively
82  with the eigenvalue having the positive imaginary part
83  first.
84 
85  VL (output) DOUBLE PRECISION array, dimension (LDVL,N)
86  If JOBVL = 'V', the left eigenvectors u(j) are stored one
87  after another in the columns of VL, in the same order
88  as their eigenvalues.
89  If JOBVL = 'N', VL is not referenced.
90  u(j) = VL(:,j), the j-th column of VL.
91 
92  LDVL (input) INTEGER
93  The leading dimension of the array VL. LDVL >= 1; if
94  JOBVL = 'V', LDVL >= N.
95 
96  VR (output) DOUBLE PRECISION array, dimension (LDVR,N)
97  If JOBVR = 'V', the right eigenvectors v(j) are stored one
98  after another in the columns of VR, in the same order
99  as their eigenvalues.
100  If JOBVR = 'N', VR is not referenced.
101  v(j) = VR(:,j), the j-th column of VR.
102 
103  LDVR (input) INTEGER
104  The leading dimension of the array VR. LDVR >= 1; if
105  JOBVR = 'V', LDVR >= N.
106 
107  WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
108  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
109 
110  LWORK (input) INTEGER
111  The dimension of the array WORK. LWORK >= (1+nb)*N.
112 
113  If LWORK = -1, then a workspace query is assumed; the routine
114  only calculates the optimal size of the WORK array, returns
115  this value as the first entry of the WORK array, and no error
116  message related to LWORK is issued by XERBLA.
117 
118  INFO (output) INTEGER
119  = 0: successful exit
120  < 0: if INFO = -i, the i-th argument had an illegal value.
121  > 0: if INFO = i, the QR algorithm failed to compute all the
122  eigenvalues, and no eigenvectors have been computed;
123  elements and i+1:N of W contain eigenvalues which have
124  converged.
125  ===================================================================== */
126 
127  #define vl(i,j) (vl + (i) + (j)*ldvl)
128  #define vr(i,j) (vr + (i) + (j)*ldvr)
129 
130  magma_int_t c_one = 1;
131  magma_int_t c_zero = 0;
132 
133  double d__1, d__2;
134  double r, cs, sn, scl;
135  double dum[1], eps;
136  double anrm, cscale, bignum, smlnum;
137  magma_int_t i, k, ilo, ihi;
138  magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
139  magma_int_t scalea, minwrk, lquery, wantvl, wantvr, select[1];
140 
141  char side[2] = {0, 0};
142  char jobvl_[2] = {jobvl, 0};
143  char jobvr_[2] = {jobvr, 0};
144 
145  *info = 0;
146  lquery = lwork == -1;
147  wantvl = lapackf77_lsame( jobvl_, "V" );
148  wantvr = lapackf77_lsame( jobvr_, "V" );
149  if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
150  *info = -1;
151  } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
152  *info = -2;
153  } else if (n < 0) {
154  *info = -3;
155  } else if (lda < max(1,n)) {
156  *info = -5;
157  } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
158  *info = -9;
159  } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
160  *info = -11;
161  }
162 
163  /* Compute workspace */
164  nb = magma_get_dgehrd_nb( n );
165  if (*info == 0) {
166  minwrk = (2+nb)*n;
167  work[0] = MAGMA_D_MAKE( (double) minwrk, 0. );
168 
169  if (lwork < minwrk && ! lquery) {
170  *info = -13;
171  }
172  }
173 
174  if (*info != 0) {
175  magma_xerbla( __func__, -(*info) );
176  return *info;
177  }
178  else if (lquery) {
179  return *info;
180  }
181 
182  /* Quick return if possible */
183  if (n == 0) {
184  return *info;
185  }
186 
187  #if defined(Version3) || defined(Version4) || defined(Version5)
188  double *dT;
189  if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) {
190  *info = MAGMA_ERR_DEVICE_ALLOC;
191  return *info;
192  }
193  #endif
194  #if defined(Version4) || defined(Version5)
195  double *T;
196  if (MAGMA_SUCCESS != magma_dmalloc_cpu( &T, nb*n )) {
197  magma_free( dT );
198  *info = MAGMA_ERR_HOST_ALLOC;
199  return *info;
200  }
201  #endif
202 
203  /* Get machine constants */
204  eps = lapackf77_dlamch( "P" );
205  smlnum = lapackf77_dlamch( "S" );
206  bignum = 1. / smlnum;
207  lapackf77_dlabad( &smlnum, &bignum );
208  smlnum = magma_dsqrt( smlnum ) / eps;
209  bignum = 1. / smlnum;
210 
211  /* Scale A if max element outside range [SMLNUM,BIGNUM] */
212  anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum );
213  scalea = 0;
214  if (anrm > 0. && anrm < smlnum) {
215  scalea = 1;
216  cscale = smlnum;
217  } else if (anrm > bignum) {
218  scalea = 1;
219  cscale = bignum;
220  }
221  if (scalea) {
222  lapackf77_dlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
223  }
224 
225  /* Balance the matrix
226  * (Workspace: need N) */
227  ibal = 0;
228  lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );
229 
230  /* Reduce to upper Hessenberg form
231  * (Workspace: need 3*N, prefer 2*N + N*NB) */
232  itau = ibal + n;
233  iwrk = itau + n;
234  liwrk = lwork - iwrk;
235 
236  #if defined(Version1)
237  // Version 1 - LAPACK
238  lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda,
239  &work[itau], &work[iwrk], &liwrk, &ierr );
240  #elif defined(Version2)
241  // Version 2 - LAPACK consistent HRD
242  magma_dgehrd2( n, ilo, ihi, A, lda,
243  &work[itau], &work[iwrk], &liwrk, &ierr );
244  #elif defined(Version3)
245  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
246  magma_dgehrd( n, ilo, ihi, A, lda,
247  &work[itau], &work[iwrk], liwrk, dT, &ierr );
248  #elif defined(Version4) || defined(Version5)
249  // Version 4 - Multi-GPU, T on host
250  magma_dgehrd_m( n, ilo, ihi, A, lda,
251  &work[itau], &work[iwrk], liwrk, T, &ierr );
252  magma_dsetmatrix( nb, n, T, nb, dT, nb );
253  #endif
254 
255  if (wantvl) {
256  /* Want left eigenvectors
257  * Copy Householder vectors to VL */
258  side[0] = 'L';
260  A, &lda, vl, &ldvl );
261 
262  /* Generate orthogonal matrix in VL
263  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
264  #if defined(Version1) || defined(Version2)
265  // Version 1 & 2 - LAPACK
266  lapackf77_dorghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
267  &work[iwrk], &liwrk, &ierr );
268  #elif defined(Version3) || defined(Version4)
269  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
270  magma_dorghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
271  #elif defined(Version5)
272  // Version 5 - Multi-GPU, T on host
273  magma_dorghr_m( n, ilo, ihi, vl, ldvl, &work[itau], T, nb, &ierr );
274  #endif
275 
276  /* Perform QR iteration, accumulating Schur vectors in VL
277  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
278  iwrk = itau;
279  liwrk = lwork - iwrk;
280  lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
281  vl, &ldvl, &work[iwrk], &liwrk, info );
282 
283  if (wantvr) {
284  /* Want left and right eigenvectors
285  * Copy Schur vectors to VR */
286  side[0] = 'B';
287  lapackf77_dlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
288  }
289  }
290  else if (wantvr) {
291  /* Want right eigenvectors
292  * Copy Householder vectors to VR */
293  side[0] = 'R';
294  lapackf77_dlacpy( "L", &n, &n, A, &lda, vr, &ldvr );
295 
296  /* Generate orthogonal matrix in VR
297  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
298  #if defined(Version1) || defined(Version2)
299  // Version 1 & 2 - LAPACK
300  lapackf77_dorghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
301  &work[iwrk], &liwrk, &ierr );
302  #elif defined(Version3) || defined(Version4)
303  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
304  magma_dorghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
305  #elif defined(Version5)
306  // Version 5 - Multi-GPU, T on host
307  magma_dorghr_m( n, ilo, ihi, vr, ldvr, &work[itau], T, nb, &ierr );
308  #endif
309 
310  /* Perform QR iteration, accumulating Schur vectors in VR
311  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
312  iwrk = itau;
313  liwrk = lwork - iwrk;
314  lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
315  vr, &ldvr, &work[iwrk], &liwrk, info );
316  }
317  else {
318  /* Compute eigenvalues only
319  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
320  iwrk = itau;
321  liwrk = lwork - iwrk;
322  lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, WR, WI,
323  vr, &ldvr, &work[iwrk], &liwrk, info );
324  }
325 
326  /* If INFO > 0 from DHSEQR, then quit */
327  if (*info > 0) {
328  goto CLEANUP;
329  }
330 
331  if (wantvl || wantvr) {
332  /* Compute left and/or right eigenvectors
333  * (Workspace: need 4*N) */
334  lapackf77_dtrevc( side, "B", select, &n, A, &lda, vl, &ldvl,
335  vr, &ldvr, &n, &nout, &work[iwrk], &ierr );
336  }
337 
338  if (wantvl) {
339  /* Undo balancing of left eigenvectors
340  * (Workspace: need N) */
341  lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
342  vl, &ldvl, &ierr );
343 
344  /* Normalize left eigenvectors and make largest component real */
345  for (i = 0; i < n; ++i) {
346  if ( WI[i] == 0. ) {
347  scl = 1. / cblas_dnrm2( n, vl(0,i), 1 );
348  cblas_dscal( n, scl, vl(0,i), 1 );
349  }
350  else if ( WI[i] > 0. ) {
351  d__1 = cblas_dnrm2( n, vl(0,i), 1 );
352  d__2 = cblas_dnrm2( n, vl(0,i+1), 1 );
353  scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
354  cblas_dscal( n, scl, vl(0,i), 1 );
355  cblas_dscal( n, scl, vl(0,i+1), 1 );
356  for (k = 0; k < n; ++k) {
357  /* Computing 2nd power */
358  d__1 = *vl(k,i);
359  d__2 = *vl(k,i+1);
360  work[iwrk + k] = d__1*d__1 + d__2*d__2;
361  }
362  k = cblas_idamax( n, &work[iwrk], 1 );
363  lapackf77_dlartg( vl(k,i), vl(k,i+1), &cs, &sn, &r );
364  cblas_drot( n, vl(0,i), 1, vl(0,i+1), 1, cs, sn );
365  *vl(k,i+1) = 0.;
366  }
367  }
368  }
369 
370  if (wantvr) {
371  /* Undo balancing of right eigenvectors
372  * (Workspace: need N) */
373  lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
374  vr, &ldvr, &ierr );
375 
376  /* Normalize right eigenvectors and make largest component real */
377  for (i = 0; i < n; ++i) {
378  if ( WI[i] == 0. ) {
379  scl = 1. / cblas_dnrm2( n, vr(0,i), 1 );
380  cblas_dscal( n, scl, vr(0,i), 1 );
381  }
382  else if ( WI[i] > 0. ) {
383  d__1 = cblas_dnrm2( n, vr(0,i), 1 );
384  d__2 = cblas_dnrm2( n, vr(0,i+1), 1 );
385  scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
386  cblas_dscal( n, scl, vr(0,i), 1 );
387  cblas_dscal( n, scl, vr(0,i+1), 1 );
388  for (k = 0; k < n; ++k) {
389  /* Computing 2nd power */
390  d__1 = *vr(k,i);
391  d__2 = *vr(k,i+1);
392  work[iwrk + k] = d__1*d__1 + d__2*d__2;
393  }
394  k = cblas_idamax( n, &work[iwrk], 1 );
395  lapackf77_dlartg( vr(k,i), vr(k,i+1), &cs, &sn, &r );
396  cblas_drot( n, vr(0,i), 1, vr(0,i+1), 1, cs, sn );
397  *vr(k,i+1) = 0.;
398  }
399  }
400  }
401 
402 CLEANUP:
403  /* Undo scaling if necessary */
404  if (scalea) {
405  i__1 = n - (*info);
406  i__2 = max( n - (*info), 1 );
407  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
408  WR + (*info), &i__2, &ierr );
409  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
410  WI + (*info), &i__2, &ierr );
411  if (*info > 0) {
412  i__1 = ilo - 1;
413  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
414  WR, &n, &ierr );
415  lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
416  WI, &n, &ierr );
417  }
418  }
419 
420  #if defined(Version3) || defined(Version4) || defined(Version5)
421  magma_free( dT );
422  #endif
423  #if defined(Version4) || defined(Version5)
424  magma_free_cpu( T );
425  #endif
426 
427  return *info;
428 } /* magma_dgeev */
#define magma_dsqrt
Definition: common_magma.h:98
void lapackf77_dhseqr(const char *job, const char *compz, const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *H, const magma_int_t *ldh, WSPLIT, double *Z, const magma_int_t *ldz, double *work, const magma_int_t *lwork, magma_int_t *info)
void lapackf77_dlacpy(const char *uplo, const magma_int_t *m, const magma_int_t *n, const double *A, const magma_int_t *lda, double *B, const magma_int_t *ldb)
void lapackf77_dgehrd(const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *A, const magma_int_t *lda, double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)
#define __func__
Definition: common_magma.h:65
void lapackf77_dgebal(const char *job, const magma_int_t *n, double *A, const magma_int_t *lda, magma_int_t *ilo, magma_int_t *ihi, double *scale, magma_int_t *info)
void lapackf77_dgebak(const char *job, const char *side, const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, const double *scale, const magma_int_t *m, double *V, const magma_int_t *ldv, magma_int_t *info)
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define lapackf77_dlabad
Definition: magma_lapack.h:29
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define T(m)
Definition: zgeqrf_mc.cpp:14
#define magma_free(ptr)
Definition: magma.h:57
double lapackf77_dlange(const char *norm, const magma_int_t *m, const magma_int_t *n, const double *A, const magma_int_t *lda, double *work)
magma_int_t magma_dorghr(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *a, magma_int_t lda, double *tau, double *dT, magma_int_t nb, magma_int_t *info)
Definition: dorghr.cpp:14
int magma_int_t
Definition: magmablas.h:12
#define vr(i, j)
magma_int_t magma_dorghr_m(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *T, magma_int_t nb, magma_int_t *info)
Definition: dorghr_m.cpp:16
#define lapackf77_dlapy2
Definition: magma_lapack.h:41
#define lapackf77_dlamch
Definition: magma_lapack.h:27
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX)
magma_int_t magma_dgehrd_m(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *T, magma_int_t *info)
Definition: dgehrd_m.cpp:16
void cblas_dscal(const int N, const double alpha, double *X, const int incX)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
#define vl(i, j)
magma_int_t magma_get_dgehrd_nb(magma_int_t m)
Definition: get_nb.cpp:355
magma_int_t magma_dgehrd2(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info)
Definition: dgehrd2.cpp:14
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
void lapackf77_dlartg(double *F, double *G, double *cs, double *SN, double *R)
double cblas_dnrm2(const int N, const double *X, const int incX)
#define MagmaLowerStr
Definition: magma.h:85
static magma_err_t magma_dmalloc_cpu(double **ptrPtr, size_t n)
Definition: magma.h:84
void lapackf77_dlascl(const char *type, const magma_int_t *kl, const magma_int_t *ku, double *cfrom, double *cto, const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, magma_int_t *info)
void cblas_drot(const int N, double *X, const int incX, double *Y, const int incY, const double c, const double s)
#define A(i, j)
Definition: cprint.cpp:16
#define MAGMA_SUCCESS
Definition: magma.h:106
#define dT(m)
magma_int_t magma_dgehrd(magma_int_t n, magma_int_t ilo, magma_int_t ihi, double *A, magma_int_t lda, double *tau, double *work, magma_int_t lwork, double *dT, magma_int_t *info)
Definition: dgehrd.cpp:17
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
void lapackf77_dtrevc(const char *side, const char *howmny, magma_int_t *select, const magma_int_t *n, double *T, const magma_int_t *ldt, double *Vl, const magma_int_t *ldvl, double *Vr, const magma_int_t *ldvr, const magma_int_t *mm, magma_int_t *m, double *work, DWORKFORZ magma_int_t *info)
void lapackf77_dorghr(const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *A, const magma_int_t *lda, const double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgehrd ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
double *  A,
magma_int_t  lda,
double *  tau,
double *  work,
magma_int_t  lwork,
double *  dT,
magma_int_t info 
)

Definition at line 17 of file dgehrd.cpp.

References __func__, A, dA, dTi, dV, dwork, dzero_nbxnb_block(), lapackf77_dgehd2(), MAGMA_D_ONE, MAGMA_D_SET2REAL, MAGMA_D_ZERO, magma_dgetmatrix, magma_dlahr2(), magma_dlahru(), magma_dmalloc(), magma_dmalloc_cpu(), magma_dsetmatrix, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_dgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), magmablas_dlaset(), max, min, and T.

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  DGEHRD reduces a DOUBLE_PRECISION general matrix A to upper Hessenberg form H by
33  an orthogonal similarity transformation: Q' * A * Q = H . This version
34  stores the triangular matrices used in the factorization so that they can
35  be applied directly (i.e., without being recomputed) later. As a result,
36  the application of Q is much faster.
37 
38  Arguments
39  =========
40  N (input) INTEGER
41  The order of the matrix A. N >= 0.
42 
43  ILO (input) INTEGER
44  IHI (input) INTEGER
45  It is assumed that A is already upper triangular in rows
46  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
47  set by a previous call to DGEBAL; otherwise they should be
48  set to 1 and N respectively. See Further Details.
49  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
50 
51  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
52  On entry, the N-by-N general matrix to be reduced.
53  On exit, the upper triangle and the first subdiagonal of A
54  are overwritten with the upper Hessenberg matrix H, and the
55  elements below the first subdiagonal, with the array TAU,
56  represent the orthogonal matrix Q as a product of elementary
57  reflectors. See Further Details.
58 
59  LDA (input) INTEGER
60  The leading dimension of the array A. LDA >= max(1,N).
61 
62  TAU (output) DOUBLE_PRECISION array, dimension (N-1)
63  The scalar factors of the elementary reflectors (see Further
64  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
65  zero.
66 
67  WORK (workspace/output) DOUBLE_PRECISION array, dimension (LWORK)
68  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The length of the array WORK. LWORK >= max(1,N).
72  For optimum performance LWORK >= N*NB, where NB is the
73  optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  dT (output) DOUBLE_PRECISION array on the GPU, dimension NB*N,
81  where NB is the optimal blocksize. It stores the NB*NB blocks
82  of the triangular T matrices used in the reduction.
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of (ihi-ilo) elementary
91  reflectors
92 
93  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
94 
95  Each H(i) has the form
96 
97  H(i) = I - tau * v * v'
98 
99  where tau is a real scalar, and v is a real vector with
100  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
101  exit in A(i+2:ihi,i), and tau in TAU(i).
102 
103  The contents of A are illustrated by the following example, with
104  n = 7, ilo = 2 and ihi = 6:
105 
106  on entry, on exit,
107 
108  ( a a a a a a a ) ( a a h h h h a )
109  ( a a a a a a ) ( a h h h h a )
110  ( a a a a a a ) ( h h h h h h )
111  ( a a a a a a ) ( v2 h h h h h )
112  ( a a a a a a ) ( v2 v3 h h h h )
113  ( a a a a a a ) ( v2 v3 v4 h h h )
114  ( a ) ( a )
115 
116  where a denotes an element of the original matrix A, h denotes a
117  modified element of the upper Hessenberg matrix H, and vi denotes an
118  element of the vector defining H(i).
119 
120  This implementation follows the hybrid algorithm and notations described in
121 
122  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
123  form through hybrid GPU-based computing," University of Tennessee Computer
124  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
125  May 24, 2009.
126 
127  This version stores the T matrices in dT, for later use in magma_dorghr.
128 
129  ===================================================================== */
130 
131  #define A( i, j ) ( A + (i) + (j)*lda)
132  #define dA( i, j ) (dA + (i) + (j-ilo)*ldda)
133 
134  double c_one = MAGMA_D_ONE;
135  double c_zero = MAGMA_D_ZERO;
136 
138  magma_int_t ldda = n; // assumed in dlahru
139 
140  magma_int_t nh, iws;
141  magma_int_t iinfo;
142  magma_int_t ldwork;
143  magma_int_t lquery;
144 
145  *info = 0;
146  iws = n*nb;
147  MAGMA_D_SET2REAL( work[0], (double) iws );
148 
149  lquery = lwork == -1;
150  if (n < 0) {
151  *info = -1;
152  } else if (ilo < 1 || ilo > max(1,n)) {
153  *info = -2;
154  } else if (ihi < min(ilo,n) || ihi > n) {
155  *info = -3;
156  } else if (lda < max(1,n)) {
157  *info = -5;
158  } else if (lwork < max(1,n) && ! lquery) {
159  *info = -8;
160  }
161  if (*info != 0) {
162  magma_xerbla( __func__, -(*info) );
163  return *info;
164  }
165  else if (lquery)
166  return *info;
167 
168  // Adjust from 1-based indexing
169  ilo -= 1;
170 
171  // Quick return if possible
172  nh = ihi - ilo;
173  if (nh <= 1) {
174  work[0] = c_one;
175  return *info;
176  }
177 
178  // GPU workspace is:
179  // nb*ldda for dwork for dlahru
180  // nb*ldda for dV
181  // n*ldda for dA
182  double *dwork;
183  if (MAGMA_SUCCESS != magma_dmalloc( &dwork, 2*nb*ldda + n*ldda )) {
184  *info = MAGMA_ERR_DEVICE_ALLOC;
185  return *info;
186  }
187  double *dV = dwork + nb*ldda;
188  double *dA = dwork + nb*ldda*2;
189  ldwork = n;
190 
191  magma_int_t i;
192 
193  double *T, *dTi;
194  magma_dmalloc_cpu( &T, nb*nb );
195  if ( T == NULL ) {
196  magma_free( dwork );
197  *info = MAGMA_ERR_HOST_ALLOC;
198  return *info;
199  }
200 
201  // zero first block of V, which is lower triangular
202  dzero_nbxnb_block(nb, dV, ldda);
203 
204  // Set elements 0:ILO-1 and IHI-1:N-2 of TAU to zero
205  for(i = 0; i < ilo; ++i)
206  tau[i] = c_zero;
207 
208  for(i = max(0,ihi-1); i < n-1; ++i)
209  tau[i] = c_zero;
210 
211  for(i=0; i < nb*nb; i += 4)
212  T[i] = T[i+1] = T[i+2] = T[i+3] = c_zero;
213  magmablas_dlaset( 'F', nb, n, dT, nb );
214 
215  // If not enough workspace, use unblocked code
216  if ( lwork < iws ) {
217  nb = 1;
218  }
219 
220  if (nb == 1 || nb > nh) {
221  // Use unblocked code below
222  i = ilo;
223  }
224  else {
225  // Use blocked code
226  // Copy the matrix to the GPU
227  magma_dsetmatrix( n, n-ilo, A(0,ilo), lda, dA, ldda );
228 
229  for (i = ilo; i < ihi-1 - nb; i += nb) {
230  // Reduce columns i:i+nb-1 to Hessenberg form, returning the
231  // matrices V and T of the block reflector H = I - V*T*V'
232  // which performs the reduction, and also the matrix Y = A*V*T
233 
234  // Get the current panel (no need for the 1st iteration)
235  magma_dgetmatrix( ihi-i, nb,
236  dA(i,i), ldda,
237  A (i,i), lda );
238 
239  // add 1 to i for 1-based index
240  magma_dlahr2( ihi, i+1, nb,
241  dA(0,i),
242  dV,
243  A (0,i), lda,
244  &tau[i], T, nb, work, ldwork);
245 
246  // Copy T from the CPU to dT on the GPU
247  dTi = dT + (i - ilo)*nb;
248  magma_dsetmatrix( nb, nb, T, nb, dTi, nb );
249 
250  magma_dlahru( n, ihi, i, nb,
251  A (0,i), lda,
252  dA(0,i), // dA
253  dA(i,i), // dY, stored over current panel
254  dV, dTi, dwork );
255  }
256 
257  // Copy remainder to host
258  magma_dgetmatrix( n, n-i,
259  dA(0,i), ldda,
260  A (0,i), lda );
261  }
262 
263  // Use unblocked code to reduce the rest of the matrix
264  // add 1 to i for 1-based index
265  i += 1;
266  lapackf77_dgehd2(&n, &i, &ihi, A, &lda, tau, work, &iinfo);
267  MAGMA_D_SET2REAL( work[0], (double) iws );
268 
269  magma_free( dwork );
270  magma_free_cpu( T );
271 
272  return *info;
273 } /* magma_dgehrd */
void dzero_nbxnb_block(magma_int_t nb, magmaDouble_ptr dA, magma_int_t ldda)
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
magma_int_t magma_dlahru(magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, double *a, magma_int_t lda, double *da, double *y, double *v, double *t, double *dwork)
Definition: dlahru.cpp:17
#define A(i, j)
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define T(m)
Definition: zgeqrf_mc.cpp:14
#define magma_free(ptr)
Definition: magma.h:57
void magmablas_dlaset(magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda)
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define dTi(d)
#define dwork(dev, i, j)
magma_int_t magma_get_dgehrd_nb(magma_int_t m)
Definition: get_nb.cpp:355
#define dV(m)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void lapackf77_dgehd2(const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *A, const magma_int_t *lda, double *tau, double *work, magma_int_t *info)
static magma_err_t magma_dmalloc_cpu(double **ptrPtr, size_t n)
Definition: magma.h:84
magma_int_t ldda
#define MAGMA_SUCCESS
Definition: magma.h:106
magma_int_t magma_dlahr2(magma_int_t m, magma_int_t n, magma_int_t nb, double *da, double *dv, double *a, magma_int_t lda, double *tau, double *t, magma_int_t ldt, double *y, magma_int_t ldy)
Definition: dlahr2.cpp:17
#define MAGMA_D_SET2REAL(v, t)
Definition: magma.h:159
#define dT(m)
#define MAGMA_D_ZERO
Definition: magma.h:175
#define dA(i, j)
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgehrd2 ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
double *  A,
magma_int_t  lda,
double *  tau,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 14 of file dgehrd2.cpp.

References __func__, dzero_nbxnb_block(), lapackf77_dgehd2(), MAGMA_D_ONE, MAGMA_D_SET2REAL, MAGMA_D_ZERO, magma_dgetmatrix, magma_dlahr2(), magma_dlahru(), magma_dmalloc(), magma_dmalloc_cpu(), magma_dsetmatrix, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_dgehrd_nb(), MAGMA_SUCCESS, magma_xerbla(), max, and min.

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  DGEHRD2 reduces a DOUBLE_PRECISION general matrix A to upper Hessenberg form H by
28  an orthogonal similarity transformation: Q' * A * Q = H.
29 
30  Arguments
31  =========
32  N (input) INTEGER
33  The order of the matrix A. N >= 0.
34 
35  ILO (input) INTEGER
36  IHI (input) INTEGER
37  It is assumed that A is already upper triangular in rows
38  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
39  set by a previous call to DGEBAL; otherwise they should be
40  set to 1 and N respectively. See Further Details.
41  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
42 
43  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
44  On entry, the N-by-N general matrix to be reduced.
45  On exit, the upper triangle and the first subdiagonal of A
46  are overwritten with the upper Hessenberg matrix H, and the
47  elements below the first subdiagonal, with the array TAU,
48  represent the orthogonal matrix Q as a product of elementary
49  reflectors. See Further Details.
50 
51  LDA (input) INTEGER
52  The leading dimension of the array A. LDA >= max(1,N).
53 
54  TAU (output) DOUBLE_PRECISION array, dimension (N-1)
55  The scalar factors of the elementary reflectors (see Further
56  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
57  zero.
58 
59  WORK (workspace/output) DOUBLE_PRECISION array, dimension (LWORK)
60  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
61 
62  LWORK (input) INTEGER
63  The length of the array WORK. LWORK >= max(1,N).
64  For optimum performance LWORK >= N*NB, where NB is the
65  optimal blocksize.
66 
67  If LWORK = -1, then a workspace query is assumed; the routine
68  only calculates the optimal size of the WORK array, returns
69  this value as the first entry of the WORK array, and no error
70  message related to LWORK is issued by XERBLA.
71 
72  INFO (output) INTEGER
73  = 0: successful exit
74  < 0: if INFO = -i, the i-th argument had an illegal value.
75 
76  Further Details
77  ===============
78  The matrix Q is represented as a product of (ihi-ilo) elementary
79  reflectors
80 
81  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
82 
83  Each H(i) has the form
84 
85  H(i) = I - tau * v * v'
86 
87  where tau is a real scalar, and v is a real vector with
88  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
89  exit in A(i+2:ihi,i), and tau in TAU(i).
90 
91  The contents of A are illustrated by the following example, with
92  n = 7, ilo = 2 and ihi = 6:
93 
94  on entry, on exit,
95 
96  ( a a a a a a a ) ( a a h h h h a )
97  ( a a a a a a ) ( a h h h h a )
98  ( a a a a a a ) ( h h h h h h )
99  ( a a a a a a ) ( v2 h h h h h )
100  ( a a a a a a ) ( v2 v3 h h h h )
101  ( a a a a a a ) ( v2 v3 v4 h h h )
102  ( a ) ( a )
103 
104  where a denotes an element of the original matrix A, h denotes a
105  modified element of the upper Hessenberg matrix H, and vi denotes an
106  element of the vector defining H(i).
107 
108  This implementation follows the hybrid algorithm and notations described in
109 
110  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
111  form through hybrid GPU-based computing," University of Tennessee Computer
112  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
113  May 24, 2009.
114  ===================================================================== */
115 
116 
117  double c_one = MAGMA_D_ONE;
118  double c_zero = MAGMA_D_ZERO;
119 
120  magma_int_t nb = magma_get_dgehrd_nb(n);
121  magma_int_t N = n, ldda = n;
122 
123  magma_int_t ib;
124  magma_int_t nh, iws;
125  magma_int_t nbmin, iinfo;
126  magma_int_t ldwork;
127  magma_int_t lquery;
128 
129  --tau;
130 
131  *info = 0;
132  MAGMA_D_SET2REAL( work[0], (double) n * nb );
133 
134  lquery = lwork == -1;
135  if (n < 0) {
136  *info = -1;
137  } else if (ilo < 1 || ilo > max(1,n)) {
138  *info = -2;
139  } else if (ihi < min(ilo,n) || ihi > n) {
140  *info = -3;
141  } else if (lda < max(1,n)) {
142  *info = -5;
143  } else if (lwork < max(1,n) && ! lquery) {
144  *info = -8;
145  }
146  if (*info != 0) {
147  magma_xerbla( __func__, -(*info) );
148  return *info;
149  }
150  else if (lquery)
151  return *info;
152 
153  /* Quick return if possible */
154  nh = ihi - ilo + 1;
155  if (nh <= 1) {
156  work[0] = c_one;
157  return *info;
158  }
159 
160  double *da;
161  if (MAGMA_SUCCESS != magma_dmalloc( &da, N*ldda + 2*N*nb + nb*nb )) {
162  *info = MAGMA_ERR_DEVICE_ALLOC;
163  return *info;
164  }
165 
166  double *d_A = da;
167  double *d_work = da + (N+nb)*ldda;
168 
169  magma_int_t i__;
170 
171  double *t, *d_t;
172  magma_dmalloc_cpu( &t, nb*nb );
173  if ( t == NULL ) {
174  magma_free( da );
175  *info = MAGMA_ERR_HOST_ALLOC;
176  return *info;
177  }
178  d_t = d_work + nb * ldda;
179 
180  dzero_nbxnb_block(nb, d_A+N*ldda, ldda);
181 
182  /* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
183  for (i__ = 1; i__ < ilo; ++i__)
184  tau[i__] = c_zero;
185 
186  for (i__ = max(1,ihi); i__ < n; ++i__)
187  tau[i__] = c_zero;
188 
189  for(i__=0; i__< nb*nb; i__+=4)
190  t[i__] = t[i__+1] = t[i__+2] = t[i__+3] = c_zero;
191 
192  nbmin = 2;
193  iws = 1;
194  if (nb > 1 && nb < nh) {
195  /* Determine when to cross over from blocked to unblocked code
196  (last block is always handled by unblocked code) */
197  if (nb < nh) {
198  /* Determine if workspace is large enough for blocked code */
199  iws = n * nb;
200  if (lwork < iws) {
201  /* Not enough workspace to use optimal NB: determine the
202  minimum value of NB, and reduce NB or force use of
203  unblocked code */
204  nbmin = nb;
205  if (lwork >= n * nbmin)
206  nb = lwork / n;
207  else
208  nb = 1;
209  }
210  }
211  }
212  ldwork = n;
213 
214  if (nb < nbmin || nb >= nh) {
215  /* Use unblocked code below */
216  i__ = ilo;
217  }
218  else {
219  /* Use blocked code */
220  /* Copy the matrix to the GPU */
221  magma_dsetmatrix( N, N-ilo+1, a+(ilo-1)*(lda), lda, d_A, ldda );
222 
223  for (i__ = ilo; i__ < ihi - nb; i__ += nb) {
224  /* Computing MIN */
225  ib = min(nb, ihi - i__);
226 
227  /* Reduce columns i:i+ib-1 to Hessenberg form, returning the
228  matrices V and T of the block reflector H = I - V*T*V'
229  which performs the reduction, and also the matrix Y = A*V*T */
230 
231  /* Get the current panel (no need for the 1st iteration) */
232  magma_dgetmatrix( ihi-i__+1, ib,
233  d_A + (i__ - ilo)*ldda + i__ - 1, ldda,
234  a + (i__ - 1 )*lda + i__ - 1, lda );
235 
236  magma_dlahr2(ihi, i__, ib,
237  d_A + (i__ - ilo)*ldda,
238  d_A + N*ldda + 1,
239  a + (i__ - 1 )*(lda) , lda,
240  &tau[i__], t, nb, work, ldwork);
241 
242  /* Copy T from the CPU to D_T on the GPU */
243  magma_dsetmatrix( nb, nb, t, nb, d_t, nb );
244 
245  magma_dlahru(n, ihi, i__ - 1, ib,
246  a + (i__ - 1 )*(lda), lda,
247  d_A + (i__ - ilo)*ldda,
248  d_A + (i__ - ilo)*ldda + i__ - 1,
249  d_A + N*ldda, d_t, d_work);
250  }
251  }
252 
253  /* Use unblocked code to reduce the rest of the matrix */
254  if (!(nb < nbmin || nb >= nh)) {
255  magma_dgetmatrix( n, n-i__+1,
256  d_A+ (i__-ilo)*ldda, ldda,
257  a + (i__-1)*(lda), lda );
258  }
259  lapackf77_dgehd2(&n, &i__, &ihi, a, &lda, &tau[1], work, &iinfo);
260  MAGMA_D_SET2REAL( work[0], (double) iws );
261 
262  magma_free( da );
263  magma_free_cpu(t);
264 
265  return *info;
266 } /* magma_dgehrd2 */
void dzero_nbxnb_block(magma_int_t nb, magmaDouble_ptr dA, magma_int_t ldda)
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
magma_int_t magma_dlahru(magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, double *a, magma_int_t lda, double *da, double *y, double *v, double *t, double *dwork)
Definition: dlahru.cpp:17
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
magma_int_t magma_get_dgehrd_nb(magma_int_t m)
Definition: get_nb.cpp:355
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void lapackf77_dgehd2(const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *A, const magma_int_t *lda, double *tau, double *work, magma_int_t *info)
static magma_err_t magma_dmalloc_cpu(double **ptrPtr, size_t n)
Definition: magma.h:84
magma_int_t ldda
#define MAGMA_SUCCESS
Definition: magma.h:106
magma_int_t magma_dlahr2(magma_int_t m, magma_int_t n, magma_int_t nb, double *da, double *dv, double *a, magma_int_t lda, double *tau, double *t, magma_int_t ldt, double *y, magma_int_t ldy)
Definition: dlahr2.cpp:17
#define MAGMA_D_SET2REAL(v, t)
Definition: magma.h:159
#define MAGMA_D_ZERO
Definition: magma.h:175
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgehrd_m ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
double *  A,
magma_int_t  lda,
double *  tau,
double *  work,
magma_int_t  lwork,
double *  T,
magma_int_t info 
)

Definition at line 16 of file dgehrd_m.cpp.

References __func__, dgehrd_data::A, A, dA, lapackf77_dgehd2(), lapackf77_dlaset(), dgehrd_data::ldda, dgehrd_data::ldv, dgehrd_data::ldvd, MAGMA_D_ONE, MAGMA_D_SET2REAL, MAGMA_D_ZERO, magma_dgetmatrix, magma_dgetmatrix_async, magma_dlahr2_m(), magma_dlahru_m(), magma_dmalloc(), magma_dsetmatrix_1D_col_bcyclic(), MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgehrd_nb(), magma_getdevice(), magma_num_gpus(), magma_queue_create, magma_queue_destroy, magma_setdevice(), MAGMA_SUCCESS, magma_xerbla(), magmablasSetKernelStream(), max, min, dgehrd_data::ngpu, dgehrd_data::streams, dgehrd_data::Ti, dgehrd_data::V, dgehrd_data::Vd, dgehrd_data::W, and dgehrd_data::Y.

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  DGEHRD reduces a DOUBLE_PRECISION general matrix A to upper Hessenberg form H by
33  an orthogonal similarity transformation: Q' * A * Q = H. This version
34  stores the triangular matrices used in the factorization so that they can
35  be applied directly (i.e., without being recomputed) later. As a result,
36  the application of Q is much faster.
37 
38  Arguments
39  =========
40  N (input) INTEGER
41  The order of the matrix A. N >= 0.
42 
43  ILO (input) INTEGER
44  IHI (input) INTEGER
45  It is assumed that A is already upper triangular in rows
46  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
47  set by a previous call to DGEBAL; otherwise they should be
48  set to 1 and N respectively. See Further Details.
49  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
50 
51  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
52  On entry, the N-by-N general matrix to be reduced.
53  On exit, the upper triangle and the first subdiagonal of A
54  are overwritten with the upper Hessenberg matrix H, and the
55  elements below the first subdiagonal, with the array TAU,
56  represent the orthogonal matrix Q as a product of elementary
57  reflectors. See Further Details.
58 
59  LDA (input) INTEGER
60  The leading dimension of the array A. LDA >= max(1,N).
61 
62  TAU (output) DOUBLE_PRECISION array, dimension (N-1)
63  The scalar factors of the elementary reflectors (see Further
64  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
65  zero.
66 
67  WORK (workspace/output) DOUBLE_PRECISION array, dimension (LWORK)
68  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The length of the array WORK. LWORK >= max(1,N).
72  For optimum performance LWORK >= N*NB, where NB is the
73  optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  T (output) DOUBLE_PRECISION array, dimension NB*N,
81  where NB is the optimal blocksize. It stores the NB*NB blocks
82  of the triangular T matrices used in the reduction.
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of (ihi-ilo) elementary
91  reflectors
92 
93  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
94 
95  Each H(i) has the form
96 
97  H(i) = I - tau * v * v'
98 
99  where tau is a real scalar, and v is a real vector with
100  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
101  exit in A(i+2:ihi,i), and tau in TAU(i).
102 
103  The contents of A are illustrated by the following example, with
104  n = 7, ilo = 2 and ihi = 6:
105 
106  on entry, on exit,
107 
108  ( a a a a a a a ) ( a a h h h h a )
109  ( a a a a a a ) ( a h h h h a )
110  ( a a a a a a ) ( h h h h h h )
111  ( a a a a a a ) ( v2 h h h h h )
112  ( a a a a a a ) ( v2 v3 h h h h )
113  ( a a a a a a ) ( v2 v3 v4 h h h )
114  ( a ) ( a )
115 
116  where a denotes an element of the original matrix A, h denotes a
117  modified element of the upper Hessenberg matrix H, and vi denotes an
118  element of the vector defining H(i).
119 
120  This implementation follows the hybrid algorithm and notations described in
121 
122  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
123  form through hybrid GPU-based computing," University of Tennessee Computer
124  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
125  May 24, 2009.
126 
127  This version stores the T matrices, for later use in magma_dorghr.
128 
129  ===================================================================== */
130 
131  #define A( i, j ) (A + (i) + (j)*lda)
132  #define dA( d, i, j ) (data.A[d] + (i) + (j)*ldda)
133 
134  double c_one = MAGMA_D_ONE;
135  double c_zero = MAGMA_D_ZERO;
136 
137  magma_int_t nb = magma_get_dgehrd_nb(n);
138 
139  magma_int_t nh, iws, ldda, min_lblocks, max_lblocks, last_dev, d;
140  magma_int_t dpanel, di, nlocal, i, i2, ib, ldwork;
141  magma_int_t iinfo;
142  magma_int_t lquery;
143  struct dgehrd_data data;
144 
145  int ngpu = magma_num_gpus();
146 
147  *info = 0;
148  iws = n*(nb + nb*ngpu);
149  MAGMA_D_SET2REAL( work[0], (double) iws );
150 
151  lquery = lwork == -1;
152  if (n < 0) {
153  *info = -1;
154  } else if (ilo < 1 || ilo > max(1,n)) {
155  *info = -2;
156  } else if (ihi < min(ilo,n) || ihi > n) {
157  *info = -3;
158  } else if (lda < max(1,n)) {
159  *info = -5;
160  } else if (lwork < max(1,n) && ! lquery) {
161  *info = -8;
162  }
163  if (*info != 0) {
164  magma_xerbla( __func__, -(*info) );
165  return *info;
166  }
167  else if (lquery)
168  return *info;
169 
170  magma_device_t cdevice;
171  magma_getdevice( &cdevice );
172 
173  // Adjust from 1-based indexing
174  ilo -= 1;
175 
176  // Quick return if possible
177  nh = ihi - ilo;
178  if (nh <= 1) {
179  work[0] = c_one;
180  return *info;
181  }
182 
183  // Set elements 0:ILO-1 and IHI-1:N-2 of TAU to zero
184  for(i = 0; i < ilo; ++i)
185  tau[i] = c_zero;
186 
187  for(i = max(0,ihi-1); i < n-1; ++i)
188  tau[i] = c_zero;
189 
190  // set T to zero
191  lapackf77_dlaset( "Full", &nb, &n, &c_zero, &c_zero, T, &nb );
192 
193  // set to null, to simplify cleanup code
194  for( d = 0; d < ngpu; ++d ) {
195  data.A[d] = NULL;
196  data.streams[d] = NULL;
197  }
198 
199  // If not enough workspace, use unblocked code
200  if ( lwork < iws ) {
201  nb = 1;
202  }
203 
204  if (nb == 1 || nb >= nh) {
205  // Use unblocked code below
206  i = ilo;
207  }
208  else {
209  // Use blocked code
210  // allocate memory on GPUs for A and workspaces
211  ldda = ((n+31)/32)*32;
212  min_lblocks = (n / nb) / ngpu;
213  max_lblocks = ((n-1) / nb) / ngpu + 1;
214  last_dev = (n / nb) % ngpu;
215 
216  // V and Vd need to be padded for copying in mdlahr2
217  data.ngpu = ngpu;
218  data.ldda = ldda;
219  data.ldv = nb*max_lblocks*ngpu;
220  data.ldvd = nb*max_lblocks;
221 
222  for( d = 0; d < ngpu; ++d ) {
223  magma_setdevice( d );
224  nlocal = min_lblocks*nb;
225  if ( d < last_dev ) {
226  nlocal += nb;
227  }
228  else if ( d == last_dev ) {
229  nlocal += (n % nb);
230  }
231 
232  ldwork = nlocal*ldda // A
233  + nb*data.ldv // V
234  + nb*data.ldvd // Vd
235  + nb*ldda // Y
236  + nb*ldda // W
237  + nb*nb; // Ti
238  if ( MAGMA_SUCCESS != magma_dmalloc( &data.A[d], ldwork )) {
239  *info = MAGMA_ERR_DEVICE_ALLOC;
240  goto CLEANUP;
241  }
242  data.V [d] = data.A [d] + nlocal*ldda;
243  data.Vd[d] = data.V [d] + nb*data.ldv;
244  data.Y [d] = data.Vd[d] + nb*data.ldvd;
245  data.W [d] = data.Y [d] + nb*ldda;
246  data.Ti[d] = data.W [d] + nb*ldda;
247 
248  magma_queue_create( &data.streams[d] );
249  }
250 
251  // Copy the matrix to GPUs
252  magma_dsetmatrix_1D_col_bcyclic( n, n, A, lda, data.A, ldda, ngpu, nb );
253 
254  // round ilo down to block boundary
255  ilo = (ilo/nb)*nb;
256  for (i = ilo; i < ihi - 1 - nb; i += nb) {
257  // Reduce columns i:i+nb-1 to Hessenberg form, returning the
258  // matrices V and T of the block reflector H = I - V*T*V'
259  // which performs the reduction, and also the matrix Y = A*V*T
260 
261  // Get the current panel (no need for the 1st iteration)
262  dpanel = (i / nb) % ngpu;
263  di = ((i / nb) / ngpu) * nb;
264  if ( i > ilo ) {
265  magma_setdevice( dpanel );
266  magma_dgetmatrix( ihi-i, nb,
267  dA(dpanel, i, di), ldda,
268  A(i,i), lda );
269  }
270 
271  // add 1 to i for 1-based index
272  magma_dlahr2_m( ihi, i+1, nb, A(0,i), lda,
273  &tau[i], &T[i*nb], nb, work, n, &data );
274 
275  magma_dlahru_m( n, ihi, i, nb, A, lda, &data );
276 
277  // copy first i rows above panel to host
278  magma_setdevice( dpanel );
279  magma_dgetmatrix_async( i, nb,
280  dA(dpanel, 0, di), ldda,
281  A(0,i), lda, data.streams[dpanel] );
282  }
283 
284  // Copy remainder to host, block-by-block
285  for( i2 = i; i2 < n; i2 += nb ) {
286  ib = min( nb, n-i2 );
287  d = (i2 / nb) % ngpu;
288  di = (i2 / nb) / ngpu * nb;
289  magma_setdevice( d );
290  magma_dgetmatrix( n, ib,
291  dA(d, 0, di), ldda,
292  A(0,i2), lda );
293  }
294  }
295 
296  // Use unblocked code to reduce the rest of the matrix
297  // add 1 to i for 1-based index
298  i += 1;
299  lapackf77_dgehd2(&n, &i, &ihi, A, &lda, tau, work, &iinfo);
300  MAGMA_D_SET2REAL( work[0], (double) iws );
301 
302 CLEANUP:
303  for( d = 0; d < ngpu; ++d ) {
304  magma_setdevice( d );
305  magmablasSetKernelStream( NULL );
306  magma_free( data.A[d] );
307  data.A[d] = NULL;
308  if ( data.streams[d] != NULL ) {
309  magma_queue_destroy( data.streams[d] );
310  }
311  }
312  magma_setdevice( cdevice );
313 
314  return *info;
315 } // magma_dgehrd_m
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
magma_int_t magma_dlahr2_m(magma_int_t n, magma_int_t k, magma_int_t nb, double *A, magma_int_t lda, double *tau, double *T, magma_int_t ldt, double *Y, magma_int_t ldy, struct dgehrd_data *data)
Definition: dlahr2_m.cpp:16
magma_int_t magma_num_gpus(void)
Definition: auxiliary.cpp:83
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define A(i, j)
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define T(m)
Definition: zgeqrf_mc.cpp:14
magma_int_t ldda
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_d.h:714
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define magma_queue_destroy(queue)
Definition: magma.h:116
cublasStatus_t magmablasSetKernelStream(magma_queue_t stream)
void magma_setdevice(magma_device_t dev)
void magma_getdevice(magma_device_t *dev)
magma_int_t magma_get_dgehrd_nb(magma_int_t m)
Definition: get_nb.cpp:355
magma_int_t magma_dlahru_m(magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, double *A, magma_int_t lda, struct dgehrd_data *data)
Definition: dlahru_m.cpp:16
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void lapackf77_dgehd2(const magma_int_t *n, const magma_int_t *ilo, const magma_int_t *ihi, double *A, const magma_int_t *lda, double *tau, double *work, magma_int_t *info)
magma_int_t ldda
#define MAGMA_SUCCESS
Definition: magma.h:106
#define MAGMA_D_SET2REAL(v, t)
Definition: magma.h:159
#define MAGMA_D_ZERO
Definition: magma.h:175
void lapackf77_dlaset(const char *uplo, const magma_int_t *m, const magma_int_t *n, const double *alpha, const double *beta, double *A, const magma_int_t *lda)
#define max(a, b)
Definition: common_magma.h:82
#define dA(d, i, j)
void magma_dsetmatrix_1D_col_bcyclic(magma_int_t m, magma_int_t n, const double *hA, magma_int_t lda, magmaDouble_ptr dA[], magma_int_t ldda, magma_int_t ngpu, magma_int_t nb)

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgelqf ( magma_int_t  m,
magma_int_t  n,
double *  A,
magma_int_t  lda,
double *  tau,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 14 of file dgelqf.cpp.

References __func__, dA, dAT, dgehrd_data::ldda, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgeqrf2_gpu(), magma_dgetmatrix, magma_dmalloc(), magma_dsetmatrix, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgelqf_nb(), MAGMA_SUCCESS, magma_xerbla(), magmablas_dtranspose2(), magmablas_dtranspose_inplace(), max, and min.

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  DGELQF computes an LQ factorization of a DOUBLE_PRECISION M-by-N matrix A:
27  A = L * Q.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
38  On entry, the M-by-N matrix A.
39  On exit, the elements on and below the diagonal of the array
40  contain the m-by-min(m,n) lower trapezoidal matrix L (L is
41  lower triangular if m <= n); the elements above the diagonal,
42  with the array TAU, represent the orthogonal matrix Q as a
43  product of elementary reflectors (see Further Details).
44 
45  Higher performance is achieved if A is in pinned memory, e.g.
46  allocated using magma_malloc_pinned.
47 
48  LDA (input) INTEGER
49  The leading dimension of the array A. LDA >= max(1,M).
50 
51  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
52  The scalar factors of the elementary reflectors (see Further
53  Details).
54 
55  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
56  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
57 
58  Higher performance is achieved if WORK is in pinned memory, e.g.
59  allocated using magma_malloc_pinned.
60 
61  LWORK (input) INTEGER
62  The dimension of the array WORK. LWORK >= max(1,M).
63  For optimum performance LWORK >= M*NB, where NB is the
64  optimal blocksize.
65 
66  If LWORK = -1, then a workspace query is assumed; the routine
67  only calculates the optimal size of the WORK array, returns
68  this value as the first entry of the WORK array, and no error
69  message related to LWORK is issued.
70 
71  INFO (output) INTEGER
72  = 0: successful exit
73  < 0: if INFO = -i, the i-th argument had an illegal value
74  if INFO = -10, internal GPU memory allocation failed.
75 
76  Further Details
77  ===============
78  The matrix Q is represented as a product of elementary reflectors
79 
80  Q = H(k) . . . H(2) H(1), where k = min(m,n).
81 
82  Each H(i) has the form
83 
84  H(i) = I - tau * v * v'
85 
86  where tau is a real scalar, and v is a real vector with
87  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
88  and tau in TAU(i).
89  ===================================================================== */
90 
91  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
92 
93  double *dA, *dAT;
94  double c_one = MAGMA_D_ONE;
95  magma_int_t maxm, maxn, maxdim, nb;
96  magma_int_t iinfo, ldda;
97  int lquery;
98 
99  /* Function Body */
100  *info = 0;
101  nb = magma_get_dgelqf_nb(m);
102 
103  work[0] = MAGMA_D_MAKE( (double)(m*nb), 0 );
104  lquery = (lwork == -1);
105  if (m < 0) {
106  *info = -1;
107  } else if (n < 0) {
108  *info = -2;
109  } else if (lda < max(1,m)) {
110  *info = -4;
111  } else if (lwork < max(1,m) && ! lquery) {
112  *info = -7;
113  }
114  if (*info != 0) {
115  magma_xerbla( __func__, -(*info) );
116  return *info;
117  }
118  else if (lquery) {
119  return *info;
120  }
121 
122  /* Quick return if possible */
123  if (min(m, n) == 0) {
124  work[0] = c_one;
125  return *info;
126  }
127 
128  maxm = ((m + 31)/32)*32;
129  maxn = ((n + 31)/32)*32;
130  maxdim = max(maxm, maxn);
131 
132  if (maxdim*maxdim < 2*maxm*maxn)
133  {
134  ldda = maxdim;
135 
136  if (MAGMA_SUCCESS != magma_dmalloc( &dA, maxdim*maxdim )) {
137  *info = MAGMA_ERR_DEVICE_ALLOC;
138  return *info;
139  }
140 
141  magma_dsetmatrix( m, n, a, lda, dA, ldda );
142  dAT = dA;
143  magmablas_dtranspose_inplace( ldda, dAT, ldda );
144  }
145  else
146  {
147  ldda = maxn;
148 
149  if (MAGMA_SUCCESS != magma_dmalloc( &dA, 2*maxn*maxm )) {
150  *info = MAGMA_ERR_DEVICE_ALLOC;
151  return *info;
152  }
153 
154  magma_dsetmatrix( m, n, a, lda, dA, maxm );
155 
156  dAT = dA + maxn * maxm;
157  magmablas_dtranspose2( dAT, ldda, dA, maxm, m, n );
158  }
159 
160  magma_dgeqrf2_gpu(n, m, dAT, ldda, tau, &iinfo);
161 
162  if (maxdim*maxdim < 2*maxm*maxn) {
163  magmablas_dtranspose_inplace( ldda, dAT, ldda );
164  magma_dgetmatrix( m, n, dA, ldda, a, lda );
165  } else {
166  magmablas_dtranspose2( dA, maxm, dAT, ldda, n, m );
167  magma_dgetmatrix( m, n, dA, maxm, a, lda );
168  }
169 
170  magma_free( dA );
171 
172  return *info;
173 } /* magma_dgelqf */
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
magma_int_t ldda
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
magma_int_t magma_dgeqrf2_gpu(magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, magma_int_t *info)
Definition: dgeqrf2_gpu.cpp:15
void magmablas_dtranspose_inplace(magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda)
#define dAT(i, j)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_get_dgelqf_nb(magma_int_t m)
Definition: get_nb.cpp:237
#define MAGMA_SUCCESS
Definition: magma.h:106
void magmablas_dtranspose2(magmaDouble_ptr odata, magma_int_t ldo, magmaDouble_const_ptr idata, magma_int_t ldi, magma_int_t m, magma_int_t n)
#define max(a, b)
Definition: common_magma.h:82
#define dA(dev, i, j)
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgelqf_gpu ( magma_int_t  m,
magma_int_t  n,
double *  dA,
magma_int_t  ldda,
double *  tau,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 14 of file dgelqf_gpu.cpp.

References __func__, dA, dAT, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgeqrf2_gpu(), magma_dmalloc(), MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgelqf_nb(), MAGMA_SUCCESS, magma_xerbla(), magmablas_dtranspose2(), magmablas_dtranspose_inplace(), max, and min.

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  DGELQF computes an LQ factorization of a DOUBLE_PRECISION M-by-N matrix dA:
27  dA = L * Q.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  dA (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDA,N)
38  On entry, the M-by-N matrix dA.
39  On exit, the elements on and below the diagonal of the array
40  contain the m-by-min(m,n) lower trapezoidal matrix L (L is
41  lower triangular if m <= n); the elements above the diagonal,
42  with the array TAU, represent the orthogonal matrix Q as a
43  product of elementary reflectors (see Further Details).
44 
45  LDA (input) INTEGER
46  The leading dimension of the array dA. LDA >= max(1,M).
47 
48  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
49  The scalar factors of the elementary reflectors (see Further
50  Details).
51 
52  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
53  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
54 
55  Higher performance is achieved if WORK is in pinned memory, e.g.
56  allocated using magma_malloc_pinned.
57 
58  LWORK (input) INTEGER
59  The dimension of the array WORK. LWORK >= max(1,M).
60  For optimum performance LWORK >= M*NB, where NB is the
61  optimal blocksize.
62 
63  If LWORK = -1, then a workspace query is assumed; the routine
64  only calculates the optimal size of the WORK array, returns
65  this value as the first entry of the WORK array, and no error
66  message related to LWORK is issued.
67 
68  INFO (output) INTEGER
69  = 0: successful exit
70  < 0: if INFO = -i, the i-th argument had an illegal value
71  if INFO = -10, internal GPU memory allocation failed.
72 
73  Further Details
74  ===============
75  The matrix Q is represented as a product of elementary reflectors
76 
77  Q = H(k) . . . H(2) H(1), where k = min(m,n).
78 
79  Each H(i) has the form
80 
81  H(i) = I - tau * v * v'
82 
83  where tau is a real scalar, and v is a real vector with
84  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
85  and tau in TAU(i).
86  ===================================================================== */
87 
88  #define a_ref(a_1,a_2) ( dA+(a_2)*(lda) + (a_1))
89 
90  double *dAT;
91  double c_one = MAGMA_D_ONE;
92  magma_int_t maxm, maxn, maxdim, nb;
93  magma_int_t iinfo;
94  int lquery;
95 
96  *info = 0;
97  nb = magma_get_dgelqf_nb(m);
98 
99  work[0] = MAGMA_D_MAKE( (double)(m*nb), 0 );
100  lquery = (lwork == -1);
101  if (m < 0) {
102  *info = -1;
103  } else if (n < 0) {
104  *info = -2;
105  } else if (lda < max(1,m)) {
106  *info = -4;
107  } else if (lwork < max(1,m) && ! lquery) {
108  *info = -7;
109  }
110  if (*info != 0) {
111  magma_xerbla( __func__, -(*info) );
112  return *info;
113  }
114  else if (lquery) {
115  return *info;
116  }
117 
118  /* Quick return if possible */
119  if (min(m, n) == 0) {
120  work[0] = c_one;
121  return *info;
122  }
123 
124  maxm = ((m + 31)/32)*32;
125  maxn = ((n + 31)/32)*32;
126  maxdim = max(maxm, maxn);
127 
128  int ldat = maxn;
129 
130  dAT = dA;
131 
132  if ( m == n ) {
133  ldat = lda;
134  magmablas_dtranspose_inplace( m, dAT, lda );
135  }
136  else {
137  if (MAGMA_SUCCESS != magma_dmalloc( &dAT, maxm*maxn ) ){
138  *info = MAGMA_ERR_DEVICE_ALLOC;
139  return *info;
140  }
141 
142  magmablas_dtranspose2( dAT, ldat, dA, lda, m, n );
143  }
144 
145  magma_dgeqrf2_gpu(n, m, dAT, ldat, tau, &iinfo);
146 
147  if ( m == n ) {
148  magmablas_dtranspose_inplace( m, dAT, ldat );
149  }
150  else {
151  magmablas_dtranspose2( dA, lda, dAT, ldat, n, m );
152  magma_free( dAT );
153  }
154 
155  return *info;
156 } /* magma_dgelqf_gpu */
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_dgeqrf2_gpu(magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, magma_int_t *info)
Definition: dgeqrf2_gpu.cpp:15
void magmablas_dtranspose_inplace(magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda)
#define dAT(i, j)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_get_dgelqf_nb(magma_int_t m)
Definition: get_nb.cpp:237
#define MAGMA_SUCCESS
Definition: magma.h:106
void magmablas_dtranspose2(magmaDouble_ptr odata, magma_int_t ldo, magmaDouble_const_ptr idata, magma_int_t ldi, magma_int_t m, magma_int_t n)
#define max(a, b)
Definition: common_magma.h:82
#define dA(dev, i, j)

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgels3_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
double *  dA,
magma_int_t  ldda,
double *  dB,
magma_int_t  lddb,
double *  hwork,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 14 of file dgels3_gpu.cpp.

References __func__, dT, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgeqrf3_gpu(), magma_dgeqrs3_gpu(), magma_dmalloc(), magma_dmalloc_cpu(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_dgeqrf_nb(), MAGMA_SUCCESS, magma_xerbla(), max, and min.

19 {
20 /* -- MAGMA (version 1.4.0) --
21  Univ. of Tennessee, Knoxville
22  Univ. of California, Berkeley
23  Univ. of Colorado, Denver
24  August 2013
25 
26  Purpose
27  =======
28  Solves the overdetermined, least squares problem
29  min || A*X - C ||
30  using the QR factorization A.
31  The underdetermined problem (m < n) is not currently handled.
32 
33 
34  Arguments
35  =========
36  TRANS (input) CHARACTER*1
37  = 'N': the linear system involves A.
38  Only trans='N' is currently handled.
39 
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. M >= N >= 0.
45 
46  NRHS (input) INTEGER
47  The number of columns of the matrix C. NRHS >= 0.
48 
49  DA (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N)
50  On entry, the M-by-N matrix A.
51  On exit, A is overwritten by details of its QR
52  factorization as returned by DGEQRF3.
53 
54  LDDA (input) INTEGER
55  The leading dimension of the array A, LDDA >= M.
56 
57  DB (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDB,NRHS)
58  On entry, the M-by-NRHS matrix C.
59  On exit, the N-by-NRHS solution matrix X.
60 
61  LDDB (input) INTEGER
62  The leading dimension of the array DB. LDDB >= M.
63 
64  HWORK (workspace/output) DOUBLE_PRECISION array, dimension MAX(1,LWORK).
65  On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.
66 
67  LWORK (input) INTEGER
68  The dimension of the array HWORK,
69  LWORK >= (M - N + NB)*(NRHS + NB) + NRHS*NB,
70  where NB is the blocksize given by magma_get_dgeqrf_nb( M ).
71 
72  If LWORK = -1, then a workspace query is assumed; the routine
73  only calculates the optimal size of the HWORK array, returns
74  this value as the first entry of the HWORK array.
75 
76  INFO (output) INTEGER
77  = 0: successful exit
78  < 0: if INFO = -i, the i-th argument had an illegal value
79  ===================================================================== */
80 
81  #define a_ref(a_1,a_2) (dA + (a_2)*(ldda) + (a_1))
82 
83  double *dT, *tau;
84  magma_int_t k;
85 
87  magma_int_t lwkopt = (m - n + nb)*(nrhs + nb) + nrhs*nb;
88  int lquery = (lwork == -1);
89 
90  hwork[0] = MAGMA_D_MAKE( (double)lwkopt, 0. );
91 
92  *info = 0;
93  /* For now, N is the only case working */
94  if ( (trans != 'N') && (trans != 'n' ) )
95  *info = -1;
96  else if (m < 0)
97  *info = -2;
98  else if (n < 0 || m < n) /* LQ is not handled for now */
99  *info = -3;
100  else if (nrhs < 0)
101  *info = -4;
102  else if (ldda < max(1,m))
103  *info = -6;
104  else if (lddb < max(1,m))
105  *info = -8;
106  else if (lwork < lwkopt && ! lquery)
107  *info = -10;
108 
109  if (*info != 0) {
110  magma_xerbla( __func__, -(*info) );
111  return *info;
112  }
113  else if (lquery)
114  return *info;
115 
116  k = min(m,n);
117  if (k == 0) {
118  hwork[0] = MAGMA_D_ONE;
119  return *info;
120  }
121 
122  /*
123  * Allocate temporary buffers
124  */
125  int ldtwork = ( 2*k + ((n+31)/32)*32 )*nb;
126  if (nb < nrhs)
127  ldtwork = ( 2*k + ((n+31)/32)*32 )*nrhs;
128  if (MAGMA_SUCCESS != magma_dmalloc( &dT, ldtwork )) {
129  *info = MAGMA_ERR_DEVICE_ALLOC;
130  return *info;
131  }
132 
133  magma_dmalloc_cpu( &tau, k );
134  if ( tau == NULL ) {
135  magma_free( dT );
136  *info = MAGMA_ERR_HOST_ALLOC;
137  return *info;
138  }
139 
140  magma_dgeqrf3_gpu( m, n, dA, ldda, tau, dT, info );
141  if ( *info == 0 ) {
142  magma_dgeqrs3_gpu( m, n, nrhs,
143  dA, ldda, tau, dT,
144  dB, lddb, hwork, lwork, info );
145  }
146 
147  magma_free( dT );
148  magma_free_cpu(tau);
149  return *info;
150 }
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
#define hwork
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
magma_int_t ldda
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define dB(dev, i, j)
magma_int_t magma_dgeqrf3_gpu(magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, double *dT, magma_int_t *info)
Definition: dgeqrf3_gpu.cpp:38
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
static magma_err_t magma_dmalloc_cpu(double **ptrPtr, size_t n)
Definition: magma.h:84
magma_int_t magma_get_dgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:141
#define MAGMA_SUCCESS
Definition: magma.h:106
magma_int_t magma_dgeqrs3_gpu(magma_int_t m, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *tau, double *dT, double *dB, magma_int_t lddb, double *hwork, magma_int_t lhwork, magma_int_t *info)
Definition: dgeqrs3_gpu.cpp:14
#define dT(m)
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define dA(dev, i, j)

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgels_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
double *  dA,
magma_int_t  ldda,
double *  dB,
magma_int_t  lddb,
double *  hwork,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 14 of file dgels_gpu.cpp.

References __func__, dT, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgeqrf_gpu(), magma_dgeqrs_gpu(), magma_dmalloc(), magma_dmalloc_cpu(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_dgeqrf_nb(), MAGMA_SUCCESS, magma_xerbla(), max, and min.

19 {
20 /* -- MAGMA (version 1.4.0) --
21  Univ. of Tennessee, Knoxville
22  Univ. of California, Berkeley
23  Univ. of Colorado, Denver
24  August 2013
25 
26  Purpose
27  =======
28  Solves the overdetermined, least squares problem
29  min || A*X - C ||
30  using the QR factorization A.
31  The underdetermined problem (m < n) is not currently handled.
32 
33 
34  Arguments
35  =========
36  TRANS (input) CHARACTER*1
37  = 'N': the linear system involves A.
38  Only trans='N' is currently handled.
39 
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. M >= N >= 0.
45 
46  NRHS (input) INTEGER
47  The number of columns of the matrix C. NRHS >= 0.
48 
49  DA (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N)
50  On entry, the M-by-N matrix A.
51  On exit, A is overwritten by details of its QR
52  factorization as returned by DGEQRF.
53 
54  LDDA (input) INTEGER
55  The leading dimension of the array A, LDDA >= M.
56 
57  DB (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDB,NRHS)
58  On entry, the M-by-NRHS matrix C.
59  On exit, the N-by-NRHS solution matrix X.
60 
61  LDDB (input) INTEGER
62  The leading dimension of the array DB. LDDB >= M.
63 
64  HWORK (workspace/output) DOUBLE_PRECISION array, dimension MAX(1,LWORK).
65  On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.
66 
67  LWORK (input) INTEGER
68  The dimension of the array HWORK,
69  LWORK >= (M - N + NB)*(NRHS + NB) + NRHS*NB,
70  where NB is the blocksize given by magma_get_dgeqrf_nb( M ).
71 
72  If LWORK = -1, then a workspace query is assumed; the routine
73  only calculates the optimal size of the HWORK array, returns
74  this value as the first entry of the HWORK array.
75 
76  INFO (output) INTEGER
77  = 0: successful exit
78  < 0: if INFO = -i, the i-th argument had an illegal value
79  ===================================================================== */
80 
81  double *dT, *tau;
82  magma_int_t k;
83 
85  magma_int_t lwkopt = (m - n + nb)*(nrhs + nb) + nrhs*nb;
86  int lquery = (lwork == -1);
87 
88  hwork[0] = MAGMA_D_MAKE( (double)lwkopt, 0. );
89 
90  *info = 0;
91  /* For now, N is the only case working */
92  if ( (trans != 'N') && (trans != 'n' ) )
93  *info = -1;
94  else if (m < 0)
95  *info = -2;
96  else if (n < 0 || m < n) /* LQ is not handled for now */
97  *info = -3;
98  else if (nrhs < 0)
99  *info = -4;
100  else if (ldda < max(1,m))
101  *info = -6;
102  else if (lddb < max(1,m))
103  *info = -8;
104  else if (lwork < lwkopt && ! lquery)
105  *info = -10;
106 
107  if (*info != 0) {
108  magma_xerbla( __func__, -(*info) );
109  return *info;
110  }
111  else if (lquery)
112  return *info;
113 
114  k = min(m,n);
115  if (k == 0) {
116  hwork[0] = MAGMA_D_ONE;
117  return *info;
118  }
119 
120  /*
121  * Allocate temporary buffers
122  */
123  int ldtwork = ( 2*k + ((n+31)/32)*32 )*nb;
124  if (nb < nrhs)
125  ldtwork = ( 2*k + ((n+31)/32)*32 )*nrhs;
126  if (MAGMA_SUCCESS != magma_dmalloc( &dT, ldtwork )) {
127  *info = MAGMA_ERR_DEVICE_ALLOC;
128  return *info;
129  }
130 
131  magma_dmalloc_cpu( &tau, k );
132  if ( tau == NULL ) {
133  magma_free( dT );
134  *info = MAGMA_ERR_HOST_ALLOC;
135  return *info;
136  }
137 
138  magma_dgeqrf_gpu( m, n, dA, ldda, tau, dT, info );
139 
140  if ( *info == 0 ) {
141  magma_dgeqrs_gpu( m, n, nrhs,
142  dA, ldda, tau, dT,
143  dB, lddb, hwork, lwork, info );
144  }
145 
146  magma_free( dT );
147  magma_free_cpu(tau);
148  return *info;
149 }
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
#define hwork
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
magma_int_t ldda
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define dB(dev, i, j)
magma_int_t magma_dgeqrs_gpu(magma_int_t m, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, double *tau, double *dT, double *dB, magma_int_t lddb, double *hwork, magma_int_t lhwork, magma_int_t *info)
Definition: dgeqrs_gpu.cpp:14
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
magma_int_t magma_dgeqrf_gpu(magma_int_t m, magma_int_t n, double *dA, magma_int_t ldda, double *tau, double *dT, magma_int_t *info)
Definition: dgeqrf_gpu.cpp:43
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
static magma_err_t magma_dmalloc_cpu(double **ptrPtr, size_t n)
Definition: magma.h:84
magma_int_t magma_get_dgeqrf_nb(magma_int_t m)
Definition: get_nb.cpp:141
#define MAGMA_SUCCESS
Definition: magma.h:106
#define dT(m)
#define max(a, b)
Definition: common_magma.h:82
magma_err_t magma_free_cpu(void *ptr)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define dA(dev, i, j)

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqlf ( magma_int_t  m,
magma_int_t  n,
double *  A,
magma_int_t  lda,
double *  tau,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 14 of file dgeqlf.cpp.

References __func__, a_ref, da_ref, dpanel_to_q(), dq_to_panel(), dwork, lapackf77_dgeqlf(), lapackf77_dlarft(), dgehrd_data::ldda, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgetmatrix, magma_dgetmatrix_async, magma_dlarfb_gpu(), magma_dmalloc(), magma_dsetmatrix, magma_dsetmatrix_async, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgeqlf_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), MagmaBackward, MagmaBackwardStr, MagmaColumnwise, MagmaColumnwiseStr, MagmaLeft, MagmaLower, MagmaTrans, max, and min.

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  DGEQLF computes a QL factorization of a DOUBLE_PRECISION M-by-N matrix A:
27  A = Q * L.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
38  On entry, the M-by-N matrix A.
39  On exit, if m >= n, the lower triangle of the subarray
40  A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L;
41  if m <= n, the elements on and below the (n-m)-th
42  superdiagonal contain the M-by-N lower trapezoidal matrix L;
43  the remaining elements, with the array TAU, represent the
44  orthogonal matrix Q as a product of elementary reflectors
45  (see Further Details).
46 
47  Higher performance is achieved if A is in pinned memory, e.g.
48  allocated using magma_malloc_pinned.
49 
50  LDA (input) INTEGER
51  The leading dimension of the array A. LDA >= max(1,M).
52 
53  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
54  The scalar factors of the elementary reflectors (see Further
55  Details).
56 
57  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
58  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
59 
60  Higher performance is achieved if WORK is in pinned memory, e.g.
61  allocated using magma_malloc_pinned.
62 
63  LWORK (input) INTEGER
64  The dimension of the array WORK. LWORK >= max(1,N).
65  For optimum performance LWORK >= N*NB, where NB can be obtained
66  through magma_get_dgeqlf_nb(M).
67 
68  If LWORK = -1, then a workspace query is assumed; the routine
69  only calculates the optimal size of the WORK array, returns
70  this value as the first entry of the WORK array, and no error
71  message related to LWORK is issued by XERBLA.
72 
73  INFO (output) INTEGER
74  = 0: successful exit
75  < 0: if INFO = -i, the i-th argument had an illegal value
76  or another error occurred, such as memory allocation failed.
77 
78  Further Details
79  ===============
80  The matrix Q is represented as a product of elementary reflectors
81 
82  Q = H(k) . . . H(2) H(1), where k = min(m,n).
83 
84  Each H(i) has the form
85 
86  H(i) = I - tau * v * v'
87 
88  where tau is a real scalar, and v is a real vector with
89  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
90  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
91  ===================================================================== */
92 
93  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
94  #define da_ref(a_1,a_2) (da+(a_2)*ldda + (a_1))
95 
96  double *da, *dwork;
97  double c_one = MAGMA_D_ONE;
98  magma_int_t i, k, lddwork, old_i, old_ib, nb;
99  magma_int_t rows, cols;
100  magma_int_t ib, ki, kk, mu, nu, iinfo, ldda;
101  int lquery;
102 
103  nb = magma_get_dgeqlf_nb(m);
104  *info = 0;
105  lquery = (lwork == -1);
106 
107  // silence "uninitialized" warnings
108  old_ib = nb;
109  old_i = 0;
110 
111  if (m < 0) {
112  *info = -1;
113  } else if (n < 0) {
114  *info = -2;
115  } else if (lda < max(1,m)) {
116  *info = -4;
117  }
118 
119  if (*info == 0) {
120  k = min(m,n);
121  if (k == 0)
122  work[0] = c_one;
123  else {
124  work[0] = MAGMA_D_MAKE( n*nb, 0 );
125  }
126 
127  if (lwork < max(1,n) && ! lquery)
128  *info = -7;
129  }
130 
131  if (*info != 0) {
132  magma_xerbla( __func__, -(*info) );
133  return *info;
134  }
135  else if (lquery)
136  return *info;
137 
138  /* Quick return if possible */
139  if (k == 0)
140  return *info;
141 
142  lddwork = ((n+31)/32)*32;
143  ldda = ((m+31)/32)*32;
144 
145  if (MAGMA_SUCCESS != magma_dmalloc( &da, (n)*ldda + nb*lddwork )) {
146  *info = MAGMA_ERR_DEVICE_ALLOC;
147  return *info;
148  }
149  dwork = da + ldda*(n);
150 
151  magma_queue_t stream[2];
152  magma_queue_create( &stream[0] );
153  magma_queue_create( &stream[1] );
154 
155  if ( (nb > 1) && (nb < k) ) {
156  /* Use blocked code initially.
157  The last kk columns are handled by the block method.
158  First, copy the matrix on the GPU except the last kk columns */
159  magma_dsetmatrix_async( (m), (n-nb),
160  a_ref(0, 0), lda,
161  da_ref(0, 0), ldda, stream[0] );
162 
163  ki = ((k - nb - 1) / nb) * nb;
164  kk = min(k, ki + nb);
165  for (i = k - kk + ki; i >= k -kk; i -= nb) {
166  ib = min(k-i,nb);
167 
168  if (i < k - kk + ki) {
169  /* 1. Copy asynchronously the current panel to the CPU.
170  2. Copy asynchronously the submatrix below the panel
171  to the CPU) */
172  rows = m - k + i + ib;
173  magma_dgetmatrix_async( rows, ib,
174  da_ref(0, n-k+i), ldda,
175  a_ref(0, n-k+i), lda, stream[1] );
176 
177  magma_dgetmatrix_async( (m-rows), ib,
178  da_ref(rows, n-k+i), ldda,
179  a_ref(rows, n-k+i), lda, stream[0] );
180 
181  /* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left in
182  two steps - implementing the lookahead techniques.
183  This is the main update from the lookahead techniques. */
184  rows = m - k + old_i + old_ib;
185  cols = n - k + old_i - old_ib;
187  rows, cols, old_ib,
188  da_ref(0, cols+old_ib), ldda, dwork, lddwork,
189  da_ref(0, 0 ), ldda, dwork+old_ib, lddwork);
190  }
191 
192  magma_queue_sync( stream[1] );
193  /* Compute the QL factorization of the current block
194  A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */
195  rows = m - k + i + ib;
196  cols = n - k + i;
197  lapackf77_dgeqlf(&rows,&ib, a_ref(0,cols), &lda, tau+i, work, &lwork, &iinfo);
198 
199  if (cols > 0) {
200  /* Form the triangular factor of the block reflector
201  H = H(i+ib-1) . . . H(i+1) H(i) */
203  &rows, &ib,
204  a_ref(0, cols), &lda, tau + i, work, &ib);
205 
206  dpanel_to_q( MagmaLower, ib, a_ref(rows-ib,cols), lda, work+ib*ib);
207  magma_dsetmatrix( rows, ib,
208  a_ref(0,cols), lda,
209  da_ref(0,cols), ldda );
210  dq_to_panel( MagmaLower, ib, a_ref(rows-ib,cols), lda, work+ib*ib);
211 
212  // Send the triangular part on the GPU
213  magma_dsetmatrix( ib, ib, work, ib, dwork, lddwork );
214 
215  /* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left in
216  two steps - implementing the lookahead techniques.
217  This is the update of first ib columns. */
218  if (i-ib >= k -kk)
220  rows, ib, ib,
221  da_ref(0, cols), ldda, dwork, lddwork,
222  da_ref(0,cols-ib), ldda, dwork+ib, lddwork);
223  else{
225  rows, cols, ib,
226  da_ref(0, cols), ldda, dwork, lddwork,
227  da_ref(0, 0 ), ldda, dwork+ib, lddwork);
228  }
229 
230  old_i = i;
231  old_ib = ib;
232  }
233  }
234  mu = m - k + i + nb;
235  nu = n - k + i + nb;
236 
237  magma_dgetmatrix( m, nu, da_ref(0,0), ldda, a_ref(0,0), lda );
238  } else {
239  mu = m;
240  nu = n;
241  }
242 
243  /* Use unblocked code to factor the last or only block */
244  if (mu > 0 && nu > 0)
245  lapackf77_dgeqlf(&mu, &nu, a_ref(0,0), &lda, tau, work, &lwork, &iinfo);
246 
247  magma_queue_destroy( stream[0] );
248  magma_queue_destroy( stream[1] );
249  magma_free( da );
250  return *info;
251 } /* magma_dgeqlf */
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define magma_dsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_d.h:711
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
magma_int_t ldda
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
#define magma_dgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_d.h:714
void lapackf77_dlarft(const char *direct, const char *storev, const magma_int_t *n, const magma_int_t *k, double *V, const magma_int_t *ldv, const double *tau, double *T, const magma_int_t *ldt)
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define magma_queue_destroy(queue)
Definition: magma.h:116
#define MagmaBackward
Definition: magma.h:72
#define dwork(dev, i, j)
#define a_ref(a_1, a_2)
#define MagmaLower
Definition: magma.h:62
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_dlarfb_gpu(char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const double *dv, magma_int_t ldv, const double *dt, magma_int_t ldt, double *dc, magma_int_t ldc, double *dwork, magma_int_t ldwork)
Definition: dlarfb_gpu.cpp:15
#define MagmaTrans
Definition: magma.h:58
void lapackf77_dgeqlf(const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)
#define MagmaColumnwiseStr
Definition: magma.h:97
#define MAGMA_SUCCESS
Definition: magma.h:106
void dpanel_to_q(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:17
magma_int_t magma_get_dgeqlf_nb(magma_int_t m)
Definition: get_nb.cpp:203
void dq_to_panel(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:57
#define MagmaBackwardStr
Definition: magma.h:95
#define da_ref(a_1, a_2)
#define max(a, b)
Definition: common_magma.h:82
#define MagmaColumnwise
Definition: magma.h:74
#define magma_dsetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_d.h:702
#define magma_queue_sync(queue)
Definition: magma.h:119

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqp3 ( magma_int_t  m,
magma_int_t  n,
double *  a,
magma_int_t  lda,
magma_int_t jpvt,
double *  tau,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 18 of file dgeqp3.cpp.

References __func__, A, blasf77_dswap(), cblas_dnrm2(), dA, dwork, lapackf77_dgeqrf(), lapackf77_dlaqp2(), lapackf77_dormqr(), dgehrd_data::ldda, MAGMA_D_MAKE, magma_dgetmatrix, magma_dlaqps(), magma_dmalloc(), magma_dsetmatrix_async, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgeqp3_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), MagmaLeftStr, MagmaTransStr, max, and min.

26 {
27 /* -- MAGMA (version 1.4.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  August 2013
32 
33  Purpose
34  =======
35  DGEQP3 computes a QR factorization with column pivoting of a
36  matrix A: A*P = Q*R using Level 3 BLAS.
37 
38  Arguments
39  =========
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. N >= 0.
45 
46  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
47  On entry, the M-by-N matrix A.
48  On exit, the upper triangle of the array contains the
49  min(M,N)-by-N upper trapezoidal matrix R; the elements below
50  the diagonal, together with the array TAU, represent the
51  unitary matrix Q as a product of min(M,N) elementary
52  reflectors.
53 
54  LDA (input) INTEGER
55  The leading dimension of the array A. LDA >= max(1,M).
56 
57  JPVT (input/output) INTEGER array, dimension (N)
58  On entry, if JPVT(J).ne.0, the J-th column of A is permuted
59  to the front of A*P (a leading column); if JPVT(J)=0,
60  the J-th column of A is a free column.
61  On exit, if JPVT(J)=K, then the J-th column of A*P was the
62  K-th column of A.
63 
64  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
65  The scalar factors of the elementary reflectors.
66 
67  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
68  On exit, if INFO=0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The dimension of the array WORK.
72  For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
73  for [cz]geqp3, LWORK >= (N+1)*NB,
74  where NB is the optimal blocksize.
75 
76  If LWORK = -1, then a workspace query is assumed; the routine
77  only calculates the optimal size of the WORK array, returns
78  this value as the first entry of the WORK array, and no error
79  message related to LWORK is issued by XERBLA.
80 
81  For [cz]geqp3 only:
82  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
83 
84  INFO (output) INTEGER
85  = 0: successful exit.
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of elementary reflectors
91 
92  Q = H(1) H(2) . . . H(k), where k = min(m,n).
93 
94  Each H(i) has the form
95 
96  H(i) = I - tau * v * v'
97 
98  where tau is a real scalar, and v is a real vector
99  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
100  A(i+1:m,i), and tau in TAU(i).
101  ===================================================================== */
102 
103 #define A(i, j) (A + (i) + (j)*(lda ))
104 #define dA(i, j) (dwork + (i) + (j)*(ldda))
105 
106  double *dwork, *df;
107 
108  magma_int_t ione = 1;
109 
110  magma_int_t n_j, ldda, ldwork;
111  magma_int_t j, jb, na, nb, sm, sn, fjb, nfxd, minmn;
112  magma_int_t topbmn, sminmn, lwkopt, lquery;
113 
114  *info = 0;
115  lquery = (lwork == -1);
116  if (m < 0) {
117  *info = -1;
118  } else if (n < 0) {
119  *info = -2;
120  } else if (lda < max(1,m)) {
121  *info = -4;
122  }
123 
124  nb = magma_get_dgeqp3_nb(min(m, n));
125  if (*info == 0) {
126  minmn = min(m,n);
127  if (minmn == 0) {
128  lwkopt = 1;
129  } else {
130  lwkopt = (n + 1)*nb;
131 #if defined(PRECISION_d) || defined(PRECISION_s)
132  lwkopt += 2*n;
133 #endif
134  }
135  work[0] = MAGMA_D_MAKE( lwkopt, 0. );
136 
137  if (lwork < lwkopt && ! lquery) {
138  *info = -8;
139  }
140  }
141 
142  if (*info != 0) {
143  magma_xerbla( __func__, -(*info) );
144  return *info;
145  } else if (lquery) {
146  return *info;
147  }
148 
149  if (minmn == 0)
150  return *info;
151 
152 #if defined(PRECISION_d) || defined(PRECISION_s)
153  double *rwork = work + (n + 1)*nb;
154 #endif
155 
156  ldda = ((m+31)/32)*32;
157  ldwork = n*ldda + (n+1)*nb;
158  if (MAGMA_SUCCESS != magma_dmalloc( &dwork, ldwork )) {
159  *info = MAGMA_ERR_DEVICE_ALLOC;
160  return *info;
161  }
162  df = dwork + n*ldda;
163  // dwork used for dA
164 
165  magma_queue_t stream;
166  magma_queue_create( &stream );
167 
168  /* Move initial columns up front.
169  * Note jpvt uses 1-based indices for historical compatibility. */
170  nfxd = 0;
171  for (j = 0; j < n; ++j) {
172  if (jpvt[j] != 0) {
173  if (j != nfxd) {
174  blasf77_dswap(&m, A(0, j), &ione, A(0, nfxd), &ione);
175  jpvt[j] = jpvt[nfxd];
176  jpvt[nfxd] = j + 1;
177  }
178  else {
179  jpvt[j] = j + 1;
180  }
181  ++nfxd;
182  }
183  else {
184  jpvt[j] = j + 1;
185  }
186  }
187 
188  /* Factorize fixed columns
189  =======================
190  Compute the QR factorization of fixed columns and update
191  remaining columns. */
192  if (nfxd > 0) {
193  na = min(m,nfxd);
194  lapackf77_dgeqrf(&m, &na, A, &lda, tau, work, &lwork, info);
195  if (na < n) {
196  n_j = n - na;
198  A, &lda, tau, A(0, na), &lda,
199  work, &lwork, info );
200  }
201  }
202 
203  /* Factorize free columns */
204  if (nfxd < minmn) {
205  sm = m - nfxd;
206  sn = n - nfxd;
207  sminmn = minmn - nfxd;
208 
209  if (nb < sminmn) {
210  j = nfxd;
211 
212  // Set the original matrix to the GPU
213  magma_dsetmatrix_async( m, sn,
214  A (0,j), lda,
215  dA(0,j), ldda, stream );
216  }
217 
218  /* Initialize partial column norms. */
219  for (j = nfxd; j < n; ++j) {
220  rwork[j] = cblas_dnrm2(sm, A(nfxd, j), ione);
221  rwork[n + j] = rwork[j];
222  }
223 
224  j = nfxd;
225  if (nb < sminmn) {
226  /* Use blocked code initially. */
227  magma_queue_sync( stream );
228 
229  /* Compute factorization: while loop. */
230  topbmn = minmn - nb;
231  while(j < topbmn) {
232  jb = min(nb, topbmn - j);
233 
234  /* Factorize JB columns among columns J:N. */
235  n_j = n - j;
236 
237  if (j>nfxd) {
238  // Get panel to the CPU
239  magma_dgetmatrix( m-j, jb,
240  dA(j,j), ldda,
241  A (j,j), lda );
242 
243  // Get the rows
244  magma_dgetmatrix( jb, n_j - jb,
245  dA(j,j + jb), ldda,
246  A (j,j + jb), lda );
247  }
248 
249  magma_dlaqps( m, n_j, j, jb, &fjb,
250  A (0, j), lda,
251  dA(0, j), ldda,
252  &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
253  work,
254  &work[jb], n_j,
255  &df[jb], n_j );
256 
257  j += fjb; /* fjb is actual number of columns factored */
258  }
259  }
260 
261  /* Use unblocked code to factor the last or only block. */
262  if (j < minmn) {
263  n_j = n - j;
264  if (j > nfxd) {
265  magma_dgetmatrix( m-j, n_j,
266  dA(j,j), ldda,
267  A (j,j), lda );
268  }
269  lapackf77_dlaqp2(&m, &n_j, &j, A(0, j), &lda, &jpvt[j],
270  &tau[j], &rwork[j], &rwork[n+j], work );
271  }
272  }
273 
274  work[0] = MAGMA_D_MAKE( lwkopt, 0. );
275  magma_free( dwork );
276 
277  magma_queue_destroy( stream );
278 
279  return *info;
280 } /* dgeqp3 */
#define MagmaTransStr
Definition: magma.h:81
#define min(a, b)
Definition: common_magma.h:86
#define magma_dsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_d.h:711
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
magma_int_t magma_dlaqps(magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, double *A, magma_int_t lda, double *dA, magma_int_t ldda, magma_int_t *jpvt, double *tau, double *vn1, double *vn2, double *auxv, double *F, magma_int_t ldf, double *dF, magma_int_t lddf)
Definition: dlaqps.cpp:18
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
magma_int_t ldda
#define magma_free(ptr)
Definition: magma.h:57
#define dA(i, j)
int magma_int_t
Definition: magmablas.h:12
#define MagmaLeftStr
Definition: magma.h:91
#define magma_dgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_d.h:705
#define magma_queue_destroy(queue)
Definition: magma.h:116
#define dwork(dev, i, j)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void lapackf77_dgeqrf(const magma_int_t *m, const magma_int_t *n, double *A, const magma_int_t *lda, double *tau, double *work, const magma_int_t *lwork, magma_int_t *info)
void lapackf77_dormqr(const char *side, const char *trans, const magma_int_t *m, const magma_int_t *n, const magma_int_t *k, const double *A, const magma_int_t *lda, const double *tau, double *C, const magma_int_t *ldc, double *work, const magma_int_t *lwork, magma_int_t *info)
double cblas_dnrm2(const int N, const double *X, const int incX)
#define A(i, j)
void lapackf77_dlaqp2(magma_int_t *m, magma_int_t *n, magma_int_t *offset, double *a, magma_int_t *lda, magma_int_t *jpvt, double *tau, double *vn1, double *vn2, double *work)
#define MAGMA_SUCCESS
Definition: magma.h:106
void blasf77_dswap(const magma_int_t *n, double *x, const magma_int_t *incx, double *y, const magma_int_t *incy)
magma_int_t magma_get_dgeqp3_nb(magma_int_t m)
Definition: get_nb.cpp:102
#define max(a, b)
Definition: common_magma.h:82
#define magma_queue_sync(queue)
Definition: magma.h:119

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqp3_gpu ( magma_int_t  m,
magma_int_t  n,
double *  A,
magma_int_t  lda,
magma_int_t *  jpvt,
double *  tau,
double *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 17 of file dgeqp3_gpu.cpp.

References __func__, A, blasf77_dswap(), magma_dcopymatrix, magma_dlaqps2_gpu(), magma_dmalloc(), MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_dgeqp3_nb(), magma_scopymatrix, MAGMA_SUCCESS, magma_xerbla(), magmablas_dnrm2_cols(), max, and min.

25 {
26 /* -- MAGMA (version 1.4.0) --
27  Univ. of Tennessee, Knoxville
28  Univ. of California, Berkeley
29  Univ. of Colorado, Denver
30  August 2013
31 
32  Purpose
33  =======
34  DGEQP3 computes a QR factorization with column pivoting of a
35  matrix A: A*P = Q*R using Level 3 BLAS.
36 
37  Arguments
38  =========
39  M (input) INTEGER
40  The number of rows of the matrix A. M >= 0.
41 
42  N (input) INTEGER
43  The number of columns of the matrix A. N >= 0.
44 
45  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
46  On entry, the M-by-N matrix A.
47  On exit, the upper triangle of the array contains the
48  min(M,N)-by-N upper trapezoidal matrix R; the elements below
49  the diagonal, together with the array TAU, represent the
50  unitary matrix Q as a product of min(M,N) elementary
51  reflectors.
52 
53  LDA (input) INTEGER
54  The leading dimension of the array A. LDA >= max(1,M).
55 
56  JPVT (input/output) INTEGER array, dimension (N)
57  On entry, if JPVT(J).ne.0, the J-th column of A is permuted
58  to the front of A*P (a leading column); if JPVT(J)=0,
59  the J-th column of A is a free column.
60  On exit, if JPVT(J)=K, then the J-th column of A*P was the
61  K-th column of A.
62 
63  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
64  The scalar factors of the elementary reflectors.
65 
66  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
67  On exit, if INFO=0, WORK(1) returns the optimal LWORK.
68 
69  LWORK (input) INTEGER
70  The dimension of the array WORK.
71  For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
72  for [cz]geqp3, LWORK >= (N+1)*NB,
73  where NB is the optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  For [cz]geqp3 only:
81  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
82 
83  INFO (output) INTEGER
84  = 0: successful exit.
85  < 0: if INFO = -i, the i-th argument had an illegal value.
86 
87  Further Details
88  ===============
89  The matrix Q is represented as a product of elementary reflectors
90 
91  Q = H(1) H(2) . . . H(k), where k = min(m,n).
92 
93  Each H(i) has the form
94 
95  H(i) = I - tau * v * v'
96 
97  where tau is a real scalar, and v is a real vector
98  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
99  A(i+1:m,i), and tau in TAU(i).
100  ===================================================================== */
101 
102 #define A(i, j) (A + (i) + (j)*(lda ))
103 
104  magma_int_t ione = 1;
105 
106  //magma_int_t na;
107  magma_int_t n_j;
108  magma_int_t j, jb, nb, sm, sn, fjb, nfxd, minmn;
109  magma_int_t topbmn, sminmn, lwkopt, lquery;
110 
111  *info = 0;
112  lquery = (lwork == -1);
113  if (m < 0) {
114  *info = -1;
115  } else if (n < 0) {
116  *info = -2;
117  } else if (lda < max(1,m)) {
118  *info = -4;
119  }
120 
121  nb = magma_get_dgeqp3_nb(min(m, n));
122  if (*info == 0) {
123  minmn = min(m,n);
124  if (minmn == 0) {
125  lwkopt = 1;
126  } else {
127  lwkopt = (n + 1)*nb;
128 #if defined(PRECISION_d) || defined(PRECISION_s)
129  lwkopt += 2*n;
130 #endif
131  }
132  //work[0] = MAGMA_D_MAKE( lwkopt, 0. );
133 
134  if (lwork < lwkopt && ! lquery) {
135  *info = -8;
136  }
137  }
138 
139  if (*info != 0) {
140  magma_xerbla( __func__, -(*info) );
141  return *info;
142  } else if (lquery) {
143  return *info;
144  }
145 
146  if (minmn == 0)
147  return *info;
148 
149 #if defined(PRECISION_d) || defined(PRECISION_s)
150  double *rwork = work + (n + 1)*nb;
151 #endif
152  double *df;
153  if (MAGMA_SUCCESS != magma_dmalloc( &df, (n+1)*nb )) {
154  *info = MAGMA_ERR_DEVICE_ALLOC;
155  return *info;
156  }
157  cudaMemset( df, 0, (n+1)*nb*sizeof(double) );
158 
159  nfxd = 0;
160  /* Move initial columns up front.
161  * Note jpvt uses 1-based indices for historical compatibility. */
162  for (j = 0; j < n; ++j) {
163  if (jpvt[j] != 0) {
164  if (j != nfxd) {
165  blasf77_dswap(&m, A(0, j), &ione, A(0, nfxd), &ione);
166  jpvt[j] = jpvt[nfxd];
167  jpvt[nfxd] = j + 1;
168  }
169  else {
170  jpvt[j] = j + 1;
171  }
172  ++nfxd;
173  }
174  else {
175  jpvt[j] = j + 1;
176  }
177  }
178 
179  /* Factorize fixed columns
180  =======================
181  Compute the QR factorization of fixed columns and update
182  remaining columns.
183  if (nfxd > 0) {
184  na = min(m,nfxd);
185  lapackf77_dgeqrf(&m, &na, A, &lda, tau, work, &lwork, info);
186  if (na < n) {
187  n_j = n - na;
188  lapackf77_dormqr( MagmaLeftStr, MagmaTransStr, &m, &n_j, &na,
189  A, &lda, tau, A(0, na), &lda,
190  work, &lwork, info );
191  }
192  }*/
193 
194  /* Factorize free columns */
195  if (nfxd < minmn) {
196  sm = m - nfxd;
197  sn = n - nfxd;
198  sminmn = minmn - nfxd;
199 
200  /*if (nb < sminmn) {
201  j = nfxd;
202 
203  // Set the original matrix to the GPU
204  magma_dsetmatrix_async( m, sn,
205  A (0,j), lda,
206  dA(0,j), ldda, stream[0] );
207  }*/
208 
209  /* Initialize partial column norms. */
210  magmablas_dnrm2_cols(sm, sn, A(nfxd,nfxd), lda, &rwork[nfxd]);
211 #if defined(PRECISION_d) || defined(PRECISION_z)
212  magma_dcopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
213 #else
214  magma_scopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
215 #endif
216  /*for (j = nfxd; j < n; ++j) {
217  rwork[j] = cblas_dnrm2(sm, A(nfxd, j), ione);
218  rwork[n + j] = rwork[j];
219  }*/
220 
221  j = nfxd;
222  //if (nb < sminmn)
223  {
224  /* Use blocked code initially. */
225  //magma_queue_sync( stream[0] );
226 
227  /* Compute factorization: while loop. */
228  topbmn = minmn;// - nb;
229  while(j < topbmn) {
230  jb = min(nb, topbmn - j);
231 
232  /* Factorize JB columns among columns J:N. */
233  n_j = n - j;
234 
235  /*if (j>nfxd) {
236  // Get panel to the CPU
237  magma_dgetmatrix( m-j, jb,
238  dA(j,j), ldda,
239  A (j,j), lda );
240 
241  // Get the rows
242  magma_dgetmatrix( jb, n_j - jb,
243  dA(j,j + jb), ldda,
244  A (j,j + jb), lda );
245  }*/
246 
247  //magma_dlaqps_gpu // this is a cpp-file
248  magma_dlaqps2_gpu // this is a cuda-file
249  ( m, n_j, j, jb, &fjb,
250  A (0, j), lda,
251  &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
252  work,
253  &df[jb], n_j );
254 
255  j += fjb; /* fjb is actual number of columns factored */
256  }
257  }
258 
259  /* Use unblocked code to factor the last or only block.
260  if (j < minmn) {
261  n_j = n - j;
262  if (j > nfxd) {
263  magma_dgetmatrix( m-j, n_j,
264  dA(j,j), ldda,
265  A (j,j), lda );
266  }
267  lapackf77_dlaqp2(&m, &n_j, &j, A(0, j), &lda, &jpvt[j],
268  &tau[j], &rwork[j], &rwork[n+j], work );
269  }*/
270  }
271  //work[0] = MAGMA_D_MAKE( lwkopt, 0. );
272  magma_free(df);
273 
274  return *info;
275 } /* dgeqp3 */
#define min(a, b)
Definition: common_magma.h:86
#define A(i, j)
#define __func__
Definition: common_magma.h:65
static magma_err_t magma_dmalloc(magmaDouble_ptr *ptrPtr, size_t n)
Definition: magma.h:78
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
int magma_int_t
Definition: magmablas.h:12
magma_int_t magma_dlaqps2_gpu(magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, double *A, magma_int_t lda, magma_int_t *jpvt, double *tau, double *vn1, double *vn2, double *auxv, double *dF, magma_int_t lddf)
#define magma_scopymatrix(m, n, dA_src, ldda, dB_dst, lddb)
Definition: magmablas_s.h:708
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void magmablas_dnrm2_cols(magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dxnorm)
#define magma_dcopymatrix(m, n, dA_src, ldda, dB_dst, lddb)
Definition: magmablas_d.h:708
#define MAGMA_SUCCESS
Definition: magma.h:106
void blasf77_dswap(const magma_int_t *n, double *x, const magma_int_t *incx, double *y, const magma_int_t *incy)
magma_int_t magma_get_dgeqp3_nb(magma_int_t m)
Definition: get_nb.cpp:102
#define max(a, b)
Definition: common_magma.h:82

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqr2_gpu ( magma_int_t  m,
magma_int_t  n,
double *  dA,
magma_int_t  lda,
double *  tau,
double *  work,
magma_int_t *  info 
)

Here is the caller graph for this function:

magma_int_t magma_dgeqr2x2_gpu ( magma_int_t *  m,
magma_int_t *  n,
double *  dA,
magma_int_t *  ldda,
double *  dtau,
double *  dT,
double *  ddA,
double *  dwork,
magma_int_t *  info 
)

Definition at line 14 of file dgeqr2x_gpu-v2.cpp.

References __func__, da_ref, magma_dlarfbx_gpu(), magma_dlarfgtx_gpu(), magma_xerbla(), magmablas_dnrm2_adjust(), magmablas_dnrm2_cols(), max, and min.

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  DGEQR2 computes a QR factorization of a real m by n matrix A:
28  A = Q * R.
29 
30  This expert routine requires two more arguments than the standard
31  dgeqr2, namely, dT and ddA, explained below. The storage for A is
32  also not as in the LAPACK's dgeqr2 routine (see below).
33 
34  The first is used to output the triangular
35  n x n factor T of the block reflector used in the factorization.
36  The second holds the diagonal nxn blocks of A, i.e., the diagonal
37  submatrices of R. This routine implements the left looking QR.
38 
39  Arguments
40  =========
41  M (input) INTEGER
42  The number of rows of the matrix A. M >= 0.
43 
44  N (input) INTEGER
45  The number of columns of the matrix A. N >= 0.
46 
47  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
48  On entry, the m by n matrix A.
49  On exit, the unitary matrix Q as a
50  product of elementary reflectors (see Further Details).
51 
52  the elements on and above the diagonal of the array
53  contain the min(m,n) by n upper trapezoidal matrix R (R is
54  upper triangular if m >= n); the elements below the diagonal,
55  with the array TAU, represent the unitary matrix Q as a
56  product of elementary reflectors (see Further Details).
57 
58  LDA (input) INTEGER
59  The leading dimension of the array A. LDA >= max(1,M).
60 
61  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
62  The scalar factors of the elementary reflectors (see Further
63  Details).
64 
65  dT (output) DOUBLE_PRECISION array, dimension N x N.
66  Stores the triangular N x N factor T of the block reflector
67  used in the factorization. The lower triangular part is 0.
68 
69  ddA (output) DOUBLE_PRECISION array, dimension N x N.
70  Stores the elements of the upper N x N diagonal block of A.
71  LAPACK stores this array in A. There are 0s below the diagonal.
72 
73  RWORK (workspace) DOUBLE_PRECISION array, dimension (3 N)
74 
75  INFO (output) INTEGER
76  = 0: successful exit
77  < 0: if INFO = -i, the i-th argument had an illegal value
78 
79  Further Details
80  ===============
81  The matrix Q is represented as a product of elementary reflectors
82 
83  Q = H(1) H(2) . . . H(k), where k = min(m,n).
84 
85  Each H(i) has the form
86 
87  H(i) = I - tau * v * v'
88 
89  where tau is a real scalar, and v is a real vector with
90  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
91  and tau in TAU(i).
92  ===================================================================== */
93 
94  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
95 
96  magma_int_t i, k;
97 
98  double *work = (double *)dwork;
99  double *dnorm = dwork + 4*(*n);
100 
101 
102  *info = 0;
103  if (*m < 0) {
104  *info = -1;
105  } else if (*n < 0) {
106  *info = -2;
107  } else if (*ldda < max(1,*m)) {
108  *info = -4;
109  }
110  if (*info != 0) {
111  magma_xerbla( __func__, -(*info) );
112  return *info;
113  }
114 
115  /* Compute the norms of the trailing columns */
116  k = min(*m,*n);
117  magmablas_dnrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
118 
119  for (i = 0; i < k; ++i) {
120  /* 1. Apply H' to A(:,i) from the left
121  2. Adjust the dnorm[i] to hold the norm of A(i:m,i) */
122  if (i>0) {
123  magma_dlarfbx_gpu(*m, i, da_ref(0, 0), *ldda,
124  dT, k, da_ref(0, i), work);
125  magmablas_dnrm2_adjust(i, dnorm+i, da_ref(0, i));
126  }
127 
128  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
129  1. 1 is not yet put on the diagonal of A
130  2. Elements above the diagonal are copied in ddA and the ones
131  in A are set to zero
132  3. update T */
133  magma_dlarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i,
134  dnorm+i, ddA + i + i*(*n), i,
135  da_ref(i,0), *ldda, dT, k, work);
136  }
137 
138  return *info;
139 } /* magma_dgeqr2 */
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
magma_int_t ldda
int magma_int_t
Definition: magmablas.h:12
void magmablas_dnrm2_adjust(magma_int_t k, double *xnorm, double *c)
void magma_dlarfgtx_gpu(magma_int_t n, double *dx0, double *dx, double *dtau, double *dxnorm, double *dA, magma_int_t it, double *V, magma_int_t ldv, double *T, magma_int_t ldt, double *dwork)
#define dwork(dev, i, j)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
void magmablas_dnrm2_cols(magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dxnorm)
#define da_ref(a_1, a_2)
void magma_dlarfbx_gpu(magma_int_t m, magma_int_t k, double *V, magma_int_t ldv, double *dT, magma_int_t ldt, double *c, double *dwork)
#define dT(m)
#define max(a, b)
Definition: common_magma.h:82

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqr2x3_gpu ( magma_int_t *  m,
magma_int_t *  n,
double *  dA,
magma_int_t *  ldda,
double *  dtau,
double *  dT,
double *  ddA,
double *  dwork,
magma_int_t *  info 
)

Definition at line 54 of file dgeqr2x_gpu-v3.cpp.

References __func__, BLOCK_SIZE, da_ref, dwork, magma_dlarfb2_gpu(), magma_dlarfbx_gpu(), magma_dlarfgtx_gpu(), magma_xerbla(), magmablas_dnrm2_adjust(), magmablas_dnrm2_cols(), max, and min.

58 {
59 /* -- MAGMA (version 1.4.0) --
60  Univ. of Tennessee, Knoxville
61  Univ. of California, Berkeley
62  Univ. of Colorado, Denver
63  August 2013
64 
65  Purpose
66  =======
67  DGEQR2 computes a QR factorization of a real m by n matrix A:
68  A = Q * R.
69 
70  This expert routine requires two more arguments than the standard
71  dgeqr2, namely, dT and ddA, explained below. The storage for A is
72  also not as in the LAPACK's dgeqr2 routine (see below).
73 
74  The first is used to output the triangular
75  n x n factor T of the block reflector used in the factorization.
76  The second holds the diagonal nxn blocks of A, i.e., the diagonal
77  submatrices of R. This routine implements the left looking QR.
78 
79  This version adds internal blocking.
80 
81  Arguments
82  =========
83  M (input) INTEGER
84  The number of rows of the matrix A. M >= 0.
85 
86  N (input) INTEGER
87  The number of columns of the matrix A. N >= 0.
88 
89  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
90  On entry, the m by n matrix A.
91  On exit, the unitary matrix Q as a
92  product of elementary reflectors (see Further Details).
93 
94  the elements on and above the diagonal of the array
95  contain the min(m,n) by n upper trapezoidal matrix R (R is
96  upper triangular if m >= n); the elements below the diagonal,
97  with the array TAU, represent the unitary matrix Q as a
98  product of elementary reflectors (see Further Details).
99 
100  LDA (input) INTEGER
101  The leading dimension of the array A. LDA >= max(1,M).
102 
103  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
104  The scalar factors of the elementary reflectors (see Further
105  Details).
106 
107  dT (output) DOUBLE_PRECISION array, dimension N x N.
108  Stores the triangular N x N factor T of the block reflector
109  used in the factorization. The lower triangular part is 0.
110 
111  ddA (output) DOUBLE_PRECISION array, dimension N x N.
112  Stores the elements of the upper N x N diagonal block of A.
113  LAPACK stores this array in A. There are 0s below the diagonal.
114 
115  RWORK (workspace) DOUBLE_PRECISION array, dimension (3 N)
116 
117  INFO (output) INTEGER
118  = 0: successful exit
119  < 0: if INFO = -i, the i-th argument had an illegal value
120 
121  Further Details
122  ===============
123  The matrix Q is represented as a product of elementary reflectors
124 
125  Q = H(1) H(2) . . . H(k), where k = min(m,n).
126 
127  Each H(i) has the form
128 
129  H(i) = I - tau * v * v'
130 
131  where tau is a real scalar, and v is a real vector with
132  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
133  and tau in TAU(i).
134  ===================================================================== */
135 
136  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
137  #define BLOCK_SIZE 32
138 
139  magma_int_t i, k;
140 
141  double *dnorm = dwork;
142  double *work = (double *)(dwork+2*(*n));
143 
144  *info = 0;
145  if (*m < 0) {
146  *info = -1;
147  } else if (*n < 0) {
148  *info = -2;
149  } else if (*ldda < max(1,*m)) {
150  *info = -4;
151  }
152  if (*info != 0) {
153  magma_xerbla( __func__, -(*info) );
154  return *info;
155  }
156 
157  /* Compute the norms of the trailing columns */
158  k = min(*m,*n);
159  magmablas_dnrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
160 
161  for (int b=0; b < k; b += BLOCK_SIZE) {
162  for (i = b; i < min(k, b+BLOCK_SIZE); ++i) {
163 
164  /* Apply H' to A(:,i) from the left */
165  if ( i-b > 0)
166  magma_dlarfbx_gpu(*m-b, i-b, da_ref(b, b), *ldda,
167  dT+b+b*k, k, da_ref(b, i), work);
168 
169  /* Adjust the dnorm[i] to hold the norm of A(i:m,i) */
170  if ( i > 0 )
171  magmablas_dnrm2_adjust(i, dnorm+i, da_ref(0, i));
172 
173  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
174  1. 1 is not yet put on the diagonal of A
175  2. Elements above the diagonal are copied in ddA and
176  the ones in A are set to zero
177  3. update T */
178  magma_dlarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i,
179  dnorm+i, ddA + i + i*(*n), i,
180  da_ref(i,0), *ldda, dT, k, work);
181  }
182 
183  /* Apply the transformations to the trailing matrix. */
 184  //magma_dlarfb2_gpu( MagmaLeft, MagmaTrans, MagmaForward, MagmaColumnwise,
 185  magma_dlarfb2_gpu(
 186  *m-b, k-i, BLOCK_SIZE,
187  da_ref(b, b), *ldda, dT+b+b*k, k,
188  da_ref(b, i), *ldda, work, k-i);
189  }
190 
191  return *info;
192 } /* magma_dgeqr2 */
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
magma_int_t ldda
int magma_int_t
Definition: magmablas.h:12
#define BLOCK_SIZE
void magmablas_dnrm2_adjust(magma_int_t k, double *xnorm, double *c)
void magma_dlarfgtx_gpu(magma_int_t n, double *dx0, double *dx, double *dtau, double *dxnorm, double *dA, magma_int_t it, double *V, magma_int_t ldv, double *T, magma_int_t ldt, double *dwork)
#define dwork(dev, i, j)
#define da_ref(a_1, a_2)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
magma_int_t magma_dlarfb2_gpu(magma_int_t m, magma_int_t n, magma_int_t k, const double *dV, magma_int_t ldv, const double *dT, magma_int_t ldt, double *dC, magma_int_t ldc, double *dwork, magma_int_t ldwork)
void magmablas_dnrm2_cols(magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dxnorm)
void magma_dlarfbx_gpu(magma_int_t m, magma_int_t k, double *V, magma_int_t ldv, double *dT, magma_int_t ldt, double *c, double *dwork)
#define dT(m)
#define max(a, b)
Definition: common_magma.h:82

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqr2x4_gpu ( magma_int_t *  m,
magma_int_t *  n,
double *  dA,
magma_int_t *  ldda,
double *  dtau,
double *  dT,
double *  ddA,
double *  dwork,
magma_int_t *  info,
magma_queue_t  stream 
)

Here is the caller graph for this function:

magma_int_t magma_dgeqr2x_gpu ( magma_int_t *  m,
magma_int_t *  n,
double *  dA,
magma_int_t *  ldda,
double *  dtau,
double *  dT,
double *  ddA,
double *  dwork,
magma_int_t *  info 
)

Definition at line 14 of file dgeqr2x_gpu.cpp.

References __func__, da_ref, dwork, magma_dlarfgx_gpu(), magma_dlarfx_gpu(), magma_xerbla(), magmablas_dnrm2_cols(), max, and min.

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  DGEQR2 computes a QR factorization of a real m by n matrix A:
28  A = Q * R.
29 
30  This expert routine requires two more arguments than the standard
31  dgeqr2, namely, dT and ddA, explained below. The storage for A is
32  also not as in the LAPACK's dgeqr2 routine (see below).
33 
34  The first is used to output the triangular
35  n x n factor T of the block reflector used in the factorization.
36  The second holds the diagonal nxn blocks of A, i.e., the diagonal
37  submatrices of R.
38 
39  This version implements the right-looking QR.
40 
41  Arguments
42  =========
43  M (input) INTEGER
44  The number of rows of the matrix A. M >= 0.
45 
46  N (input) INTEGER
47  The number of columns of the matrix A. N >= 0.
48 
49  A (input/output) COMPLEX*16 array, dimension (LDA,N)
50  On entry, the m by n matrix A.
51  On exit, the unitary matrix Q as a
52  product of elementary reflectors (see Further Details).
53 
54  the elements on and above the diagonal of the array
55  contain the min(m,n) by n upper trapezoidal matrix R (R is
56  upper triangular if m >= n); the elements below the diagonal,
57  with the array TAU, represent the unitary matrix Q as a
58  product of elementary reflectors (see Further Details).
59 
60  LDA (input) INTEGER
61  The leading dimension of the array A. LDA >= max(1,M).
62 
63  TAU (output) COMPLEX*16 array, dimension (min(M,N))
64  The scalar factors of the elementary reflectors (see Further
65  Details).
66 
67  dT (output) COMPLEX*16 array, dimension N x N.
68  Stores the triangular N x N factor T of the block reflector
69  used in the factorization. The lower triangular part is 0.
70 
71  ddA (output) COMPLEX*16 array, dimension N x N.
72  Stores the elements of the upper N x N diagonal block of A.
73  LAPACK stores this array in A. There are 0s below the diagonal.
74 
75  WORK (workspace) COMPLEX*16 array, dimension (N)
76 
77  INFO (output) INTEGER
78  = 0: successful exit
79  < 0: if INFO = -i, the i-th argument had an illegal value
80 
81  Further Details
82  ===============
83  The matrix Q is represented as a product of elementary reflectors
84 
85  Q = H(1) H(2) . . . H(k), where k = min(m,n).
86 
87  Each H(i) has the form
88 
89  H(i) = I - tau * v * v'
90 
91  where tau is a real scalar, and v is a real vector with
92  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
93  and tau in TAU(i).
94  ===================================================================== */
95 
96  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
97 
98  magma_int_t i, k;
99 
100  double *dnorm = dwork;
101  double *work = (double *)(dwork+2*(*n));
102 
103  *info = 0;
104  if (*m < 0) {
105  *info = -1;
106  } else if (*n < 0) {
107  *info = -2;
108  } else if (*ldda < max(1,*m)) {
109  *info = -4;
110  }
111  if (*info != 0) {
112  magma_xerbla( __func__, -(*info) );
113  return *info;
114  }
115 
116  /* Compute the norms of the trailing columns */
117  k = min(*m,*n);
118  magmablas_dnrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
119 
120  for (i = 0; i < k; ++i) {
121  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
122  magma_dlarfgx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i, dnorm+i,
123  ddA + i + i*(*n), i);
124 
125  if (i < *n) {
126  /* Apply H(i)' to A(i:m,i+1:n) from the left */
127  magma_dlarfx_gpu(*m-i, *n-i-1, da_ref(i, i), dtau+i,
128  //da_ref(i, i+1), *ldda, dnorm+i+1,
129  da_ref(i, 0), *ldda, dnorm+i+1,
130  dT, i, work );
131  }
132  }
133 
134  return *info;
135 } /* magma_dgeqr2 */
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
magma_int_t ldda
int magma_int_t
Definition: magmablas.h:12
void magma_dlarfgx_gpu(magma_int_t n, double *dx0, double *dx, double *dtau, double *dxnorm, double *ddx0, magma_int_t iter)
#define dwork(dev, i, j)
void magma_dlarfx_gpu(magma_int_t m, magma_int_t n, double *v, double *tau, double *c, magma_int_t ldc, double *xnorm, double *dT, magma_int_t iter, double *work)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define da_ref(a_1, a_2)
void magmablas_dnrm2_cols(magma_int_t m, magma_int_t n, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dxnorm)
#define dT(m)
#define max(a, b)
Definition: common_magma.h:82

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_dgeqrf ( magma_int_t  m,
magma_int_t  n,
double *  A,
magma_int_t  lda,
double *  tau,
double *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 15 of file dgeqrf.cpp.

References __func__, A, dA, dpanel_to_q(), dq_to_panel(), dT, dwork, lapackf77_dgeqrf(), lapackf77_dlarft(), dgehrd_data::ldda, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgeqrf4(), magma_dgeqrf_ooc(), magma_dgetmatrix, magma_dgetmatrix_async, magma_dlarfb_gpu(), magma_dmalloc(), magma_dsetmatrix_async, magma_free, magma_get_dgeqrf_nb(), magma_num_gpus(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), magmablasGetKernelStream(), magmablasSetKernelStream(), MagmaColumnwise, MagmaColumnwiseStr, MagmaForward, MagmaForwardStr, MagmaLeft, MagmaTrans, MagmaUpper, max, and min.

19 {
20 /* -- MAGMA (version 1.4.0) --
21  Univ. of Tennessee, Knoxville
22  Univ. of California, Berkeley
23  Univ. of Colorado, Denver
24  August 2013
25 
26  Purpose
27  =======
28  DGEQRF computes a QR factorization of a DOUBLE_PRECISION M-by-N matrix A:
29  A = Q * R. This version does not require work space on the GPU
30  passed as input. GPU memory is allocated in the routine.
31 
32  If the current stream is NULL, this version replaces it with user defined
33  stream to overlap computation with communication.
34 
35  Arguments
36  =========
37  M (input) INTEGER
38  The number of rows of the matrix A. M >= 0.
39 
40  N (input) INTEGER
41  The number of columns of the matrix A. N >= 0.
42 
43  A (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
44  On entry, the M-by-N matrix A.
45  On exit, the elements on and above the diagonal of the array
46  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
47  upper triangular if m >= n); the elements below the diagonal,
48  with the array TAU, represent the orthogonal matrix Q as a
49  product of min(m,n) elementary reflectors (see Further
50  Details).
51 
52  Higher performance is achieved if A is in pinned memory, e.g.
53  allocated using magma_malloc_pinned.
54 
55  LDA (input) INTEGER
56  The leading dimension of the array A. LDA >= max(1,M).
57 
58  TAU (output) DOUBLE_PRECISION array, dimension (min(M,N))
59  The scalar factors of the elementary reflectors (see Further
60  Details).
61 
62  WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
63  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
64 
65  Higher performance is achieved if WORK is in pinned memory, e.g.
66  allocated using magma_malloc_pinned.
67 
68  LWORK (input) INTEGER
69  The dimension of the array WORK. LWORK >= max( N*NB, 2*NB*NB ),
70  where NB can be obtained through magma_get_dgeqrf_nb(M).
71 
72  If LWORK = -1, then a workspace query is assumed; the routine
73  only calculates the optimal size of the WORK array, returns
74  this value as the first entry of the WORK array, and no error
75  message related to LWORK is issued.
76 
77  INFO (output) INTEGER
78  = 0: successful exit
79  < 0: if INFO = -i, the i-th argument had an illegal value
 80  or another error occurred, such as memory allocation failed.
81 
82  Further Details
83  ===============
84  The matrix Q is represented as a product of elementary reflectors
85 
86  Q = H(1) H(2) . . . H(k), where k = min(m,n).
87 
88  Each H(i) has the form
89 
90  H(i) = I - tau * v * v'
91 
92  where tau is a real scalar, and v is a real vector with
93  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
94  and tau in TAU(i).
95  ===================================================================== */
96 
97  #define A(i,j) ( A + (i) + (j)*lda )
98  #define dA(i,j) (dA + (i) + (j)*ldda)
99 
100  double *dA, *dwork, *dT;
101  double c_one = MAGMA_D_ONE;
102 
103  magma_int_t i, k, lddwork, old_i, old_ib;
104  magma_int_t ib, ldda;
105 
106  /* Function Body */
107  *info = 0;
108  magma_int_t nb = magma_get_dgeqrf_nb(min(m, n));
109 
110  // need 2*nb*nb to store T and upper triangle of V simultaneously
111  magma_int_t lwkopt = max(n*nb, 2*nb*nb);
112  work[0] = MAGMA_D_MAKE( (double)lwkopt, 0 );
113  int lquery = (lwork == -1);
114  if (m < 0) {
115  *info = -1;
116  } else if (n < 0) {
117  *info = -2;
118  } else if (lda < max(1,m)) {
119  *info = -4;
120  } else if (lwork < max(1, lwkopt) && ! lquery) {
121  *info = -7;
122  }
123  if (*info != 0) {
124  magma_xerbla( __func__, -(*info) );
125  return *info;
126  }
127  else if (lquery)
128  return *info;
129 
130  k = min(m,n);
131  if (k == 0) {
132  work[0] = c_one;
133  return *info;
134  }
135 
136  // largest N for larfb is n-nb (trailing matrix lacks 1st panel)
137  lddwork = ((n+31)/32)*32 - nb;
138  ldda = ((m+31)/32)*32;
139 
140  magma_int_t num_gpus = magma_num_gpus();
141  if( num_gpus > 1 ) {
142  /* call multiple-GPU interface */
143  return magma_dgeqrf4(num_gpus, m, n, A, lda, tau, work, lwork, info);
144  }
145 
146  // allocate space for dA, dwork, and dT
147  if (MAGMA_SUCCESS != magma_dmalloc( &dA, n*ldda + nb*lddwork + nb*nb )) {
148  /* Switch to the "out-of-core" (out of GPU-memory) version */
149  return magma_dgeqrf_ooc(m, n, A, lda, tau, work, lwork, info);
150  }
151 
152  /* Define user stream if current stream is NULL */
153  magma_queue_t stream[3], current_stream;
154  magmablasGetKernelStream(&current_stream);
155 
156  magma_queue_create( &stream[0] );
157  magma_queue_create( &stream[2] );
158  if (current_stream == NULL) {
159  magma_queue_create( &stream[1] );
160  magmablasSetKernelStream(stream[1]);
161  }
162  else
163  stream[1] = current_stream;
164 
165  dwork = dA + n*ldda;
166  dT = dA + n*ldda + nb*lddwork;
167 
168  if ( (nb > 1) && (nb < k) ) {
169  /* Use blocked code initially.
170  Asynchronously send the matrix to the GPU except the first panel. */
171  magma_dsetmatrix_async( m, n-nb,
172  A(0,nb), lda,
173  dA(0,nb), ldda, stream[2] );
174 
175  old_i = 0;
176  old_ib = nb;
177  for (i = 0; i < k-nb; i += nb) {
178  ib = min(k-i, nb);
179  if (i>0) {
180  /* download i-th panel */
181  magma_queue_sync( stream[1] );
182  magma_dgetmatrix_async( m-i, ib,
183  dA(i,i), ldda,
184  A(i,i), lda, stream[0] );
185 
186  /* Apply H' to A(i:m,i+2*ib:n) from the left */
 187  magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward, MagmaColumnwise,
 188  m-old_i, n-old_i-2*old_ib, old_ib,
189  dA(old_i, old_i), ldda, dT, nb,
190  dA(old_i, old_i+2*old_ib), ldda, dwork, lddwork);
191 
192  magma_dgetmatrix_async( i, ib,
193  dA(0,i), ldda,
194  A(0,i), lda, stream[2] );
195  magma_queue_sync( stream[0] );
196  }
197 
198  magma_int_t rows = m-i;
199  lapackf77_dgeqrf(&rows, &ib, A(i,i), &lda, tau+i, work, &lwork, info);
200  /* Form the triangular factor of the block reflector
201  H = H(i) H(i+1) . . . H(i+ib-1) */
 202  lapackf77_dlarft( MagmaForwardStr, MagmaColumnwiseStr,
 203  &rows, &ib, A(i,i), &lda, tau+i, work, &ib);
204 
205  dpanel_to_q(MagmaUpper, ib, A(i,i), lda, work+ib*ib);
206 
207  /* download the i-th V matrix */
208  magma_dsetmatrix_async( rows, ib, A(i,i), lda, dA(i,i), ldda, stream[0] );
209 
210  /* download the T matrix */
211  magma_dsetmatrix_async( ib, ib, work, ib, dT, nb, stream[0] );
212  magma_queue_sync( stream[0] );
213 
214  if (i + ib < n) {
215 
216  if (i+ib < k-nb) {
217  /* Apply H' to A(i:m,i+ib:i+2*ib) from the left (look-ahead) */
 218  magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward, MagmaColumnwise,
 219  rows, ib, ib,
220  dA(i, i ), ldda, dT, nb,
221  dA(i, i+ib), ldda, dwork, lddwork);
222  dq_to_panel(MagmaUpper, ib, A(i,i), lda, work+ib*ib);
223  }
224  else {
225  /* After last panel, update whole trailing matrix. */
226  /* Apply H' to A(i:m,i+ib:n) from the left */
 227  magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward, MagmaColumnwise,
 228  rows, n-i-ib, ib,
229  dA(i, i ), ldda, dT, nb,
230  dA(i, i+ib), ldda, dwork, lddwork);