MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
magma_s.h File Reference
#include "magma_types.h"
#include "magma_sgehrd_m.h"


Macros

#define PRECISION_s
 

Functions

magma_int_t magma_get_spotrf_nb (magma_int_t m)
 
magma_int_t magma_get_sgetrf_nb (magma_int_t m)
 
magma_int_t magma_get_sgetri_nb (magma_int_t m)
 
magma_int_t magma_get_sgeqp3_nb (magma_int_t m)
 
magma_int_t magma_get_sgeqrf_nb (magma_int_t m)
 
magma_int_t magma_get_sgeqlf_nb (magma_int_t m)
 
magma_int_t magma_get_sgehrd_nb (magma_int_t m)
 
magma_int_t magma_get_ssytrd_nb (magma_int_t m)
 
magma_int_t magma_get_sgelqf_nb (magma_int_t m)
 
magma_int_t magma_get_sgebrd_nb (magma_int_t m)
 
magma_int_t magma_get_ssygst_nb (magma_int_t m)
 
magma_int_t magma_get_sgesvd_nb (magma_int_t m)
 
magma_int_t magma_get_ssygst_nb_m (magma_int_t m)
 
magma_int_t magma_get_sbulge_nb (magma_int_t m, magma_int_t nbthreads)
 
magma_int_t magma_get_sbulge_nb_mgpu (magma_int_t m)
 
magma_int_t magma_sbulge_get_Vblksiz (magma_int_t m, magma_int_t nb, magma_int_t nbthreads)
 
magma_int_t magma_get_sbulge_gcperf ()
 
magma_int_t magma_get_smlsize_divideconquer ()
 
void magma_smove_eig (char range, magma_int_t n, float *w, magma_int_t *il, magma_int_t *iu, float vl, float vu, magma_int_t *m)
 
magma_int_t magma_sgebrd (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, float *d, float *e, float *tauq, float *taup, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgehrd2 (magma_int_t n, magma_int_t ilo, magma_int_t ihi, float *A, magma_int_t lda, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgehrd (magma_int_t n, magma_int_t ilo, magma_int_t ihi, float *A, magma_int_t lda, float *tau, float *work, magma_int_t lwork, float *dT, magma_int_t *info)
 
magma_int_t magma_sgelqf (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgeqlf (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgeqrf (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgeqrf4 (magma_int_t num_gpus, magma_int_t m, magma_int_t n, float *a, magma_int_t lda, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgeqrf_ooc (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgesv (magma_int_t n, magma_int_t nrhs, float *A, magma_int_t lda, magma_int_t *ipiv, float *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_sgetrf (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_sgetrf2 (magma_int_t m, magma_int_t n, float *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_slaqps (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, float *A, magma_int_t lda, float *dA, magma_int_t ldda, magma_int_t *jpvt, float *tau, float *vn1, float *vn2, float *auxv, float *F, magma_int_t ldf, float *dF, magma_int_t lddf)
 
void magma_slarfg (magma_int_t n, float *alpha, float *x, magma_int_t incx, float *tau)
 
magma_int_t magma_slatrd (char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *e, float *tau, float *w, magma_int_t ldw, float *da, magma_int_t ldda, float *dw, magma_int_t lddw)
 
magma_int_t magma_slatrd2 (char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *e, float *tau, float *w, magma_int_t ldw, float *da, magma_int_t ldda, float *dw, magma_int_t lddw, float *dwork, magma_int_t ldwork)
 
magma_int_t magma_slahr2 (magma_int_t m, magma_int_t n, magma_int_t nb, float *da, float *dv, float *a, magma_int_t lda, float *tau, float *t, magma_int_t ldt, float *y, magma_int_t ldy)
 
magma_int_t magma_slahru (magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, float *a, magma_int_t lda, float *da, float *y, float *v, float *t, float *dwork)
 
magma_int_t magma_sposv (char uplo, magma_int_t n, magma_int_t nrhs, float *A, magma_int_t lda, float *B, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_spotrf (char uplo, magma_int_t n, float *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_spotri (char uplo, magma_int_t n, float *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_slauum (char uplo, magma_int_t n, float *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_strtri (char uplo, char diag, magma_int_t n, float *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_ssytrd (char uplo, magma_int_t n, float *A, magma_int_t lda, float *d, float *e, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sorgqr (magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, float *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_sorgqr2 (magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, magma_int_t *info)
 
magma_int_t magma_sormql (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sormqr (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sormtr (char side, char uplo, char trans, magma_int_t m, magma_int_t n, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sorghr (magma_int_t n, magma_int_t ilo, magma_int_t ihi, float *a, magma_int_t lda, float *tau, float *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_sgeev (char jobvl, char jobvr, magma_int_t n, float *a, magma_int_t lda, float *wr, float *wi, float *vl, magma_int_t ldvl, float *vr, magma_int_t ldvr, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgeqp3 (magma_int_t m, magma_int_t n, float *a, magma_int_t lda, magma_int_t *jpvt, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgesvd (char jobu, char jobvt, magma_int_t m, magma_int_t n, float *a, magma_int_t lda, float *s, float *u, magma_int_t ldu, float *vt, magma_int_t ldvt, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_ssyevd (char jobz, char uplo, magma_int_t n, float *a, magma_int_t lda, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssyevdx (char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssyevdx_2stage (char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssygvd (magma_int_t itype, char jobz, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssygvdx (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssygvdx_2stage (magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_sstedx (char range, magma_int_t n, float vl, float vu, magma_int_t il, magma_int_t iu, float *d, float *e, float *z, magma_int_t ldz, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, float *dwork, magma_int_t *info)
 
magma_int_t magma_slaex0 (magma_int_t n, float *d, float *e, float *q, magma_int_t ldq, float *work, magma_int_t *iwork, float *dwork, char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
 
magma_int_t magma_slaex1 (magma_int_t n, float *d, float *q, magma_int_t ldq, magma_int_t *indxq, float rho, magma_int_t cutpnt, float *work, magma_int_t *iwork, float *dwork, char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
 
magma_int_t magma_slaex3 (magma_int_t k, magma_int_t n, magma_int_t n1, float *d, float *q, magma_int_t ldq, float rho, float *dlamda, float *q2, magma_int_t *indx, magma_int_t *ctot, float *w, float *s, magma_int_t *indxq, float *dwork, char range, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *info)
 
magma_int_t magma_ssygst (magma_int_t itype, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_slahr2_m (magma_int_t n, magma_int_t k, magma_int_t nb, float *A, magma_int_t lda, float *tau, float *T, magma_int_t ldt, float *Y, magma_int_t ldy, struct sgehrd_data *data)
 
magma_int_t magma_slahru_m (magma_int_t n, magma_int_t ihi, magma_int_t k, magma_int_t nb, float *A, magma_int_t lda, struct sgehrd_data *data)
 
magma_int_t magma_sgeev_m (char jobvl, char jobvr, magma_int_t n, float *A, magma_int_t lda, float *WR, float *WI, float *vl, magma_int_t ldvl, float *vr, magma_int_t ldvr, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgehrd_m (magma_int_t n, magma_int_t ilo, magma_int_t ihi, float *A, magma_int_t lda, float *tau, float *work, magma_int_t lwork, float *T, magma_int_t *info)
 
magma_int_t magma_sorghr_m (magma_int_t n, magma_int_t ilo, magma_int_t ihi, float *A, magma_int_t lda, float *tau, float *T, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_sorgqr_m (magma_int_t m, magma_int_t n, magma_int_t k, float *A, magma_int_t lda, float *tau, float *T, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_spotrf_m (magma_int_t num_gpus, char uplo, magma_int_t n, float *A, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_sstedx_m (magma_int_t nrgpu, char range, magma_int_t n, float vl, float vu, magma_int_t il, magma_int_t iu, float *D, float *E, float *Z, magma_int_t ldz, float *rwork, magma_int_t ldrwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_strsm_m (magma_int_t nrgpu, char side, char uplo, char transa, char diag, magma_int_t m, magma_int_t n, float alpha, float *a, magma_int_t lda, float *b, magma_int_t ldb)
 
magma_int_t magma_sormqr_m (magma_int_t nrgpu, char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sormtr_m (magma_int_t nrgpu, char side, char uplo, char trans, magma_int_t m, magma_int_t n, float *a, magma_int_t lda, float *tau, float *c, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_ssygst_m (magma_int_t nrgpu, magma_int_t itype, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, magma_int_t *info)
 
magma_int_t magma_ssyevd_m (magma_int_t nrgpu, char jobz, char uplo, magma_int_t n, float *a, magma_int_t lda, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssygvd_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssyevdx_m (magma_int_t nrgpu, char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssygvdx_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssyevdx_2stage_m (magma_int_t nrgpu, char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssygvdx_2stage_m (magma_int_t nrgpu, magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, float *a, magma_int_t lda, float *b, magma_int_t ldb, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_sgels_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, float *dB, magma_int_t lddb, float *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgels3_gpu (char trans, magma_int_t m, magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, float *dB, magma_int_t lddb, float *hwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgelqf_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t ldda, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgeqr2x_gpu (magma_int_t *m, magma_int_t *n, float *dA, magma_int_t *ldda, float *dtau, float *dT, float *ddA, float *dwork, magma_int_t *info)
 
magma_int_t magma_sgeqr2x2_gpu (magma_int_t *m, magma_int_t *n, float *dA, magma_int_t *ldda, float *dtau, float *dT, float *ddA, float *dwork, magma_int_t *info)
 
magma_int_t magma_sgeqr2x3_gpu (magma_int_t *m, magma_int_t *n, float *dA, magma_int_t *ldda, float *dtau, float *dT, float *ddA, float *dwork, magma_int_t *info)
 
magma_int_t magma_sgeqr2x4_gpu (magma_int_t *m, magma_int_t *n, float *dA, magma_int_t *ldda, float *dtau, float *dT, float *ddA, float *dwork, magma_int_t *info, magma_queue_t stream)
 
magma_int_t magma_sgeqrf_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t ldda, float *tau, float *dT, magma_int_t *info)
 
magma_int_t magma_sgeqrf2_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t ldda, float *tau, magma_int_t *info)
 
magma_int_t magma_sgeqrf2_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, float **dlA, magma_int_t ldda, float *tau, magma_int_t *info)
 
magma_int_t magma_sgeqrf3_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t ldda, float *tau, float *dT, magma_int_t *info)
 
magma_int_t magma_sgeqr2_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t lda, float *tau, float *work, magma_int_t *info)
 
magma_int_t magma_sgeqrs_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, float *tau, float *dT, float *dB, magma_int_t lddb, float *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_sgeqrs3_gpu (magma_int_t m, magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, float *tau, float *dT, float *dB, magma_int_t lddb, float *hwork, magma_int_t lhwork, magma_int_t *info)
 
magma_int_t magma_sgessm_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, float *dL1, magma_int_t lddl1, float *dL, magma_int_t lddl, float *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_sgesv_gpu (magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, magma_int_t *ipiv, float *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_sgetf2_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_sgetrf_incpiv_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, float *hA, magma_int_t ldha, float *dA, magma_int_t ldda, float *hL, magma_int_t ldhl, float *dL, magma_int_t lddl, magma_int_t *ipiv, float *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_sgetrf_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_sgetrf_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, float **d_lA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_sgetrf_m (magma_int_t num_gpus0, magma_int_t m, magma_int_t n, float *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_sgetrf_piv (magma_int_t m, magma_int_t n, magma_int_t NB, float *a, magma_int_t lda, magma_int_t *ipiv, magma_int_t *info)
 
magma_int_t magma_sgetrf2_mgpu (magma_int_t num_gpus, magma_int_t m, magma_int_t n, magma_int_t nb, magma_int_t offset, float *d_lAT[], magma_int_t lddat, magma_int_t *ipiv, float *d_lAP[], float *a, magma_int_t lda, magma_queue_t streaml[][2], magma_int_t *info)
 
magma_int_t magma_sgetrf_nopiv_gpu (magma_int_t m, magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_sgetri_gpu (magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *ipiv, float *dwork, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_sgetrs_gpu (char trans, magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, magma_int_t *ipiv, float *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_slabrd_gpu (magma_int_t m, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *da, magma_int_t ldda, float *d, float *e, float *tauq, float *taup, float *x, magma_int_t ldx, float *dx, magma_int_t lddx, float *y, magma_int_t ldy, float *dy, magma_int_t lddy)
 
magma_int_t magma_slaqps_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, float *A, magma_int_t lda, magma_int_t *jpvt, float *tau, float *vn1, float *vn2, float *auxv, float *dF, magma_int_t lddf)
 
magma_int_t magma_slaqps2_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, float *A, magma_int_t lda, magma_int_t *jpvt, float *tau, float *vn1, float *vn2, float *auxv, float *dF, magma_int_t lddf)
 
magma_int_t magma_slaqps3_gpu (magma_int_t m, magma_int_t n, magma_int_t offset, magma_int_t nb, magma_int_t *kb, float *A, magma_int_t lda, magma_int_t *jpvt, float *tau, float *vn1, float *vn2, float *auxv, float *dF, magma_int_t lddf)
 
magma_int_t magma_slarf_gpu (magma_int_t m, magma_int_t n, float *v, float *tau, float *c, magma_int_t ldc, float *xnorm)
 
magma_int_t magma_slarfb_gpu (char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const float *dv, magma_int_t ldv, const float *dt, magma_int_t ldt, float *dc, magma_int_t ldc, float *dwork, magma_int_t ldwork)
 
magma_int_t magma_slarfb2_gpu (magma_int_t m, magma_int_t n, magma_int_t k, const float *dV, magma_int_t ldv, const float *dT, magma_int_t ldt, float *dC, magma_int_t ldc, float *dwork, magma_int_t ldwork)
 
magma_int_t magma_slarfb_gpu_gemm (char side, char trans, char direct, char storev, magma_int_t m, magma_int_t n, magma_int_t k, const float *dv, magma_int_t ldv, const float *dt, magma_int_t ldt, float *dc, magma_int_t ldc, float *dwork, magma_int_t ldwork, float *dworkvt, magma_int_t ldworkvt)
 
magma_int_t magma_slarfg_gpu (magma_int_t n, float *dx0, float *dx, float *dtau, float *dxnorm, float *dAkk)
 
magma_int_t magma_sposv_gpu (char uplo, magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, float *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_spotf2_gpu (magma_uplo_t uplo, magma_int_t n, float *dA, magma_int_t lda, magma_int_t *info)
 
magma_int_t magma_spotrf_gpu (char uplo, magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_spotrf_mgpu (magma_int_t ngpu, char uplo, magma_int_t n, float **d_lA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_spotrf3_mgpu (magma_int_t num_gpus, char uplo, magma_int_t m, magma_int_t n, magma_int_t off_i, magma_int_t off_j, magma_int_t nb, float *d_lA[], magma_int_t ldda, float *d_lP[], magma_int_t lddp, float *a, magma_int_t lda, magma_int_t h, magma_queue_t stream[][3], magma_event_t event[][5], magma_int_t *info)
 
magma_int_t magma_spotri_gpu (char uplo, magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_slauum_gpu (char uplo, magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_strtri_gpu (char uplo, char diag, magma_int_t n, float *dA, magma_int_t ldda, magma_int_t *info)
 
magma_int_t magma_ssytrd_gpu (char uplo, magma_int_t n, float *da, magma_int_t ldda, float *d, float *e, float *tau, float *wa, magma_int_t ldwa, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_ssytrd2_gpu (char uplo, magma_int_t n, float *da, magma_int_t ldda, float *d, float *e, float *tau, float *wa, magma_int_t ldwa, float *work, magma_int_t lwork, float *dwork, magma_int_t ldwork, magma_int_t *info)
 
float magma_slatrd_mgpu (magma_int_t num_gpus, char uplo, magma_int_t n0, magma_int_t n, magma_int_t nb, magma_int_t nb0, float *a, magma_int_t lda, float *e, float *tau, float *w, magma_int_t ldw, float **da, magma_int_t ldda, magma_int_t offset, float **dw, magma_int_t lddw, float *dwork[MagmaMaxGPUs], magma_int_t ldwork, magma_int_t k, float *dx[MagmaMaxGPUs], float *dy[MagmaMaxGPUs], float *work, magma_queue_t stream[][10], float *times)
 
magma_int_t magma_ssytrd_mgpu (magma_int_t num_gpus, magma_int_t k, char uplo, magma_int_t n, float *a, magma_int_t lda, float *d, float *e, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_ssytrd_sb2st (magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz, float *A, magma_int_t lda, float *D, float *E, float *V, magma_int_t ldv, float *TAU, magma_int_t compT, float *T, magma_int_t ldt)
 
magma_int_t magma_ssytrd_sy2sb (char uplo, magma_int_t n, magma_int_t NB, float *a, magma_int_t lda, float *tau, float *work, magma_int_t lwork, float *dT, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_ssytrd_sy2sb_mgpu (char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *tau, float *work, magma_int_t lwork, float *dAmgpu[], magma_int_t ldda, float *dTmgpu[], magma_int_t lddt, magma_int_t ngpu, magma_int_t distblk, magma_queue_t streams[][20], magma_int_t nstream, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_ssytrd_sy2sb_mgpu_spec (char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *tau, float *work, magma_int_t lwork, float *dAmgpu[], magma_int_t ldda, float *dTmgpu[], magma_int_t lddt, magma_int_t ngpu, magma_int_t distblk, magma_queue_t streams[][20], magma_int_t nstream, magma_int_t threads, magma_int_t *info)
 
magma_int_t magma_spotrs_gpu (char uplo, magma_int_t n, magma_int_t nrhs, float *dA, magma_int_t ldda, float *dB, magma_int_t lddb, magma_int_t *info)
 
magma_int_t magma_sssssm_gpu (char storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, float *dA1, magma_int_t ldda1, float *dA2, magma_int_t ldda2, float *dL1, magma_int_t lddl1, float *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info)
 
magma_int_t magma_ststrf_gpu (char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, float *hU, magma_int_t ldhu, float *dU, magma_int_t lddu, float *hA, magma_int_t ldha, float *dA, magma_int_t ldda, float *hL, magma_int_t ldhl, float *dL, magma_int_t lddl, magma_int_t *ipiv, float *hwork, magma_int_t ldhwork, float *dwork, magma_int_t lddwork, magma_int_t *info)
 
magma_int_t magma_sorgqr_gpu (magma_int_t m, magma_int_t n, magma_int_t k, float *da, magma_int_t ldda, float *tau, float *dwork, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_sormql2_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *da, magma_int_t ldda, float *tau, float *dc, magma_int_t lddc, float *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_sormqr_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *dA, magma_int_t ldda, float *tau, float *dC, magma_int_t lddc, float *hwork, magma_int_t lwork, float *dT, magma_int_t nb, magma_int_t *info)
 
magma_int_t magma_sormqr2_gpu (char side, char trans, magma_int_t m, magma_int_t n, magma_int_t k, float *da, magma_int_t ldda, float *tau, float *dc, magma_int_t lddc, float *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_sormtr_gpu (char side, char uplo, char trans, magma_int_t m, magma_int_t n, float *da, magma_int_t ldda, float *tau, float *dc, magma_int_t lddc, float *wa, magma_int_t ldwa, magma_int_t *info)
 
magma_int_t magma_sgeqp3_gpu (magma_int_t m, magma_int_t n, float *A, magma_int_t lda, magma_int_t *jpvt, float *tau, float *work, magma_int_t lwork, magma_int_t *info)
 
magma_int_t magma_ssyevd_gpu (char jobz, char uplo, magma_int_t n, float *da, magma_int_t ldda, float *w, float *wa, magma_int_t ldwa, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssyevdx_gpu (char jobz, char range, char uplo, magma_int_t n, float *da, magma_int_t ldda, float vl, float vu, magma_int_t il, magma_int_t iu, magma_int_t *m, float *w, float *wa, magma_int_t ldwa, float *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info)
 
magma_int_t magma_ssygst_gpu (magma_int_t itype, char uplo, magma_int_t n, float *da, magma_int_t ldda, float *db, magma_int_t lddb, magma_int_t *info)
 
void magma_sprint (magma_int_t m, magma_int_t n, const float *A, magma_int_t lda)
 
void magma_sprint_gpu (magma_int_t m, magma_int_t n, const float *dA, magma_int_t ldda)
 
void spanel_to_q (magma_uplo_t uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
 
void sq_to_panel (magma_uplo_t uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
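
A typical use of the CPU-interface routines above passes host arrays and leading dimensions directly, while the *_gpu/*_mgpu variants take device pointers (dA, dB, ...) with device leading dimensions (ldda, lddb, ...). The following is a minimal sketch of a CPU-interface call to magma_sgesv; it assumes magma_init()/magma_finalize() from magma.h and uses placeholder matrix data.

    #include <stdio.h>
    #include <stdlib.h>
    #include "magma.h"                    /* assumed top-level header; pulls in magma_s.h */

    int main( void )
    {
        magma_int_t n = 1000, nrhs = 1, lda = n, ldb = n, info = 0;
        magma_int_t i, j;
        float *A, *B;
        magma_int_t *ipiv;

        magma_init();                     /* assumed: initializes MAGMA and the CUDA device */

        A    = (float*) malloc( lda*n    * sizeof(float) );
        B    = (float*) malloc( ldb*nrhs * sizeof(float) );
        ipiv = (magma_int_t*) malloc( n * sizeof(magma_int_t) );

        /* placeholder data: a diagonally dominant matrix and a right-hand side of ones */
        for (j = 0; j < n; ++j) {
            for (i = 0; i < n; ++i)
                A[i + j*lda] = (i == j) ? (float) n : 1.0f;
            B[j] = 1.0f;
        }

        /* LU factorization with partial pivoting, then solve; the solution overwrites B */
        magma_sgesv( n, nrhs, A, lda, ipiv, B, ldb, &info );
        if (info != 0)
            printf( "magma_sgesv returned info = %d\n", (int) info );

        free( A );  free( B );  free( ipiv );
        magma_finalize();
        return 0;
    }

The same column-major storage and info-based error reporting apply to every routine declared in this header.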
 

Macro Definition Documentation

#define PRECISION_s

Definition at line 17 of file magma_s.h.

Function Documentation

magma_int_t magma_get_sbulge_gcperf ( )

Definition at line 707 of file get_nb.cpp.

References magma_getdevice_arch().

708 {
709  magma_int_t arch = magma_getdevice_arch();
710  if ( arch >= 300 ) { // 3.x Kepler + SB
711  return 37;
712  }
713  else if ( arch >= 200 ) { // 2.x Fermi
714  return 15000;
715  }
716  else { // 1.x
717  return 10000;
718  }
719 }

magma_int_t magma_get_sbulge_nb ( magma_int_t  m,
magma_int_t  nbthreads 
)

Definition at line 778 of file get_nb.cpp.

References magma_getdevice_arch().

779 {
780  magma_int_t arch = magma_getdevice_arch();
781  if ( arch >= 300 ) { // 3.x Kepler + SB
782  return 128;
783  }
784  else if ( arch >= 200 ) { // 2.x Fermi
785  return 64;
786  }
787  else { // 1.x
788  return 64;
789  }
790 }

magma_int_t magma_get_sbulge_nb_mgpu ( magma_int_t  m)

Definition at line 900 of file get_nb.cpp.

References magma_getdevice_arch().

901 {
902  magma_int_t arch = magma_getdevice_arch();
903  if ( arch >= 300 ) { // 3.x Kepler + SB
904  return 128;
905  }
906  else if ( arch >= 200 ) { // 2.x Fermi
907  return 64;
908  }
909  else { // 1.x
910  return 64;
911  }
912 }

magma_int_t magma_get_sgebrd_nb ( magma_int_t  m)

Definition at line 443 of file get_nb.cpp.

References magma_getdevice_arch().

444 {
445  magma_int_t arch = magma_getdevice_arch();
446  if ( arch >= 200 ) { // 2.x Fermi
447  return 32;
448  //if (m < 1024)
449  // return 64;
450  //else
451  // return 64;
452  }
453  else { // 1.x
454  return 32;
455  }
456 }

magma_int_t magma_get_sgehrd_nb ( magma_int_t  m)

Definition at line 342 of file get_nb.cpp.

References magma_getdevice_arch().

343 {
344  magma_int_t arch = magma_getdevice_arch();
345  if ( arch >= 200 ) { // 2.x Fermi
346  if (m < 1024) return 32;
347  else return 96;
348  }
349  else { // 1.x
350  if (m < 1024) return 32;
351  else return 64;
352  }
353 }

magma_int_t magma_get_sgelqf_nb ( magma_int_t  m)

Definition at line 232 of file get_nb.cpp.

References magma_get_sgeqrf_nb().

233 {
234  return magma_get_sgeqrf_nb( m );
235 }

magma_int_t magma_get_sgeqlf_nb ( magma_int_t  m)

Definition at line 190 of file get_nb.cpp.

References magma_get_sgeqrf_nb(), and magma_getdevice_arch().

191 {
192  magma_int_t arch = magma_getdevice_arch();
193  if ( arch >= 200 ) { // 2.x Fermi
194  return magma_get_sgeqrf_nb( m );
195  }
196  else { // 1.x
197  if (m < 1024) return 32;
198  else if (m < 4032) return 64;
199  else return 128;
200  }
201 }

magma_int_t magma_get_sgeqp3_nb ( magma_int_t  m)

Definition at line 97 of file get_nb.cpp.

98 {
99  return 32;
100 }


magma_int_t magma_get_sgeqrf_nb ( magma_int_t  m)

Definition at line 120 of file get_nb.cpp.

References magma_getdevice_arch().

121 {
122  magma_int_t arch = magma_getdevice_arch();
123  if ( arch >= 300 ) { // 3.x Kepler
124  if (m < 4096) return 96;
125  else if (m < 7168) return 128;
126  else if (m < 18432) return 256;
127  else return 512;
128  }
129  else if ( arch >= 200 ) { // 2.x Fermi
130  if (m < 3072) return 64;
131  else if (m < 8192) return 128;
132  else return 256;
133  }
134  else { // 1.x
135  if (m < 2048) return 32;
136  else if (m < 4096) return 64;
137  else return 128;
138  }
139 }
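
The value returned above is the blocking factor used internally by the QR routines. Below is a minimal sketch of sizing the host workspace for magma_sgeqrf, reusing the includes and magma_init() call from the magma_sgesv sketch near the top of this page; the lwork = -1 workspace query is assumed to follow the same LAPACK-style convention that magma_sgebrd and magma_sgeev document further down.

    magma_int_t m = 4096, n = 2048, lda = m, info = 0;
    magma_int_t minmn = (m < n ? m : n);
    magma_int_t nb    = magma_get_sgeqrf_nb( m );   /* arch- and size-dependent block size */

    float *A   = (float*) malloc( (size_t)lda*n * sizeof(float) );
    float *tau = (float*) malloc( minmn * sizeof(float) );

    /* workspace query: with lwork = -1 the optimal size is returned in work[0] (assumed) */
    float query = 0.0f;
    magma_sgeqrf( m, n, A, lda, tau, &query, -1, &info );
    magma_int_t lwork = (magma_int_t) query;        /* on the order of n*nb */
    float *work = (float*) malloc( lwork * sizeof(float) );

    /* ... fill A, then factor A = Q*R; Householder scalars are returned in tau ... */
    magma_sgeqrf( m, n, A, lda, tau, work, lwork, &info );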

magma_int_t magma_get_sgesvd_nb ( magma_int_t  m)

Definition at line 592 of file get_nb.cpp.

References magma_get_sgebrd_nb().

593 {
594  return magma_get_sgebrd_nb( m );
595 }

magma_int_t magma_get_sgetrf_nb ( magma_int_t  m)

Definition at line 266 of file get_nb.cpp.

References magma_getdevice_arch().

267 {
268  magma_int_t arch = magma_getdevice_arch();
269  if ( arch >= 300 ) { // 3.x Kepler
270  if (m < 4096) return 256;
271  else if (m < 18432) return 512;
272  else return 1024;
273  }
274  else if ( arch >= 200 ) { // 2.x Fermi
275  if (m < 3072) return 128;
276  else if (m < 10240) return 256;
277  else return 512;
278  }
279  else { // 1.x
280  if (m < 2048) return 64;
281  else return 128;
282  }
283 }

magma_int_t magma_get_sgetri_nb ( magma_int_t  m)

Definition at line 569 of file get_nb.cpp.

570 {
571  return 64;
572 }

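
This block size also sets the device workspace of the GPU-interface inversion routine. The sketch below LU-factorizes on the device and inverts in place; the lwork = n*nb sizing follows the usual MAGMA convention but is an assumption here, and magma_smalloc/magma_free/magma_ssetmatrix/magma_sgetmatrix are the device-memory and copy helpers declared in magma.h and magmablas_s.h (see the magma_sgesv sketch above for the surrounding setup).

    magma_int_t n = 1024, lda = n, ldda = n, info = 0;
    magma_int_t nb    = magma_get_sgetri_nb( n );
    magma_int_t lwork = n * nb;                      /* assumed device workspace size: n*nb */

    magma_int_t *ipiv = (magma_int_t*) malloc( n * sizeof(magma_int_t) );
    float *A = (float*) malloc( (size_t)lda*n * sizeof(float) );
    float *dA, *dwork;
    magma_smalloc( &dA,    (size_t)ldda*n );
    magma_smalloc( &dwork, (size_t)lwork  );

    /* ... fill A on the host, then copy it to the device ... */
    magma_ssetmatrix( n, n, A, lda, dA, ldda );

    magma_sgetrf_gpu( n, n, dA, ldda, ipiv, &info );            /* P*A = L*U     */
    magma_sgetri_gpu( n, dA, ldda, ipiv, dwork, lwork, &info ); /* dA := inv(A)  */

    magma_sgetmatrix( n, n, dA, ldda, A, lda );                 /* bring inv(A) back to the host */

    magma_free( dA );  magma_free( dwork );  free( A );  free( ipiv );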

magma_int_t magma_get_smlsize_divideconquer ( )

Definition at line 768 of file get_nb.cpp.

769  {
770  return 128;
771  }

magma_int_t magma_get_spotrf_nb ( magma_int_t  m)

Definition at line 29 of file get_nb.cpp.

References magma_getdevice_arch().

30 {
31  magma_int_t arch = magma_getdevice_arch();
32  if ( arch >= 300 ) { // 3.x Kepler
33  if (m < 1500) return 256;
34  else return 512;
35  }
36  else if ( arch >= 200 ) { // 2.x Fermi
37  if (m < 2048) return 256;
38  else return 512;
39  }
40  else { // 1.x
41  if (m < 3328) return 128;
42  else if (m < 4256) return 224;
43  else return 288;
44  }
45 }
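
The Cholesky block size above feeds the GPU-interface factorization in the same way. Below is a short sketch of a device-resident Cholesky solve with magma_spotrf_gpu and magma_spotrs_gpu, again assuming the setup from the magma_sgesv sketch and the magma_smalloc/magma_ssetmatrix/magma_sgetmatrix helpers from magma.h and magmablas_s.h; the matrix is assumed symmetric positive definite.

    magma_int_t n = 2048, nrhs = 1, lda = n, ldb = n, ldda = n, lddb = n, info = 0;
    float *A = (float*) malloc( (size_t)lda*n    * sizeof(float) );
    float *B = (float*) malloc( (size_t)ldb*nrhs * sizeof(float) );
    /* ... fill A (symmetric positive definite) and B on the host ... */

    float *dA, *dB;
    magma_smalloc( &dA, (size_t)ldda*n    );
    magma_smalloc( &dB, (size_t)lddb*nrhs );

    magma_ssetmatrix( n, n,    A, lda, dA, ldda );   /* host -> device */
    magma_ssetmatrix( n, nrhs, B, ldb, dB, lddb );

    magma_spotrf_gpu( 'U', n, dA, ldda, &info );                  /* A = U**T * U   */
    magma_spotrs_gpu( 'U', n, nrhs, dA, ldda, dB, lddb, &info );  /* solve A*X = B  */

    magma_sgetmatrix( n, nrhs, dB, lddb, B, ldb );   /* device -> host: solution in B */

    magma_free( dA );  magma_free( dB );  free( A );  free( B );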

magma_int_t magma_get_ssygst_nb ( magma_int_t  m)

Definition at line 506 of file get_nb.cpp.

References magma_getdevice_arch().

507 {
508  magma_int_t arch = magma_getdevice_arch();
509  if ( arch >= 300 ) { // 3.x Kepler
510  if (m < 4096) return 768;
511  else return 1536;
512  }
513  else if ( arch >= 200 ) { // 2.x Fermi
514  if (m < 2048) return 512;
515  else return 1024;
516  }
517  else { // 1.x
518  return 64;
519  }
520 }

magma_int_t magma_get_ssygst_nb_m ( magma_int_t  m)

Definition at line 623 of file get_nb.cpp.

References magma_getdevice_arch().

624 {
625  return 256; //to be updated
626 
627  magma_int_t arch = magma_getdevice_arch();
628  if ( arch >= 300 ) { // 3.x Kepler
629  if (m < 4096) return 768;
630  else return 1536;
631  }
632  else if ( arch >= 200 ) { // 2.x Fermi
633  if (m < 2048) return 512;
634  else return 1024;
635  }
636  else { // 1.x
637  return 64;
638  }
639 }

magma_int_t magma_get_ssytrd_nb ( magma_int_t  m)

Definition at line 376 of file get_nb.cpp.

References magma_getdevice_arch().

377 {
378  magma_int_t arch = magma_getdevice_arch();
379  if ( arch >= 200 ) { // 2.x Fermi
380  return 32;
381  //if (m < 1024)
382  // return 64;
383  //else
384  // return 64;
385  }
386  else { // 1.x
387  return 32;
388  }
389 }

magma_int_t magma_sbulge_get_Vblksiz ( magma_int_t  m,
magma_int_t  nb,
magma_int_t  nbthreads 
)

Definition at line 844 of file get_nb.cpp.

References magma_getdevice_arch(), and min.

845 {
846  magma_int_t arch = magma_getdevice_arch();
847  if ( arch >= 300 ) { // 3.x Kepler + SB
848  return min(nb,128);
849  }
850  else { // 2.x Fermi or 1.x
851  return min(nb,64);
852  }
853 }
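
These bulge-chasing tuning routines are normally queried together before driving the two-stage eigensolvers (magma_ssyevdx_2stage and related routines); the thread count below is an arbitrary illustration, not a recommended value.

    magma_int_t n       = 8000;
    magma_int_t threads = 4;   /* hypothetical number of worker threads */
    magma_int_t nb      = magma_get_sbulge_nb( n, threads );           /* block size used by the two-stage reduction */
    magma_int_t Vblksiz = magma_sbulge_get_Vblksiz( n, nb, threads );  /* blocking of the Householder reflectors V */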

magma_int_t magma_sgebrd ( magma_int_t  m,
magma_int_t  n,
float *  A,
magma_int_t  lda,
float *  d,
float *  e,
float *  tauq,
float *  taup,
float *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 17 of file sgebrd.cpp.

References __func__, A, dA, dwork, lapackf77_sgebrd(), MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgebrd_nb(), MAGMA_S_MAKE, MAGMA_S_NEG_ONE, MAGMA_S_ONE, magma_sgemm(), magma_sgetmatrix, magma_slabrd_gpu(), magma_smalloc(), magma_ssetmatrix, MAGMA_SUCCESS, magma_xerbla(), MagmaNoTrans, MagmaTrans, max, and min.

22 {
23 /* -- MAGMA (version 1.4.0) --
24  Univ. of Tennessee, Knoxville
25  Univ. of California, Berkeley
26  Univ. of Colorado, Denver
27  August 2013
28 
29  Purpose
30  =======
31  SGEBRD reduces a general real M-by-N matrix A to upper or lower
32  bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
33 
34  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
35 
36  Arguments
37  =========
38  M (input) INTEGER
39  The number of rows in the matrix A. M >= 0.
40 
41  N (input) INTEGER
42  The number of columns in the matrix A. N >= 0.
43 
44  A (input/output) REAL array, dimension (LDA,N)
45  On entry, the M-by-N general matrix to be reduced.
46  On exit,
47  if m >= n, the diagonal and the first superdiagonal are
48  overwritten with the upper bidiagonal matrix B; the
49  elements below the diagonal, with the array TAUQ, represent
50  the orthogonal matrix Q as a product of elementary
51  reflectors, and the elements above the first superdiagonal,
52  with the array TAUP, represent the orthogonal matrix P as
53  a product of elementary reflectors;
54  if m < n, the diagonal and the first subdiagonal are
55  overwritten with the lower bidiagonal matrix B; the
56  elements below the first subdiagonal, with the array TAUQ,
57  represent the orthogonal matrix Q as a product of
58  elementary reflectors, and the elements above the diagonal,
59  with the array TAUP, represent the orthogonal matrix P as
60  a product of elementary reflectors.
61  See Further Details.
62 
63  LDA (input) INTEGER
64  The leading dimension of the array A. LDA >= max(1,M).
65 
66  D (output) real array, dimension (min(M,N))
67  The diagonal elements of the bidiagonal matrix B:
68  D(i) = A(i,i).
69 
70  E (output) real array, dimension (min(M,N)-1)
71  The off-diagonal elements of the bidiagonal matrix B:
72  if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
73  if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
74 
75  TAUQ (output) REAL array, dimension (min(M,N))
76  The scalar factors of the elementary reflectors which
77  represent the orthogonal matrix Q. See Further Details.
78 
79  TAUP (output) REAL array, dimension (min(M,N))
80  The scalar factors of the elementary reflectors which
81  represent the orthogonal matrix P. See Further Details.
82 
83  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
84  On exit, if INFO = 0, WORK[0] returns the optimal LWORK.
85 
86  LWORK (input) INTEGER
87  The length of the array WORK. LWORK >= (M+N)*NB, where NB
88  is the optimal blocksize.
89 
90  If LWORK = -1, then a workspace query is assumed; the routine
91  only calculates the optimal size of the WORK array, returns
92  this value as the first entry of the WORK array, and no error
93  message related to LWORK is issued by XERBLA.
94 
95  INFO (output) INTEGER
96  = 0: successful exit
97  < 0: if INFO = -i, the i-th argument had an illegal value.
98 
99  Further Details
100  ===============
101  The matrices Q and P are represented as products of elementary
102  reflectors:
103 
104  If m >= n,
105  Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
106  Each H(i) and G(i) has the form:
107  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
108  where tauq and taup are real scalars, and v and u are real vectors;
109  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
110  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
111  tauq is stored in TAUQ(i) and taup in TAUP(i).
112 
113  If m < n,
114  Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
115  Each H(i) and G(i) has the form:
116  H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
117  where tauq and taup are real scalars, and v and u are real vectors;
118  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
119  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
120  tauq is stored in TAUQ(i) and taup in TAUP(i).
121 
122  The contents of A on exit are illustrated by the following examples:
123 
124  m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
125 
126  ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
127  ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
128  ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
129  ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
130  ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
131  ( v1 v2 v3 v4 v5 )
132 
133  where d and e denote diagonal and off-diagonal elements of B, vi
134  denotes an element of the vector defining H(i), and ui an element of
135  the vector defining G(i).
136  ===================================================================== */
137 
138  float c_neg_one = MAGMA_S_NEG_ONE;
139  float c_one = MAGMA_S_ONE;
140  float *da, *dwork;
141 
142  magma_int_t ncol, nrow, jmax, nb, ldda;
143 
144  magma_int_t i, j, nx;
145  magma_int_t iinfo;
146 
147  magma_int_t minmn;
148  magma_int_t ldwrkx, ldwrky, lwkopt;
149  magma_int_t lquery;
150 
151  nb = magma_get_sgebrd_nb(n);
152  ldda = m;
153 
154  lwkopt = (m + n) * nb;
155  work[0] = MAGMA_S_MAKE( lwkopt, 0. );
156  lquery = (lwork == -1);
157 
158  /* Check arguments */
159  *info = 0;
160  if (m < 0) {
161  *info = -1;
162  } else if (n < 0) {
163  *info = -2;
164  } else if (lda < max(1,m)) {
165  *info = -4;
166  } else if (lwork < lwkopt && (! lquery) ) {
167  *info = -10;
168  }
169  if (*info < 0) {
170  magma_xerbla( __func__, -(*info) );
171  return *info;
172  }
173  else if (lquery)
174  return *info;
175 
176  /* Quick return if possible */
177  minmn = min(m,n);
178  if (minmn == 0) {
179  work[0] = c_one;
180  return *info;
181  }
182 
183  if (MAGMA_SUCCESS != magma_smalloc( &da, n*ldda + (m + n)*nb )) {
184  fprintf (stderr, "!!!! device memory allocation error in sgebrd\n" );
185  *info = MAGMA_ERR_DEVICE_ALLOC;
186  return *info;
187  }
188  dwork = da + (n)*ldda;
189 
190  ldwrkx = m;
191  ldwrky = n;
192 
193  /* Set the block/unblock crossover point NX. */
194  nx = 128;
195 
196  /* Copy the matrix to the GPU */
197  if (minmn - nx >= 1) {
198  magma_ssetmatrix( m, n, a, lda, da, ldda );
199  }
200 
201  for (i=0; i< (minmn - nx); i += nb) {
202 
203  /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
204  the matrices X and Y which are needed to update the unreduced
205  part of the matrix */
206  nrow = m - i;
207  ncol = n - i;
208 
209  /* Get the current panel (no need for the 1st iteration) */
210  if ( i > 0 ) {
211  magma_sgetmatrix( nrow, nb, dA(i, i), ldda, A( i, i), lda );
212  magma_sgetmatrix( nb, ncol - nb,
213  dA(i, i+nb), ldda,
214  A( i, i+nb), lda );
215  }
216 
217  magma_slabrd_gpu(nrow, ncol, nb,
218  A(i, i), lda, dA(i, i), ldda,
219  d+i, e+i, tauq+i, taup+i,
220  work, ldwrkx, dwork, ldwrkx, // x, dx
221  work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky); // y, dy
222 
223  /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
224  of the form A := A - V*Y' - X*U' */
225  nrow = m - i - nb;
226  ncol = n - i - nb;
227 
228  // Send Y back to the GPU
229  magma_ssetmatrix( nrow, nb, work + nb, ldwrkx, dwork + nb, ldwrkx );
230  magma_ssetmatrix( ncol, nb,
231  work + (ldwrkx+1)*nb, ldwrky,
232  dwork + (ldwrkx+1)*nb, ldwrky );
233 
234  magma_sgemm( MagmaNoTrans, MagmaTrans,
235  nrow, ncol, nb,
236  c_neg_one, dA(i+nb, i ), ldda,
237  dwork+(ldwrkx+1)*nb, ldwrky,
238  c_one, dA(i+nb, i+nb), ldda);
239 
240  magma_sgemm( MagmaNoTrans, MagmaNoTrans,
241  nrow, ncol, nb,
242  c_neg_one, dwork+nb, ldwrkx,
243  dA( i, i+nb ), ldda,
244  c_one, dA( i+nb, i+nb ), ldda);
245 
246  /* Copy diagonal and off-diagonal elements of B back into A */
247  if (m >= n) {
248  jmax = i + nb;
249  for (j = i; j < jmax; ++j) {
250  *A(j, j ) = MAGMA_S_MAKE( d[j], 0. );
251  *A(j, j+1) = MAGMA_S_MAKE( e[j], 0. );
252  }
253  } else {
254  jmax = i + nb;
255  for (j = i; j < jmax; ++j) {
256  *A(j, j ) = MAGMA_S_MAKE( d[j], 0. );
257  *A(j+1, j ) = MAGMA_S_MAKE( e[j], 0. );
258  }
259  }
260  }
261 
262  /* Use unblocked code to reduce the remainder of the matrix */
263  nrow = m - i;
264  ncol = n - i;
265 
266  if ( 0 < minmn - nx ) {
267  magma_sgetmatrix( nrow, ncol, dA(i, i), ldda, A(i, i), lda );
268  }
269 
270  lapackf77_sgebrd( &nrow, &ncol,
271  A(i, i), &lda, d+i, e+i,
272  tauq+i, taup+i, work, &lwork, &iinfo);
273  work[0] = MAGMA_S_MAKE( lwkopt, 0. );
274 
275  magma_free( da );
276  return *info;
277 } /* sgebrd */
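
Using the workspace bound documented above (LWORK >= (M+N)*NB, with NB obtained from magma_get_sgebrd_nb as in the listing), a minimal host-interface call looks as follows; it reuses the includes and magma_init() call from the magma_sgesv sketch near the top of the page, and the matrix contents are placeholders.

    magma_int_t m = 3000, n = 2000, lda = m, info = 0;
    magma_int_t minmn = (m < n ? m : n);
    magma_int_t nb    = magma_get_sgebrd_nb( n );
    magma_int_t lwork = (m + n) * nb;                /* documented minimum: (M+N)*NB */

    float *A    = (float*) malloc( (size_t)lda*n * sizeof(float) );
    float *d    = (float*) malloc( minmn       * sizeof(float) );
    float *e    = (float*) malloc( (minmn - 1) * sizeof(float) );
    float *tauq = (float*) malloc( minmn       * sizeof(float) );
    float *taup = (float*) malloc( minmn       * sizeof(float) );
    float *work = (float*) malloc( lwork       * sizeof(float) );

    /* ... fill A, then reduce it to bidiagonal form  Q**T * A * P = B ... */
    magma_sgebrd( m, n, A, lda, d, e, tauq, taup, work, lwork, &info );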

magma_int_t magma_sgeev ( char  jobvl,
char  jobvr,
magma_int_t  n,
float *  a,
magma_int_t  lda,
float *  wr,
float *  wi,
float *  vl,
magma_int_t  ldvl,
float *  vr,
magma_int_t  ldvr,
float *  work,
magma_int_t  lwork,
magma_int_t info 
)

Definition at line 25 of file sgeev.cpp.

References __func__, cblas_isamax(), cblas_snrm2(), cblas_srot(), cblas_sscal(), dT, lapackf77_lsame, lapackf77_sgebak(), lapackf77_sgebal(), lapackf77_sgehrd(), lapackf77_shseqr(), lapackf77_slabad, lapackf77_slacpy(), lapackf77_slamch, lapackf77_slange(), lapackf77_slapy2, lapackf77_slartg(), lapackf77_slascl(), lapackf77_sorghr(), lapackf77_strevc(), MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgehrd_nb(), MAGMA_S_MAKE, magma_sgehrd(), magma_sgehrd2(), magma_smalloc(), magma_sorghr(), magma_ssqrt, MAGMA_SUCCESS, magma_xerbla(), MagmaLowerStr, max, vl, and vr.

33 {
34 /* -- MAGMA (version 1.4.0) --
35  Univ. of Tennessee, Knoxville
36  Univ. of California, Berkeley
37  Univ. of Colorado, Denver
38  August 2013
39 
40  Purpose
41  =======
42  SGEEV computes for an N-by-N real nonsymmetric matrix A, the
43  eigenvalues and, optionally, the left and/or right eigenvectors.
44 
45  The right eigenvector v(j) of A satisfies
46  A * v(j) = lambda(j) * v(j)
47  where lambda(j) is its eigenvalue.
48  The left eigenvector u(j) of A satisfies
49  u(j)**T * A = lambda(j) * u(j)**T
50  where u(j)**T denotes the transpose of u(j).
51 
52  The computed eigenvectors are normalized to have Euclidean norm
53  equal to 1 and largest component real.
54 
55  Arguments
56  =========
57  JOBVL (input) CHARACTER*1
58  = 'N': left eigenvectors of A are not computed;
59  = 'V': left eigenvectors of A are computed.
60 
61  JOBVR (input) CHARACTER*1
62  = 'N': right eigenvectors of A are not computed;
63  = 'V': right eigenvectors of A are computed.
64 
65  N (input) INTEGER
66  The order of the matrix A. N >= 0.
67 
68  A (input/output) REAL array, dimension (LDA,N)
69  On entry, the N-by-N matrix A.
70  On exit, A has been overwritten.
71 
72  LDA (input) INTEGER
73  The leading dimension of the array A. LDA >= max(1,N).
74 
75  WR (output) REAL array, dimension (N)
76  WI (output) REAL array, dimension (N)
77  WR and WI contain the real and imaginary parts,
78  respectively, of the computed eigenvalues. Complex
79  conjugate pairs of eigenvalues appear consecutively
80  with the eigenvalue having the positive imaginary part
81  first.
82 
83  VL (output) REAL array, dimension (LDVL,N)
84  If JOBVL = 'V', the left eigenvectors u(j) are stored one
85  after another in the columns of VL, in the same order
86  as their eigenvalues.
87  If JOBVL = 'N', VL is not referenced.
88  u(j) = VL(:,j), the j-th column of VL.
89 
90  LDVL (input) INTEGER
91  The leading dimension of the array VL. LDVL >= 1; if
92  JOBVL = 'V', LDVL >= N.
93 
94  VR (output) REAL array, dimension (LDVR,N)
95  If JOBVR = 'V', the right eigenvectors v(j) are stored one
96  after another in the columns of VR, in the same order
97  as their eigenvalues.
98  If JOBVR = 'N', VR is not referenced.
99  v(j) = VR(:,j), the j-th column of VR.
100 
101  LDVR (input) INTEGER
102  The leading dimension of the array VR. LDVR >= 1; if
103  JOBVR = 'V', LDVR >= N.
104 
105  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
106  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
107 
108  LWORK (input) INTEGER
109  The dimension of the array WORK. LWORK >= (1+nb)*N.
110 
111  If LWORK = -1, then a workspace query is assumed; the routine
112  only calculates the optimal size of the WORK array, returns
113  this value as the first entry of the WORK array, and no error
114  message related to LWORK is issued by XERBLA.
115 
116  INFO (output) INTEGER
117  = 0: successful exit
118  < 0: if INFO = -i, the i-th argument had an illegal value.
119  > 0: if INFO = i, the QR algorithm failed to compute all the
120  eigenvalues, and no eigenvectors have been computed;
121  elements i+1:N of WR and WI contain eigenvalues which have
122  converged.
123  ===================================================================== */
124 
125  #define vl(i,j) (vl + (i) + (j)*ldvl)
126  #define vr(i,j) (vr + (i) + (j)*ldvr)
127 
128  magma_int_t c_one = 1;
129  magma_int_t c_zero = 0;
130 
131  float d__1, d__2;
132  float r, cs, sn, scl;
133  float dum[1], eps;
134  float anrm, cscale, bignum, smlnum;
135  magma_int_t i, k, ilo, ihi;
136  magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
137  magma_int_t scalea, minwrk, lquery, wantvl, wantvr, select[1];
138 
139  char side[2] = {0, 0};
140  char jobvl_[2] = {jobvl, 0};
141  char jobvr_[2] = {jobvr, 0};
142 
143  *info = 0;
144  lquery = lwork == -1;
145  wantvl = lapackf77_lsame( jobvl_, "V" );
146  wantvr = lapackf77_lsame( jobvr_, "V" );
147  if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
148  *info = -1;
149  } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
150  *info = -2;
151  } else if (n < 0) {
152  *info = -3;
153  } else if (lda < max(1,n)) {
154  *info = -5;
155  } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
156  *info = -9;
157  } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
158  *info = -11;
159  }
160 
161  /* Compute workspace */
162  nb = magma_get_sgehrd_nb( n );
163  if (*info == 0) {
164  minwrk = (2+nb)*n;
165  work[0] = MAGMA_S_MAKE( (float) minwrk, 0. );
166 
167  if (lwork < minwrk && ! lquery) {
168  *info = -13;
169  }
170  }
171 
172  if (*info != 0) {
173  magma_xerbla( __func__, -(*info) );
174  return *info;
175  }
176  else if (lquery) {
177  return *info;
178  }
179 
180  /* Quick return if possible */
181  if (n == 0) {
182  return *info;
183  }
184 
185  #if defined(VERSION3)
186  float *dT;
187  if (MAGMA_SUCCESS != magma_smalloc( &dT, nb*n )) {
188  *info = MAGMA_ERR_DEVICE_ALLOC;
189  return *info;
190  }
191  #endif
192 
193  /* Get machine constants */
194  eps = lapackf77_slamch( "P" );
195  smlnum = lapackf77_slamch( "S" );
196  bignum = 1. / smlnum;
197  lapackf77_slabad( &smlnum, &bignum );
198  smlnum = magma_ssqrt( smlnum ) / eps;
199  bignum = 1. / smlnum;
200 
201  /* Scale A if max element outside range [SMLNUM,BIGNUM] */
202  anrm = lapackf77_slange( "M", &n, &n, A, &lda, dum );
203  scalea = 0;
204  if (anrm > 0. && anrm < smlnum) {
205  scalea = 1;
206  cscale = smlnum;
207  } else if (anrm > bignum) {
208  scalea = 1;
209  cscale = bignum;
210  }
211  if (scalea) {
212  lapackf77_slascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
213  }
214 
215  /* Balance the matrix
216  * (Workspace: need N) */
217  ibal = 0;
218  lapackf77_sgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );
219 
220  /* Reduce to upper Hessenberg form
221  * (Workspace: need 3*N, prefer 2*N + N*NB) */
222  itau = ibal + n;
223  iwrk = itau + n;
224  liwrk = lwork - iwrk;
225 
226  #if defined(VERSION1)
227  // Version 1 - LAPACK
228  lapackf77_sgehrd( &n, &ilo, &ihi, A, &lda,
229  &work[itau], &work[iwrk], &liwrk, &ierr );
230  #elif defined(VERSION2)
231  // Version 2 - LAPACK consistent HRD
232  magma_sgehrd2( n, ilo, ihi, A, lda,
233  &work[itau], &work[iwrk], liwrk, &ierr );
234  #elif defined(VERSION3)
235  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
236  magma_sgehrd( n, ilo, ihi, A, lda,
237  &work[itau], &work[iwrk], liwrk, dT, &ierr );
238  #endif
239 
240  if (wantvl) {
241  /* Want left eigenvectors
242  * Copy Householder vectors to VL */
243  side[0] = 'L';
244  lapackf77_slacpy( MagmaLowerStr, &n, &n,
245  A, &lda, vl, &ldvl );
246 
247  /* Generate orthogonal matrix in VL
248  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
249  #if defined(VERSION1) || defined(VERSION2)
250  // Version 1 & 2 - LAPACK
251  lapackf77_sorghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
252  &work[iwrk], &liwrk, &ierr );
253  #elif defined(VERSION3)
254  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
255  magma_sorghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
256  #endif
257 
258  /* Perform QR iteration, accumulating Schur vectors in VL
259  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
260  iwrk = itau;
261  liwrk = lwork - iwrk;
262  lapackf77_shseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
263  vl, &ldvl, &work[iwrk], &liwrk, info );
264 
265  if (wantvr) {
266  /* Want left and right eigenvectors
267  * Copy Schur vectors to VR */
268  side[0] = 'B';
269  lapackf77_slacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
270  }
271  }
272  else if (wantvr) {
273  /* Want right eigenvectors
274  * Copy Householder vectors to VR */
275  side[0] = 'R';
276  lapackf77_slacpy( "L", &n, &n, A, &lda, vr, &ldvr );
277 
278  /* Generate orthogonal matrix in VR
279  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
280  #if defined(VERSION1) || defined(VERSION2)
281  // Version 1 & 2 - LAPACK
282  lapackf77_sorghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
283  &work[iwrk], &liwrk, &ierr );
284  #elif defined(VERSION3)
285  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
286  magma_sorghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
287  #endif
288 
289  /* Perform QR iteration, accumulating Schur vectors in VR
290  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
291  iwrk = itau;
292  liwrk = lwork - iwrk;
293  lapackf77_shseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
294  vr, &ldvr, &work[iwrk], &liwrk, info );
295  }
296  else {
297  /* Compute eigenvalues only
298  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
299  iwrk = itau;
300  liwrk = lwork - iwrk;
301  lapackf77_shseqr( "E", "N", &n, &ilo, &ihi, A, &lda, WR, WI,
302  vr, &ldvr, &work[iwrk], &liwrk, info );
303  }
304 
305  /* If INFO > 0 from SHSEQR, then quit */
306  if (*info > 0) {
307  goto CLEANUP;
308  }
309 
310  if (wantvl || wantvr) {
311  /* Compute left and/or right eigenvectors
312  * (Workspace: need 4*N) */
313  lapackf77_strevc( side, "B", select, &n, A, &lda, vl, &ldvl,
314  vr, &ldvr, &n, &nout, &work[iwrk], &ierr );
315  }
316 
317  if (wantvl) {
318  /* Undo balancing of left eigenvectors
319  * (Workspace: need N) */
320  lapackf77_sgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
321  vl, &ldvl, &ierr );
322 
323  /* Normalize left eigenvectors and make largest component real */
324  for (i = 0; i < n; ++i) {
325  if ( WI[i] == 0. ) {
326  scl = 1. / cblas_snrm2( n, vl(0,i), 1 );
327  cblas_sscal( n, scl, vl(0,i), 1 );
328  }
329  else if ( WI[i] > 0. ) {
330  d__1 = cblas_snrm2( n, vl(0,i), 1 );
331  d__2 = cblas_snrm2( n, vl(0,i+1), 1 );
332  scl = 1. / lapackf77_slapy2( &d__1, &d__2 );
333  cblas_sscal( n, scl, vl(0,i), 1 );
334  cblas_sscal( n, scl, vl(0,i+1), 1 );
335  for (k = 0; k < n; ++k) {
336  /* Computing 2nd power */
337  d__1 = *vl(k,i);
338  d__2 = *vl(k,i+1);
339  work[iwrk + k] = d__1*d__1 + d__2*d__2;
340  }
341  k = cblas_isamax( n, &work[iwrk], 1 );
342  lapackf77_slartg( vl(k,i), vl(k,i+1), &cs, &sn, &r );
343  cblas_srot( n, vl(0,i), 1, vl(0,i+1), 1, cs, sn );
344  *vl(k,i+1) = 0.;
345  }
346  }
347  }
348 
349  if (wantvr) {
350  /* Undo balancing of right eigenvectors
351  * (Workspace: need N) */
352  lapackf77_sgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
353  vr, &ldvr, &ierr );
354 
355  /* Normalize right eigenvectors and make largest component real */
356  for (i = 0; i < n; ++i) {
357  if ( WI[i] == 0. ) {
358  scl = 1. / cblas_snrm2( n, vr(0,i), 1 );
359  cblas_sscal( n, scl, vr(0,i), 1 );
360  }
361  else if ( WI[i] > 0. ) {
362  d__1 = cblas_snrm2( n, vr(0,i), 1 );
363  d__2 = cblas_snrm2( n, vr(0,i+1), 1 );
364  scl = 1. / lapackf77_slapy2( &d__1, &d__2 );
365  cblas_sscal( n, scl, vr(0,i), 1 );
366  cblas_sscal( n, scl, vr(0,i+1), 1 );
367  for (k = 0; k < n; ++k) {
368  /* Computing 2nd power */
369  d__1 = *vr(k,i);
370  d__2 = *vr(k,i+1);
371  work[iwrk + k] = d__1*d__1 + d__2*d__2;
372  }
373  k = cblas_isamax( n, &work[iwrk], 1 );
374  lapackf77_slartg( vr(k,i), vr(k,i+1), &cs, &sn, &r );
375  cblas_srot( n, vr(0,i), 1, vr(0,i+1), 1, cs, sn );
376  *vr(k,i+1) = 0.;
377  }
378  }
379  }
380 
381 CLEANUP:
382  /* Undo scaling if necessary */
383  if (scalea) {
384  i__1 = n - (*info);
385  i__2 = max( n - (*info), 1 );
386  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
387  WR + (*info), &i__2, &ierr );
388  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
389  WI + (*info), &i__2, &ierr );
390  if (*info > 0) {
391  i__1 = ilo - 1;
392  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
393  WR, &n, &ierr );
394  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
395  WI, &n, &ierr );
396  }
397  }
398 
399  #if defined(VERSION3)
400  magma_free( dT );
401  #endif
402 
403  return *info;
404 } /* magma_sgeev */
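The normalization loops above follow the real-Schur storage convention: when WI[i] > 0, columns i and i+1 of vr (and of vl) hold the real and imaginary parts of a complex-conjugate eigenvector pair. A caller that needs explicit complex eigenvectors can unpack them as in the sketch below; this is illustrative only and not part of the MAGMA API (the output array v is a hypothetical caller-allocated array, and C99 <complex.h> is assumed).

    #include <complex.h>

    /* Sketch: unpack complex right eigenvectors from WR/WI and VR.
     * float complex *v: hypothetical caller-allocated n*n output array;
     * VR, ldvr, WI, n are the arguments/results of the call above. */
    for (magma_int_t j = 0; j < n; ++j) {
        if ( WI[j] == 0.f ) {
            /* real eigenvalue: v(:,j) = VR(:,j) */
            for (magma_int_t k = 0; k < n; ++k)
                v[k + j*n] = VR[k + j*ldvr];
        }
        else if ( WI[j] > 0.f ) {
            /* conjugate pair: v(:,j) = VR(:,j) + i*VR(:,j+1), v(:,j+1) = conj(v(:,j)) */
            for (magma_int_t k = 0; k < n; ++k) {
                v[k + j*n]     = VR[k + j*ldvr] + I*VR[k + (j+1)*ldvr];
                v[k + (j+1)*n] = conjf( v[k + j*n] );
            }
        }
    }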

magma_int_t magma_sgeev_m ( char  jobvl,
char  jobvr,
magma_int_t  n,
float *  A,
magma_int_t  lda,
float *  WR,
float *  WI,
float *  vl,
magma_int_t  ldvl,
float *  vr,
magma_int_t  ldvr,
float *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 27 of file sgeev_m.cpp.

References __func__, cblas_isamax(), cblas_snrm2(), cblas_srot(), cblas_sscal(), dT, lapackf77_lsame, lapackf77_sgebak(), lapackf77_sgebal(), lapackf77_sgehrd(), lapackf77_shseqr(), lapackf77_slabad, lapackf77_slacpy(), lapackf77_slamch, lapackf77_slange(), lapackf77_slapy2, lapackf77_slartg(), lapackf77_slascl(), lapackf77_sorghr(), lapackf77_strevc(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_sgehrd_nb(), MAGMA_S_MAKE, magma_sgehrd(), magma_sgehrd2(), magma_sgehrd_m(), magma_smalloc(), magma_smalloc_cpu(), magma_sorghr(), magma_sorghr_m(), magma_ssetmatrix, magma_ssqrt, MAGMA_SUCCESS, magma_xerbla(), MagmaLowerStr, max, T, vl, and vr.

35 {
36 /* -- MAGMA (version 1.4.0) --
37  Univ. of Tennessee, Knoxville
38  Univ. of California, Berkeley
39  Univ. of Colorado, Denver
40  August 2013
41 
42  Purpose
43  =======
44  SGEEV computes for an N-by-N real nonsymmetric matrix A, the
45  eigenvalues and, optionally, the left and/or right eigenvectors.
46 
47  The right eigenvector v(j) of A satisfies
48  A * v(j) = lambda(j) * v(j)
49  where lambda(j) is its eigenvalue.
50  The left eigenvector u(j) of A satisfies
51  u(j)**T * A = lambda(j) * u(j)**T
52  where u(j)**T denotes the transpose of u(j).
53 
54  The computed eigenvectors are normalized to have Euclidean norm
55  equal to 1 and largest component real.
56 
57  Arguments
58  =========
59  JOBVL (input) CHARACTER*1
60  = 'N': left eigenvectors of A are not computed;
61  = 'V': left eigenvectors of A are computed.
62 
63  JOBVR (input) CHARACTER*1
64  = 'N': right eigenvectors of A are not computed;
65  = 'V': right eigenvectors of A are computed.
66 
67  N (input) INTEGER
68  The order of the matrix A. N >= 0.
69 
70  A (input/output) REAL array, dimension (LDA,N)
71  On entry, the N-by-N matrix A.
72  On exit, A has been overwritten.
73 
74  LDA (input) INTEGER
75  The leading dimension of the array A. LDA >= max(1,N).
76 
77  WR (output) REAL array, dimension (N)
78  WI (output) REAL array, dimension (N)
79  WR and WI contain the real and imaginary parts,
80  respectively, of the computed eigenvalues. Complex
81  conjugate pairs of eigenvalues appear consecutively
82  with the eigenvalue having the positive imaginary part
83  first.
84 
85  VL (output) REAL array, dimension (LDVL,N)
86  If JOBVL = 'V', the left eigenvectors u(j) are stored one
87  after another in the columns of VL, in the same order
88  as their eigenvalues.
89  If JOBVL = 'N', VL is not referenced.
90  u(j) = VL(:,j), the j-th column of VL.
91 
92  LDVL (input) INTEGER
93  The leading dimension of the array VL. LDVL >= 1; if
94  JOBVL = 'V', LDVL >= N.
95 
96  VR (output) REAL array, dimension (LDVR,N)
97  If JOBVR = 'V', the right eigenvectors v(j) are stored one
98  after another in the columns of VR, in the same order
99  as their eigenvalues.
100  If JOBVR = 'N', VR is not referenced.
101  v(j) = VR(:,j), the j-th column of VR.
102 
103  LDVR (input) INTEGER
104  The leading dimension of the array VR. LDVR >= 1; if
105  JOBVR = 'V', LDVR >= N.
106 
107  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
108  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
109 
110  LWORK (input) INTEGER
111  The dimension of the array WORK. LWORK >= (2+nb)*N.
112 
113  If LWORK = -1, then a workspace query is assumed; the routine
114  only calculates the optimal size of the WORK array, returns
115  this value as the first entry of the WORK array, and no error
116  message related to LWORK is issued by XERBLA.
117 
118  INFO (output) INTEGER
119  = 0: successful exit
120  < 0: if INFO = -i, the i-th argument had an illegal value.
121  > 0: if INFO = i, the QR algorithm failed to compute all the
122  eigenvalues, and no eigenvectors have been computed;
123  elements i+1:N of WR and WI contain eigenvalues which have
124  converged.
125  ===================================================================== */
126 
127  #define vl(i,j) (vl + (i) + (j)*ldvl)
128  #define vr(i,j) (vr + (i) + (j)*ldvr)
129 
130  magma_int_t c_one = 1;
131  magma_int_t c_zero = 0;
132 
133  float d__1, d__2;
134  float r, cs, sn, scl;
135  float dum[1], eps;
136  float anrm, cscale, bignum, smlnum;
137  magma_int_t i, k, ilo, ihi;
138  magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
139  magma_int_t scalea, minwrk, lquery, wantvl, wantvr, select[1];
140 
141  char side[2] = {0, 0};
142  char jobvl_[2] = {jobvl, 0};
143  char jobvr_[2] = {jobvr, 0};
144 
145  *info = 0;
146  lquery = lwork == -1;
147  wantvl = lapackf77_lsame( jobvl_, "V" );
148  wantvr = lapackf77_lsame( jobvr_, "V" );
149  if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
150  *info = -1;
151  } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
152  *info = -2;
153  } else if (n < 0) {
154  *info = -3;
155  } else if (lda < max(1,n)) {
156  *info = -5;
157  } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
158  *info = -9;
159  } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
160  *info = -11;
161  }
162 
163  /* Compute workspace */
164  nb = magma_get_sgehrd_nb( n );
165  if (*info == 0) {
166  minwrk = (2+nb)*n;
167  work[0] = MAGMA_S_MAKE( (float) minwrk, 0. );
168 
169  if (lwork < minwrk && ! lquery) {
170  *info = -13;
171  }
172  }
173 
174  if (*info != 0) {
175  magma_xerbla( __func__, -(*info) );
176  return *info;
177  }
178  else if (lquery) {
179  return *info;
180  }
181 
182  /* Quick return if possible */
183  if (n == 0) {
184  return *info;
185  }
186 
187  #if defined(Version3) || defined(Version4) || defined(Version5)
188  float *dT;
189  if (MAGMA_SUCCESS != magma_smalloc( &dT, nb*n )) {
190  *info = MAGMA_ERR_DEVICE_ALLOC;
191  return *info;
192  }
193  #endif
194  #if defined(Version4) || defined(Version5)
195  float *T;
196  if (MAGMA_SUCCESS != magma_smalloc_cpu( &T, nb*n )) {
197  magma_free( dT );
198  *info = MAGMA_ERR_HOST_ALLOC;
199  return *info;
200  }
201  #endif
202 
203  /* Get machine constants */
204  eps = lapackf77_slamch( "P" );
205  smlnum = lapackf77_slamch( "S" );
206  bignum = 1. / smlnum;
207  lapackf77_slabad( &smlnum, &bignum );
208  smlnum = magma_ssqrt( smlnum ) / eps;
209  bignum = 1. / smlnum;
210 
211  /* Scale A if max element outside range [SMLNUM,BIGNUM] */
212  anrm = lapackf77_slange( "M", &n, &n, A, &lda, dum );
213  scalea = 0;
214  if (anrm > 0. && anrm < smlnum) {
215  scalea = 1;
216  cscale = smlnum;
217  } else if (anrm > bignum) {
218  scalea = 1;
219  cscale = bignum;
220  }
221  if (scalea) {
222  lapackf77_slascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
223  }
224 
225  /* Balance the matrix
226  * (Workspace: need N) */
227  ibal = 0;
228  lapackf77_sgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );
229 
230  /* Reduce to upper Hessenberg form
231  * (Workspace: need 3*N, prefer 2*N + N*NB) */
232  itau = ibal + n;
233  iwrk = itau + n;
234  liwrk = lwork - iwrk;
235 
236  #if defined(Version1)
237  // Version 1 - LAPACK
238  lapackf77_sgehrd( &n, &ilo, &ihi, A, &lda,
239  &work[itau], &work[iwrk], &liwrk, &ierr );
240  #elif defined(Version2)
241  // Version 2 - LAPACK consistent HRD
242  magma_sgehrd2( n, ilo, ihi, A, lda,
243  &work[itau], &work[iwrk], &liwrk, &ierr );
244  #elif defined(Version3)
245  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
246  magma_sgehrd( n, ilo, ihi, A, lda,
247  &work[itau], &work[iwrk], liwrk, dT, &ierr );
248  #elif defined(Version4) || defined(Version5)
249  // Version 4 - Multi-GPU, T on host
250  magma_sgehrd_m( n, ilo, ihi, A, lda,
251  &work[itau], &work[iwrk], liwrk, T, &ierr );
252  magma_ssetmatrix( nb, n, T, nb, dT, nb );
253  #endif
254 
255  if (wantvl) {
256  /* Want left eigenvectors
257  * Copy Householder vectors to VL */
258  side[0] = 'L';
259  lapackf77_slacpy( MagmaLowerStr, &n, &n,
260  A, &lda, vl, &ldvl );
261 
262  /* Generate orthogonal matrix in VL
263  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
264  #if defined(Version1) || defined(Version2)
265  // Version 1 & 2 - LAPACK
266  lapackf77_sorghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
267  &work[iwrk], &liwrk, &ierr );
268  #elif defined(Version3) || defined(Version4)
269  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
270  magma_sorghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
271  #elif defined(Version5)
272  // Version 5 - Multi-GPU, T on host
273  magma_sorghr_m( n, ilo, ihi, vl, ldvl, &work[itau], T, nb, &ierr );
274  #endif
275 
276  /* Perform QR iteration, accumulating Schur vectors in VL
277  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
278  iwrk = itau;
279  liwrk = lwork - iwrk;
280  lapackf77_shseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
281  vl, &ldvl, &work[iwrk], &liwrk, info );
282 
283  if (wantvr) {
284  /* Want left and right eigenvectors
285  * Copy Schur vectors to VR */
286  side[0] = 'B';
287  lapackf77_slacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
288  }
289  }
290  else if (wantvr) {
291  /* Want right eigenvectors
292  * Copy Householder vectors to VR */
293  side[0] = 'R';
294  lapackf77_slacpy( "L", &n, &n, A, &lda, vr, &ldvr );
295 
296  /* Generate orthogonal matrix in VR
297  * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
298  #if defined(Version1) || defined(Version2)
299  // Version 1 & 2 - LAPACK
300  lapackf77_sorghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
301  &work[iwrk], &liwrk, &ierr );
302  #elif defined(Version3) || defined(Version4)
303  // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
304  magma_sorghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
305  #elif defined(Version5)
306  // Version 5 - Multi-GPU, T on host
307  magma_sorghr_m( n, ilo, ihi, vr, ldvr, &work[itau], T, nb, &ierr );
308  #endif
309 
310  /* Perform QR iteration, accumulating Schur vectors in VR
311  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
312  iwrk = itau;
313  liwrk = lwork - iwrk;
314  lapackf77_shseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
315  vr, &ldvr, &work[iwrk], &liwrk, info );
316  }
317  else {
318  /* Compute eigenvalues only
319  * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
320  iwrk = itau;
321  liwrk = lwork - iwrk;
322  lapackf77_shseqr( "E", "N", &n, &ilo, &ihi, A, &lda, WR, WI,
323  vr, &ldvr, &work[iwrk], &liwrk, info );
324  }
325 
326  /* If INFO > 0 from SHSEQR, then quit */
327  if (*info > 0) {
328  goto CLEANUP;
329  }
330 
331  if (wantvl || wantvr) {
332  /* Compute left and/or right eigenvectors
333  * (Workspace: need 4*N) */
334  lapackf77_strevc( side, "B", select, &n, A, &lda, vl, &ldvl,
335  vr, &ldvr, &n, &nout, &work[iwrk], &ierr );
336  }
337 
338  if (wantvl) {
339  /* Undo balancing of left eigenvectors
340  * (Workspace: need N) */
341  lapackf77_sgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
342  vl, &ldvl, &ierr );
343 
344  /* Normalize left eigenvectors and make largest component real */
345  for (i = 0; i < n; ++i) {
346  if ( WI[i] == 0. ) {
347  scl = 1. / cblas_snrm2( n, vl(0,i), 1 );
348  cblas_sscal( n, scl, vl(0,i), 1 );
349  }
350  else if ( WI[i] > 0. ) {
351  d__1 = cblas_snrm2( n, vl(0,i), 1 );
352  d__2 = cblas_snrm2( n, vl(0,i+1), 1 );
353  scl = 1. / lapackf77_slapy2( &d__1, &d__2 );
354  cblas_sscal( n, scl, vl(0,i), 1 );
355  cblas_sscal( n, scl, vl(0,i+1), 1 );
356  for (k = 0; k < n; ++k) {
357  /* Computing 2nd power */
358  d__1 = *vl(k,i);
359  d__2 = *vl(k,i+1);
360  work[iwrk + k] = d__1*d__1 + d__2*d__2;
361  }
362  k = cblas_isamax( n, &work[iwrk], 1 );
363  lapackf77_slartg( vl(k,i), vl(k,i+1), &cs, &sn, &r );
364  cblas_srot( n, vl(0,i), 1, vl(0,i+1), 1, cs, sn );
365  *vl(k,i+1) = 0.;
366  }
367  }
368  }
369 
370  if (wantvr) {
371  /* Undo balancing of right eigenvectors
372  * (Workspace: need N) */
373  lapackf77_sgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
374  vr, &ldvr, &ierr );
375 
376  /* Normalize right eigenvectors and make largest component real */
377  for (i = 0; i < n; ++i) {
378  if ( WI[i] == 0. ) {
379  scl = 1. / cblas_snrm2( n, vr(0,i), 1 );
380  cblas_sscal( n, scl, vr(0,i), 1 );
381  }
382  else if ( WI[i] > 0. ) {
383  d__1 = cblas_snrm2( n, vr(0,i), 1 );
384  d__2 = cblas_snrm2( n, vr(0,i+1), 1 );
385  scl = 1. / lapackf77_slapy2( &d__1, &d__2 );
386  cblas_sscal( n, scl, vr(0,i), 1 );
387  cblas_sscal( n, scl, vr(0,i+1), 1 );
388  for (k = 0; k < n; ++k) {
389  /* Computing 2nd power */
390  d__1 = *vr(k,i);
391  d__2 = *vr(k,i+1);
392  work[iwrk + k] = d__1*d__1 + d__2*d__2;
393  }
394  k = cblas_isamax( n, &work[iwrk], 1 );
395  lapackf77_slartg( vr(k,i), vr(k,i+1), &cs, &sn, &r );
396  cblas_srot( n, vr(0,i), 1, vr(0,i+1), 1, cs, sn );
397  *vr(k,i+1) = 0.;
398  }
399  }
400  }
401 
402 CLEANUP:
403  /* Undo scaling if necessary */
404  if (scalea) {
405  i__1 = n - (*info);
406  i__2 = max( n - (*info), 1 );
407  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
408  WR + (*info), &i__2, &ierr );
409  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
410  WI + (*info), &i__2, &ierr );
411  if (*info > 0) {
412  i__1 = ilo - 1;
413  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
414  WR, &n, &ierr );
415  lapackf77_slascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
416  WI, &n, &ierr );
417  }
418  }
419 
420  #if defined(Version3) || defined(Version4) || defined(Version5)
421  magma_free( dT );
422  #endif
423  #if defined(Version4) || defined(Version5)
424  magma_free_cpu( T );
425  #endif
426 
427  return *info;
428 } /* magma_sgeev_m */
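A minimal calling sketch for magma_sgeev_m is given below, assuming an n-by-n matrix already stored column-major in the host array A with leading dimension lda; only right eigenvectors are requested, the workspace size follows the (2+nb)*N check in the code above, and error handling is omitted.

    /* Sketch: eigenvalues and right eigenvectors of a general real matrix. */
    magma_int_t info, nb, lwork;
    float *WR, *WI, *VR, *work;
    float VLdummy[1];                       /* VL is not referenced when jobvl = 'N' */

    nb    = magma_get_sgehrd_nb( n );
    lwork = (2 + nb) * n;                   /* >= minimum workspace, see above */

    magma_smalloc_cpu( &WR,   n );
    magma_smalloc_cpu( &WI,   n );
    magma_smalloc_cpu( &VR,   n*n );
    magma_smalloc_cpu( &work, lwork );

    magma_sgeev_m( 'N', 'V', n, A, lda,
                   WR, WI,
                   VLdummy, 1,              /* ldvl >= 1 suffices for jobvl = 'N' */
                   VR, n,                   /* ldvr >= n required for jobvr = 'V' */
                   work, lwork, &info );

    magma_free_cpu( work );  magma_free_cpu( VR );
    magma_free_cpu( WI );    magma_free_cpu( WR );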

magma_int_t magma_sgehrd ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
float *  A,
magma_int_t  lda,
float *  tau,
float *  work,
magma_int_t  lwork,
float *  dT,
magma_int_t *  info 
)

Definition at line 17 of file sgehrd.cpp.

References __func__, A, dA, dTi, dV, dwork, lapackf77_sgehd2(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_sgehrd_nb(), MAGMA_S_ONE, MAGMA_S_SET2REAL, MAGMA_S_ZERO, magma_sgetmatrix, magma_slahr2(), magma_slahru(), magma_smalloc(), magma_smalloc_cpu(), magma_ssetmatrix, MAGMA_SUCCESS, magma_xerbla(), magmablas_slaset(), max, min, szero_nbxnb_block(), and T.

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  SGEHRD reduces a REAL general matrix A to upper Hessenberg form H by
33  an orthogonal similarity transformation: Q' * A * Q = H . This version
34  stores the triangular matrices used in the factorization so that they can
35  be applied directly (i.e., without being recomputed) later. As a result,
36  the application of Q is much faster.
37 
38  Arguments
39  =========
40  N (input) INTEGER
41  The order of the matrix A. N >= 0.
42 
43  ILO (input) INTEGER
44  IHI (input) INTEGER
45  It is assumed that A is already upper triangular in rows
46  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
47  set by a previous call to SGEBAL; otherwise they should be
48  set to 1 and N respectively. See Further Details.
49  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
50 
51  A (input/output) REAL array, dimension (LDA,N)
52  On entry, the N-by-N general matrix to be reduced.
53  On exit, the upper triangle and the first subdiagonal of A
54  are overwritten with the upper Hessenberg matrix H, and the
55  elements below the first subdiagonal, with the array TAU,
56  represent the orthogonal matrix Q as a product of elementary
57  reflectors. See Further Details.
58 
59  LDA (input) INTEGER
60  The leading dimension of the array A. LDA >= max(1,N).
61 
62  TAU (output) REAL array, dimension (N-1)
63  The scalar factors of the elementary reflectors (see Further
64  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
65  zero.
66 
67  WORK (workspace/output) REAL array, dimension (LWORK)
68  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The length of the array WORK. LWORK >= max(1,N).
72  For optimum performance LWORK >= N*NB, where NB is the
73  optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  dT (output) REAL array on the GPU, dimension NB*N,
81  where NB is the optimal blocksize. It stores the NB*NB blocks
82  of the triangular T matrices used in the reduction.
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of (ihi-ilo) elementary
91  reflectors
92 
93  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
94 
95  Each H(i) has the form
96 
97  H(i) = I - tau * v * v'
98 
99  where tau is a real scalar, and v is a real vector with
100  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
101  exit in A(i+2:ihi,i), and tau in TAU(i).
102 
103  The contents of A are illustrated by the following example, with
104  n = 7, ilo = 2 and ihi = 6:
105 
106  on entry, on exit,
107 
108  ( a a a a a a a ) ( a a h h h h a )
109  ( a a a a a a ) ( a h h h h a )
110  ( a a a a a a ) ( h h h h h h )
111  ( a a a a a a ) ( v2 h h h h h )
112  ( a a a a a a ) ( v2 v3 h h h h )
113  ( a a a a a a ) ( v2 v3 v4 h h h )
114  ( a ) ( a )
115 
116  where a denotes an element of the original matrix A, h denotes a
117  modified element of the upper Hessenberg matrix H, and vi denotes an
118  element of the vector defining H(i).
119 
120  This implementation follows the hybrid algorithm and notations described in
121 
122  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
123  form through hybrid GPU-based computing," University of Tennessee Computer
124  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
125  May 24, 2009.
126 
127  This version stores the T matrices in dT, for later use in magma_sorghr.
128 
129  ===================================================================== */
130 
131  #define A( i, j ) ( A + (i) + (j)*lda)
132  #define dA( i, j ) (dA + (i) + (j-ilo)*ldda)
133 
134  float c_one = MAGMA_S_ONE;
135  float c_zero = MAGMA_S_ZERO;
136 
137  magma_int_t nb = magma_get_sgehrd_nb( n );
138  magma_int_t ldda = n; // assumed in slahru
139 
140  magma_int_t nh, iws;
141  magma_int_t iinfo;
142  magma_int_t ldwork;
143  magma_int_t lquery;
144 
145  *info = 0;
146  iws = n*nb;
147  MAGMA_S_SET2REAL( work[0], (float) iws );
148 
149  lquery = lwork == -1;
150  if (n < 0) {
151  *info = -1;
152  } else if (ilo < 1 || ilo > max(1,n)) {
153  *info = -2;
154  } else if (ihi < min(ilo,n) || ihi > n) {
155  *info = -3;
156  } else if (lda < max(1,n)) {
157  *info = -5;
158  } else if (lwork < max(1,n) && ! lquery) {
159  *info = -8;
160  }
161  if (*info != 0) {
162  magma_xerbla( __func__, -(*info) );
163  return *info;
164  }
165  else if (lquery)
166  return *info;
167 
168  // Adjust from 1-based indexing
169  ilo -= 1;
170 
171  // Quick return if possible
172  nh = ihi - ilo;
173  if (nh <= 1) {
174  work[0] = c_one;
175  return *info;
176  }
177 
178  // GPU workspace is:
179  // nb*ldda for dwork for slahru
180  // nb*ldda for dV
181  // n*ldda for dA
182  float *dwork;
183  if (MAGMA_SUCCESS != magma_smalloc( &dwork, 2*nb*ldda + n*ldda )) {
184  *info = MAGMA_ERR_DEVICE_ALLOC;
185  return *info;
186  }
187  float *dV = dwork + nb*ldda;
188  float *dA = dwork + nb*ldda*2;
189  ldwork = n;
190 
191  magma_int_t i;
192 
193  float *T, *dTi;
194  magma_smalloc_cpu( &T, nb*nb );
195  if ( T == NULL ) {
196  magma_free( dwork );
197  *info = MAGMA_ERR_HOST_ALLOC;
198  return *info;
199  }
200 
201  // zero first block of V, which is lower triangular
202  szero_nbxnb_block(nb, dV, ldda);
203 
204  // Set elements 0:ILO-1 and IHI-1:N-2 of TAU to zero
205  for(i = 0; i < ilo; ++i)
206  tau[i] = c_zero;
207 
208  for(i = max(0,ihi-1); i < n-1; ++i)
209  tau[i] = c_zero;
210 
211  for(i=0; i < nb*nb; i += 4)
212  T[i] = T[i+1] = T[i+2] = T[i+3] = c_zero;
213  magmablas_slaset( 'F', nb, n, dT, nb );
214 
215  // If not enough workspace, use unblocked code
216  if ( lwork < iws ) {
217  nb = 1;
218  }
219 
220  if (nb == 1 || nb > nh) {
221  // Use unblocked code below
222  i = ilo;
223  }
224  else {
225  // Use blocked code
226  // Copy the matrix to the GPU
227  magma_ssetmatrix( n, n-ilo, A(0,ilo), lda, dA, ldda );
228 
229  for (i = ilo; i < ihi-1 - nb; i += nb) {
230  // Reduce columns i:i+nb-1 to Hessenberg form, returning the
231  // matrices V and T of the block reflector H = I - V*T*V'
232  // which performs the reduction, and also the matrix Y = A*V*T
233 
234  // Get the current panel (no need for the 1st iteration)
235  magma_sgetmatrix( ihi-i, nb,
236  dA(i,i), ldda,
237  A (i,i), lda );
238 
239  // add 1 to i for 1-based index
240  magma_slahr2( ihi, i+1, nb,
241  dA(0,i),
242  dV,
243  A (0,i), lda,
244  &tau[i], T, nb, work, ldwork);
245 
246  // Copy T from the CPU to dT on the GPU
247  dTi = dT + (i - ilo)*nb;
248  magma_ssetmatrix( nb, nb, T, nb, dTi, nb );
249 
250  magma_slahru( n, ihi, i, nb,
251  A (0,i), lda,
252  dA(0,i), // dA
253  dA(i,i), // dY, stored over current panel
254  dV, dTi, dwork );
255  }
256 
257  // Copy remainder to host
258  magma_sgetmatrix( n, n-i,
259  dA(0,i), ldda,
260  A (0,i), lda );
261  }
262 
263  // Use unblocked code to reduce the rest of the matrix
264  // add 1 to i for 1-based index
265  i += 1;
266  lapackf77_sgehd2(&n, &i, &ihi, A, &lda, tau, work, &iinfo);
267  MAGMA_S_SET2REAL( work[0], (float) iws );
268 
269  magma_free( dwork );
270  magma_free_cpu( T );
271 
272  return *info;
273 } /* magma_sgehrd */
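The VERSION3 path of magma_sgeev above shows how this routine is meant to be paired with magma_sorghr: the T factors left in dT are reused to form Q without recomputation. A condensed sketch of that pairing follows; Q and ldq are hypothetical caller arrays, ilo/ihi come from a prior SGEBAL call (or are 1 and n), and error handling is omitted.

    /* Sketch: Hessenberg reduction with stored T factors, then form Q. */
    magma_int_t info, nb = magma_get_sgehrd_nb( n );
    magma_int_t lwork = n*nb;               /* optimal LWORK per the documentation above */
    float *tau, *work, *dT;

    magma_smalloc_cpu( &tau,  n-1 );
    magma_smalloc_cpu( &work, lwork );
    magma_smalloc( &dT, nb*n );             /* device array for the T matrices */

    magma_sgehrd( n, ilo, ihi, A, lda, tau, work, lwork, dT, &info );

    /* A now holds H and the Householder vectors; form Q in a separate array */
    lapackf77_slacpy( "L", &n, &n, A, &lda, Q, &ldq );
    magma_sorghr( n, ilo, ihi, Q, ldq, tau, dT, nb, &info );

    magma_free( dT );
    magma_free_cpu( work );
    magma_free_cpu( tau );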

magma_int_t magma_sgehrd2 ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
float *  A,
magma_int_t  lda,
float *  tau,
float *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file sgehrd2.cpp.

References __func__, lapackf77_sgehd2(), MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_sgehrd_nb(), MAGMA_S_ONE, MAGMA_S_SET2REAL, MAGMA_S_ZERO, magma_sgetmatrix, magma_slahr2(), magma_slahru(), magma_smalloc(), magma_smalloc_cpu(), magma_ssetmatrix, MAGMA_SUCCESS, magma_xerbla(), max, min, and szero_nbxnb_block().

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  SGEHRD2 reduces a REAL general matrix A to upper Hessenberg form H by
28  an orthogonal similarity transformation: Q' * A * Q = H .
29 
30  Arguments
31  =========
32  N (input) INTEGER
33  The order of the matrix A. N >= 0.
34 
35  ILO (input) INTEGER
36  IHI (input) INTEGER
37  It is assumed that A is already upper triangular in rows
38  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
39  set by a previous call to SGEBAL; otherwise they should be
40  set to 1 and N respectively. See Further Details.
41  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
42 
43  A (input/output) REAL array, dimension (LDA,N)
44  On entry, the N-by-N general matrix to be reduced.
45  On exit, the upper triangle and the first subdiagonal of A
46  are overwritten with the upper Hessenberg matrix H, and the
47  elements below the first subdiagonal, with the array TAU,
48  represent the orthogonal matrix Q as a product of elementary
49  reflectors. See Further Details.
50 
51  LDA (input) INTEGER
52  The leading dimension of the array A. LDA >= max(1,N).
53 
54  TAU (output) REAL array, dimension (N-1)
55  The scalar factors of the elementary reflectors (see Further
56  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
57  zero.
58 
59  WORK (workspace/output) REAL array, dimension (LWORK)
60  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
61 
62  LWORK (input) INTEGER
63  The length of the array WORK. LWORK >= max(1,N).
64  For optimum performance LWORK >= N*NB, where NB is the
65  optimal blocksize.
66 
67  If LWORK = -1, then a workspace query is assumed; the routine
68  only calculates the optimal size of the WORK array, returns
69  this value as the first entry of the WORK array, and no error
70  message related to LWORK is issued by XERBLA.
71 
72  INFO (output) INTEGER
73  = 0: successful exit
74  < 0: if INFO = -i, the i-th argument had an illegal value.
75 
76  Further Details
77  ===============
78  The matrix Q is represented as a product of (ihi-ilo) elementary
79  reflectors
80 
81  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
82 
83  Each H(i) has the form
84 
85  H(i) = I - tau * v * v'
86 
87  where tau is a real scalar, and v is a real vector with
88  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
89  exit in A(i+2:ihi,i), and tau in TAU(i).
90 
91  The contents of A are illustrated by the following example, with
92  n = 7, ilo = 2 and ihi = 6:
93 
94  on entry, on exit,
95 
96  ( a a a a a a a ) ( a a h h h h a )
97  ( a a a a a a ) ( a h h h h a )
98  ( a a a a a a ) ( h h h h h h )
99  ( a a a a a a ) ( v2 h h h h h )
100  ( a a a a a a ) ( v2 v3 h h h h )
101  ( a a a a a a ) ( v2 v3 v4 h h h )
102  ( a ) ( a )
103 
104  where a denotes an element of the original matrix A, h denotes a
105  modified element of the upper Hessenberg matrix H, and vi denotes an
106  element of the vector defining H(i).
107 
108  This implementation follows the hybrid algorithm and notations described in
109 
110  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
111  form through hybrid GPU-based computing," University of Tennessee Computer
112  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
113  May 24, 2009.
114  ===================================================================== */
115 
116 
117  float c_one = MAGMA_S_ONE;
118  float c_zero = MAGMA_S_ZERO;
119 
120  magma_int_t nb = magma_get_sgehrd_nb( n );
121  magma_int_t N = n, ldda = n;
122 
123  magma_int_t ib;
124  magma_int_t nh, iws;
125  magma_int_t nbmin, iinfo;
126  magma_int_t ldwork;
127  magma_int_t lquery;
128 
129  --tau;
130 
131  *info = 0;
132  MAGMA_S_SET2REAL( work[0], (float) n * nb );
133 
134  lquery = lwork == -1;
135  if (n < 0) {
136  *info = -1;
137  } else if (ilo < 1 || ilo > max(1,n)) {
138  *info = -2;
139  } else if (ihi < min(ilo,n) || ihi > n) {
140  *info = -3;
141  } else if (lda < max(1,n)) {
142  *info = -5;
143  } else if (lwork < max(1,n) && ! lquery) {
144  *info = -8;
145  }
146  if (*info != 0) {
147  magma_xerbla( __func__, -(*info) );
148  return *info;
149  }
150  else if (lquery)
151  return *info;
152 
153  /* Quick return if possible */
154  nh = ihi - ilo + 1;
155  if (nh <= 1) {
156  work[0] = c_one;
157  return *info;
158  }
159 
160  float *da;
161  if (MAGMA_SUCCESS != magma_smalloc( &da, N*ldda + 2*N*nb + nb*nb )) {
162  *info = MAGMA_ERR_DEVICE_ALLOC;
163  return *info;
164  }
165 
166  float *d_A = da;
167  float *d_work = da + (N+nb)*ldda;
168 
169  magma_int_t i__;
170 
171  float *t, *d_t;
172  magma_smalloc_cpu( &t, nb*nb );
173  if ( t == NULL ) {
174  magma_free( da );
175  *info = MAGMA_ERR_HOST_ALLOC;
176  return *info;
177  }
178  d_t = d_work + nb * ldda;
179 
180  szero_nbxnb_block(nb, d_A+N*ldda, ldda);
181 
182  /* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
183  for (i__ = 1; i__ < ilo; ++i__)
184  tau[i__] = c_zero;
185 
186  for (i__ = max(1,ihi); i__ < n; ++i__)
187  tau[i__] = c_zero;
188 
189  for(i__=0; i__< nb*nb; i__+=4)
190  t[i__] = t[i__+1] = t[i__+2] = t[i__+3] = c_zero;
191 
192  nbmin = 2;
193  iws = 1;
194  if (nb > 1 && nb < nh) {
195  /* Determine when to cross over from blocked to unblocked code
196  (last block is always handled by unblocked code) */
197  if (nb < nh) {
198  /* Determine if workspace is large enough for blocked code */
199  iws = n * nb;
200  if (lwork < iws) {
201  /* Not enough workspace to use optimal NB: determine the
202  minimum value of NB, and reduce NB or force use of
203  unblocked code */
204  nbmin = nb;
205  if (lwork >= n * nbmin)
206  nb = lwork / n;
207  else
208  nb = 1;
209  }
210  }
211  }
212  ldwork = n;
213 
214  if (nb < nbmin || nb >= nh) {
215  /* Use unblocked code below */
216  i__ = ilo;
217  }
218  else {
219  /* Use blocked code */
220  /* Copy the matrix to the GPU */
221  magma_ssetmatrix( N, N-ilo+1, a+(ilo-1)*(lda), lda, d_A, ldda );
222 
223  for (i__ = ilo; i__ < ihi - nb; i__ += nb) {
224  /* Computing MIN */
225  ib = min(nb, ihi - i__);
226 
227  /* Reduce columns i:i+ib-1 to Hessenberg form, returning the
228  matrices V and T of the block reflector H = I - V*T*V'
229  which performs the reduction, and also the matrix Y = A*V*T */
230 
231  /* Get the current panel (no need for the 1st iteration) */
232  magma_sgetmatrix( ihi-i__+1, ib,
233  d_A + (i__ - ilo)*ldda + i__ - 1, ldda,
234  a + (i__ - 1 )*lda + i__ - 1, lda );
235 
236  magma_slahr2(ihi, i__, ib,
237  d_A + (i__ - ilo)*ldda,
238  d_A + N*ldda + 1,
239  a + (i__ - 1 )*(lda) , lda,
240  &tau[i__], t, nb, work, ldwork);
241 
242  /* Copy T from the CPU to D_T on the GPU */
243  magma_ssetmatrix( nb, nb, t, nb, d_t, nb );
244 
245  magma_slahru(n, ihi, i__ - 1, ib,
246  a + (i__ - 1 )*(lda), lda,
247  d_A + (i__ - ilo)*ldda,
248  d_A + (i__ - ilo)*ldda + i__ - 1,
249  d_A + N*ldda, d_t, d_work);
250  }
251  }
252 
253  /* Use unblocked code to reduce the rest of the matrix */
254  if (!(nb < nbmin || nb >= nh)) {
255  magma_sgetmatrix( n, n-i__+1,
256  d_A+ (i__-ilo)*ldda, ldda,
257  a + (i__-1)*(lda), lda );
258  }
259  lapackf77_sgehd2(&n, &i__, &ihi, a, &lda, &tau[1], work, &iinfo);
260  MAGMA_S_SET2REAL( work[0], (float) iws );
261 
262  magma_free( da );
263  magma_free_cpu(t);
264 
265  return *info;
266 } /* magma_sgehrd2 */
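Because magma_sgehrd2 keeps the plain LAPACK interface (no dT argument), Q is generated afterwards with LAPACK's sorghr, exactly as in the Version1/Version2 branches of magma_sgeev_m above. A brief sketch, with Q and ldq as hypothetical caller arrays and error handling omitted:

    /* Sketch: LAPACK-compatible reduction, Q generated on the CPU. */
    magma_int_t info, ierr, nb = magma_get_sgehrd_nb( n );
    magma_int_t lwork = n*nb;
    float *tau, *work;

    magma_smalloc_cpu( &tau,  n-1 );
    magma_smalloc_cpu( &work, lwork );

    magma_sgehrd2( n, ilo, ihi, A, lda, tau, work, lwork, &info );

    lapackf77_slacpy( "L", &n, &n, A, &lda, Q, &ldq );
    lapackf77_sorghr( &n, &ilo, &ihi, Q, &ldq, tau, work, &lwork, &ierr );

    magma_free_cpu( work );
    magma_free_cpu( tau );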

magma_int_t magma_sgehrd_m ( magma_int_t  n,
magma_int_t  ilo,
magma_int_t  ihi,
float *  A,
magma_int_t  lda,
float *  tau,
float *  work,
magma_int_t  lwork,
float *  T,
magma_int_t *  info 
)

Definition at line 16 of file sgehrd_m.cpp.

References __func__, sgehrd_data::A, A, dA, lapackf77_sgehd2(), lapackf77_slaset(), sgehrd_data::ldda, sgehrd_data::ldv, sgehrd_data::ldvd, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgehrd_nb(), magma_getdevice(), magma_num_gpus(), magma_queue_create, magma_queue_destroy, MAGMA_S_ONE, MAGMA_S_SET2REAL, MAGMA_S_ZERO, magma_setdevice(), magma_sgetmatrix, magma_sgetmatrix_async, magma_slahr2_m(), magma_slahru_m(), magma_smalloc(), magma_ssetmatrix_1D_col_bcyclic(), MAGMA_SUCCESS, magma_xerbla(), magmablasSetKernelStream(), max, min, sgehrd_data::ngpu, sgehrd_data::streams, sgehrd_data::Ti, sgehrd_data::V, sgehrd_data::Vd, sgehrd_data::W, and sgehrd_data::Y.

23 {
24 /* -- MAGMA (version 1.4.0) --
25  Univ. of Tennessee, Knoxville
26  Univ. of California, Berkeley
27  Univ. of Colorado, Denver
28  August 2013
29 
30  Purpose
31  =======
32  SGEHRD reduces a REAL general matrix A to upper Hessenberg form H by
33  an orthogonal similarity transformation: Q' * A * Q = H . This version
34  stores the triangular matrices used in the factorization so that they can
35  be applied directly (i.e., without being recomputed) later. As a result,
36  the application of Q is much faster.
37 
38  Arguments
39  =========
40  N (input) INTEGER
41  The order of the matrix A. N >= 0.
42 
43  ILO (input) INTEGER
44  IHI (input) INTEGER
45  It is assumed that A is already upper triangular in rows
46  and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
47  set by a previous call to SGEBAL; otherwise they should be
48  set to 1 and N respectively. See Further Details.
49  1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
50 
51  A (input/output) REAL array, dimension (LDA,N)
52  On entry, the N-by-N general matrix to be reduced.
53  On exit, the upper triangle and the first subdiagonal of A
54  are overwritten with the upper Hessenberg matrix H, and the
55  elements below the first subdiagonal, with the array TAU,
56  represent the orthogonal matrix Q as a product of elementary
57  reflectors. See Further Details.
58 
59  LDA (input) INTEGER
60  The leading dimension of the array A. LDA >= max(1,N).
61 
62  TAU (output) REAL array, dimension (N-1)
63  The scalar factors of the elementary reflectors (see Further
64  Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
65  zero.
66 
67  WORK (workspace/output) REAL array, dimension (LWORK)
68  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The length of the array WORK. LWORK >= max(1,N).
72  For optimum performance LWORK >= N*NB, where NB is the
73  optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  T (output) REAL array, dimension NB*N,
81  where NB is the optimal blocksize. It stores the NB*NB blocks
82  of the triangular T matrices used in the reduction.
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of (ihi-ilo) elementary
91  reflectors
92 
93  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
94 
95  Each H(i) has the form
96 
97  H(i) = I - tau * v * v'
98 
99  where tau is a real scalar, and v is a real vector with
100  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
101  exit in A(i+2:ihi,i), and tau in TAU(i).
102 
103  The contents of A are illustrated by the following example, with
104  n = 7, ilo = 2 and ihi = 6:
105 
106  on entry, on exit,
107 
108  ( a a a a a a a ) ( a a h h h h a )
109  ( a a a a a a ) ( a h h h h a )
110  ( a a a a a a ) ( h h h h h h )
111  ( a a a a a a ) ( v2 h h h h h )
112  ( a a a a a a ) ( v2 v3 h h h h )
113  ( a a a a a a ) ( v2 v3 v4 h h h )
114  ( a ) ( a )
115 
116  where a denotes an element of the original matrix A, h denotes a
117  modified element of the upper Hessenberg matrix H, and vi denotes an
118  element of the vector defining H(i).
119 
120  This implementation follows the hybrid algorithm and notations described in
121 
122  S. Tomov and J. Dongarra, "Accelerating the reduction to upper Hessenberg
123  form through hybrid GPU-based computing," University of Tennessee Computer
124  Science Technical Report, UT-CS-09-642 (also LAPACK Working Note 219),
125  May 24, 2009.
126 
127  This version stores the T matrices, for later use in magma_sorghr.
128 
129  ===================================================================== */
130 
131  #define A( i, j ) (A + (i) + (j)*lda)
132  #define dA( d, i, j ) (data.A[d] + (i) + (j)*ldda)
133 
134  float c_one = MAGMA_S_ONE;
135  float c_zero = MAGMA_S_ZERO;
136 
137  magma_int_t nb = magma_get_sgehrd_nb( n );
138 
139  magma_int_t nh, iws, ldda, min_lblocks, max_lblocks, last_dev, d;
140  magma_int_t dpanel, di, nlocal, i, i2, ib, ldwork;
141  magma_int_t iinfo;
142  magma_int_t lquery;
143  struct sgehrd_data data;
144 
145  int ngpu = magma_num_gpus();
146 
147  *info = 0;
148  iws = n*(nb + nb*ngpu);
149  MAGMA_S_SET2REAL( work[0], (float) iws );
150 
151  lquery = lwork == -1;
152  if (n < 0) {
153  *info = -1;
154  } else if (ilo < 1 || ilo > max(1,n)) {
155  *info = -2;
156  } else if (ihi < min(ilo,n) || ihi > n) {
157  *info = -3;
158  } else if (lda < max(1,n)) {
159  *info = -5;
160  } else if (lwork < max(1,n) && ! lquery) {
161  *info = -8;
162  }
163  if (*info != 0) {
164  magma_xerbla( __func__, -(*info) );
165  return *info;
166  }
167  else if (lquery)
168  return *info;
169 
170  magma_device_t cdevice;
171  magma_getdevice( &cdevice );
172 
173  // Adjust from 1-based indexing
174  ilo -= 1;
175 
176  // Quick return if possible
177  nh = ihi - ilo;
178  if (nh <= 1) {
179  work[0] = c_one;
180  return *info;
181  }
182 
183  // Set elements 0:ILO-1 and IHI-1:N-2 of TAU to zero
184  for(i = 0; i < ilo; ++i)
185  tau[i] = c_zero;
186 
187  for(i = max(0,ihi-1); i < n-1; ++i)
188  tau[i] = c_zero;
189 
190  // set T to zero
191  lapackf77_slaset( "Full", &nb, &n, &c_zero, &c_zero, T, &nb );
192 
193  // set to null, to simplify cleanup code
194  for( d = 0; d < ngpu; ++d ) {
195  data.A[d] = NULL;
196  data.streams[d] = NULL;
197  }
198 
199  // If not enough workspace, use unblocked code
200  if ( lwork < iws ) {
201  nb = 1;
202  }
203 
204  if (nb == 1 || nb >= nh) {
205  // Use unblocked code below
206  i = ilo;
207  }
208  else {
209  // Use blocked code
210  // allocate memory on GPUs for A and workspaces
211  ldda = ((n+31)/32)*32;
212  min_lblocks = (n / nb) / ngpu;
213  max_lblocks = ((n-1) / nb) / ngpu + 1;
214  last_dev = (n / nb) % ngpu;
215 
216  // V and Vd need to be padded for copying in mslahr2
217  data.ngpu = ngpu;
218  data.ldda = ldda;
219  data.ldv = nb*max_lblocks*ngpu;
220  data.ldvd = nb*max_lblocks;
221 
222  for( d = 0; d < ngpu; ++d ) {
223  magma_setdevice( d );
224  nlocal = min_lblocks*nb;
225  if ( d < last_dev ) {
226  nlocal += nb;
227  }
228  else if ( d == last_dev ) {
229  nlocal += (n % nb);
230  }
231 
232  ldwork = nlocal*ldda // A
233  + nb*data.ldv // V
234  + nb*data.ldvd // Vd
235  + nb*ldda // Y
236  + nb*ldda // W
237  + nb*nb; // Ti
238  if ( MAGMA_SUCCESS != magma_smalloc( &data.A[d], ldwork )) {
239  *info = MAGMA_ERR_DEVICE_ALLOC;
240  goto CLEANUP;
241  }
242  data.V [d] = data.A [d] + nlocal*ldda;
243  data.Vd[d] = data.V [d] + nb*data.ldv;
244  data.Y [d] = data.Vd[d] + nb*data.ldvd;
245  data.W [d] = data.Y [d] + nb*ldda;
246  data.Ti[d] = data.W [d] + nb*ldda;
247 
248  magma_queue_create( &data.streams[d] );
249  }
250 
251  // Copy the matrix to GPUs
252  magma_ssetmatrix_1D_col_bcyclic( n, n, A, lda, data.A, ldda, ngpu, nb );
253 
254  // round ilo down to block boundary
255  ilo = (ilo/nb)*nb;
256  for (i = ilo; i < ihi - 1 - nb; i += nb) {
257  // Reduce columns i:i+nb-1 to Hessenberg form, returning the
258  // matrices V and T of the block reflector H = I - V*T*V'
259  // which performs the reduction, and also the matrix Y = A*V*T
260 
261  // Get the current panel (no need for the 1st iteration)
262  dpanel = (i / nb) % ngpu;
263  di = ((i / nb) / ngpu) * nb;
264  if ( i > ilo ) {
265  magma_setdevice( dpanel );
266  magma_sgetmatrix( ihi-i, nb,
267  dA(dpanel, i, di), ldda,
268  A(i,i), lda );
269  }
270 
271  // add 1 to i for 1-based index
272  magma_slahr2_m( ihi, i+1, nb, A(0,i), lda,
273  &tau[i], &T[i*nb], nb, work, n, &data );
274 
275  magma_slahru_m( n, ihi, i, nb, A, lda, &data );
276 
277  // copy first i rows above panel to host
278  magma_setdevice( dpanel );
279  magma_sgetmatrix_async( i, nb,
280  dA(dpanel, 0, di), ldda,
281  A(0,i), lda, data.streams[dpanel] );
282  }
283 
284  // Copy remainder to host, block-by-block
285  for( i2 = i; i2 < n; i2 += nb ) {
286  ib = min( nb, n-i2 );
287  d = (i2 / nb) % ngpu;
288  di = (i2 / nb) / ngpu * nb;
289  magma_setdevice( d );
290  magma_sgetmatrix( n, ib,
291  dA(d, 0, di), ldda,
292  A(0,i2), lda );
293  }
294  }
295 
296  // Use unblocked code to reduce the rest of the matrix
297  // add 1 to i for 1-based index
298  i += 1;
299  lapackf77_sgehd2(&n, &i, &ihi, A, &lda, tau, work, &iinfo);
300  MAGMA_S_SET2REAL( work[0], (float) iws );
301 
302 CLEANUP:
303  for( d = 0; d < ngpu; ++d ) {
304  magma_setdevice( d );
305  magmablasSetKernelStream( NULL );
306  magma_free( data.A[d] );
307  data.A[d] = NULL;
308  if ( data.streams[d] != NULL ) {
309  magma_queue_destroy( data.streams[d] );
310  }
311  }
312  magma_setdevice( cdevice );
313 
314  return *info;
315 } // magma_sgehrd_m
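In the multi-GPU variant the T factors are returned in the host array T, and magma_sorghr_m consumes them directly (the Version4 path of magma_sgeev_m above instead copies T to the GPU with magma_ssetmatrix before calling magma_sorghr). A brief sketch, with Q and ldq as hypothetical caller arrays and error handling omitted; the workspace size mirrors the iws check in the code above.

    /* Sketch: multi-GPU Hessenberg reduction with T kept on the host. */
    magma_int_t info, ngpu = magma_num_gpus();
    magma_int_t nb    = magma_get_sgehrd_nb( n );
    magma_int_t lwork = n*(nb + nb*ngpu);   /* matches iws in the code above */
    float *tau, *work, *T;

    magma_smalloc_cpu( &tau,  n-1 );
    magma_smalloc_cpu( &work, lwork );
    magma_smalloc_cpu( &T,    nb*n );       /* host array for the T matrices */

    magma_sgehrd_m( n, ilo, ihi, A, lda, tau, work, lwork, T, &info );

    lapackf77_slacpy( "L", &n, &n, A, &lda, Q, &ldq );
    magma_sorghr_m( n, ilo, ihi, Q, ldq, tau, T, nb, &info );

    magma_free_cpu( T );
    magma_free_cpu( work );
    magma_free_cpu( tau );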

magma_int_t magma_sgelqf ( magma_int_t  m,
magma_int_t  n,
float *  A,
magma_int_t  lda,
float *  tau,
float *  work,
magma_int_t  lwork,
magma_int_t *  info 
)

Definition at line 14 of file sgelqf.cpp.

References __func__, dA, dAT, sgehrd_data::ldda, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgelqf_nb(), MAGMA_S_MAKE, MAGMA_S_ONE, magma_sgeqrf2_gpu(), magma_sgetmatrix, magma_smalloc(), magma_ssetmatrix, MAGMA_SUCCESS, magma_xerbla(), magmablas_stranspose2(), magmablas_stranspose_inplace(), max, and min.

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  SGELQF computes an LQ factorization of a REAL M-by-N matrix A:
27  A = L * Q.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  A (input/output) REAL array, dimension (LDA,N)
38  On entry, the M-by-N matrix A.
39  On exit, the elements on and below the diagonal of the array
40  contain the m-by-min(m,n) lower trapezoidal matrix L (L is
41  lower triangular if m <= n); the elements above the diagonal,
42  with the array TAU, represent the orthogonal matrix Q as a
43  product of elementary reflectors (see Further Details).
44 
45  Higher performance is achieved if A is in pinned memory, e.g.
46  allocated using magma_malloc_pinned.
47 
48  LDA (input) INTEGER
49  The leading dimension of the array A. LDA >= max(1,M).
50 
51  TAU (output) REAL array, dimension (min(M,N))
52  The scalar factors of the elementary reflectors (see Further
53  Details).
54 
55  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
56  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
57 
58  Higher performance is achieved if WORK is in pinned memory, e.g.
59  allocated using magma_malloc_pinned.
60 
61  LWORK (input) INTEGER
62  The dimension of the array WORK. LWORK >= max(1,M).
63  For optimum performance LWORK >= M*NB, where NB is the
64  optimal blocksize.
65 
66  If LWORK = -1, then a workspace query is assumed; the routine
67  only calculates the optimal size of the WORK array, returns
68  this value as the first entry of the WORK array, and no error
69  message related to LWORK is issued.
70 
71  INFO (output) INTEGER
72  = 0: successful exit
73  < 0: if INFO = -i, the i-th argument had an illegal value
74  if INFO = -10 internal GPU memory allocation failed.
75 
76  Further Details
77  ===============
78  The matrix Q is represented as a product of elementary reflectors
79 
80  Q = H(k) . . . H(2) H(1), where k = min(m,n).
81 
82  Each H(i) has the form
83 
84  H(i) = I - tau * v * v'
85 
86  where tau is a real scalar, and v is a real vector with
87  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
88  and tau in TAU(i).
89  ===================================================================== */
90 
91  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
92 
93  float *dA, *dAT;
94  float c_one = MAGMA_S_ONE;
95  magma_int_t maxm, maxn, maxdim, nb;
96  magma_int_t iinfo, ldda;
97  int lquery;
98 
99  /* Function Body */
100  *info = 0;
101  nb = magma_get_sgelqf_nb(m);
102 
103  work[0] = MAGMA_S_MAKE( (float)(m*nb), 0 );
104  lquery = (lwork == -1);
105  if (m < 0) {
106  *info = -1;
107  } else if (n < 0) {
108  *info = -2;
109  } else if (lda < max(1,m)) {
110  *info = -4;
111  } else if (lwork < max(1,m) && ! lquery) {
112  *info = -7;
113  }
114  if (*info != 0) {
115  magma_xerbla( __func__, -(*info) );
116  return *info;
117  }
118  else if (lquery) {
119  return *info;
120  }
121 
122  /* Quick return if possible */
123  if (min(m, n) == 0) {
124  work[0] = c_one;
125  return *info;
126  }
127 
128  maxm = ((m + 31)/32)*32;
129  maxn = ((n + 31)/32)*32;
130  maxdim = max(maxm, maxn);
131 
132  if (maxdim*maxdim < 2*maxm*maxn)
133  {
134  ldda = maxdim;
135 
136  if (MAGMA_SUCCESS != magma_smalloc( &dA, maxdim*maxdim )) {
137  *info = MAGMA_ERR_DEVICE_ALLOC;
138  return *info;
139  }
140 
141  magma_ssetmatrix( m, n, a, lda, dA, ldda );
142  dAT = dA;
143  magmablas_stranspose_inplace( ldda, dAT, ldda );
144  }
145  else
146  {
147  ldda = maxn;
148 
149  if (MAGMA_SUCCESS != magma_smalloc( &dA, 2*maxn*maxm )) {
150  *info = MAGMA_ERR_DEVICE_ALLOC;
151  return *info;
152  }
153 
154  magma_ssetmatrix( m, n, a, lda, dA, maxm );
155 
156  dAT = dA + maxn * maxm;
157  magmablas_stranspose2( dAT, ldda, dA, maxm, m, n );
158  }
159 
160  magma_sgeqrf2_gpu(n, m, dAT, ldda, tau, &iinfo);
161 
162  if (maxdim*maxdim < 2*maxm*maxn) {
163  magmablas_stranspose_inplace( ldda, dAT, ldda );
164  magma_sgetmatrix( m, n, dA, ldda, a, lda );
165  } else {
166  magmablas_stranspose2( dA, maxm, dAT, ldda, n, m );
167  magma_sgetmatrix( m, n, dA, maxm, a, lda );
168  }
169 
170  magma_free( dA );
171 
172  return *info;
173 } /* magma_sgelqf */
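
A minimal host-side call sketch (not part of the MAGMA sources; error handling trimmed): it issues the workspace query described under LWORK, sizes WORK from WORK(1), and then factors A in place. Plain malloc is used for brevity, although the notes above recommend pinned memory for best performance; tau is assumed to hold at least min(M,N) entries.

    #include <stdlib.h>
    #include "magma.h"

    /* LQ-factor an m-by-n column-major matrix A (lda >= m) in place;
       tau must hold at least min(m,n) entries.  Returns the info code. */
    magma_int_t lq_factor( magma_int_t m, magma_int_t n,
                           float *A, magma_int_t lda, float *tau )
    {
        magma_int_t info, lwork;
        float query, *work;

        /* workspace query: with lwork = -1 the optimal size comes back in work[0] */
        magma_sgelqf( m, n, A, lda, tau, &query, -1, &info );
        if ( info != 0 )
            return info;

        lwork = (magma_int_t) query;
        work  = (float*) malloc( lwork * sizeof(float) );
        if ( work == NULL )
            return MAGMA_ERR_HOST_ALLOC;

        magma_sgelqf( m, n, A, lda, tau, work, lwork, &info );

        free( work );
        return info;
    }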

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sgelqf_gpu ( magma_int_t  m,
magma_int_t  n,
float *  dA,
magma_int_t  ldda,
float *  tau,
float *  work,
magma_int_t  lwork,
magma_int_t *  info
)

Definition at line 14 of file sgelqf_gpu.cpp.

References __func__, dA, dAT, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgelqf_nb(), MAGMA_S_MAKE, MAGMA_S_ONE, magma_sgeqrf2_gpu(), magma_smalloc(), MAGMA_SUCCESS, magma_xerbla(), magmablas_stranspose2(), magmablas_stranspose_inplace(), max, and min.

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  SGELQF computes an LQ factorization of a REAL M-by-N matrix dA:
27  dA = L * Q.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  dA (input/output) REAL array on the GPU, dimension (LDA,N)
38  On entry, the M-by-N matrix dA.
39  On exit, the elements on and below the diagonal of the array
40  contain the m-by-min(m,n) lower trapezoidal matrix L (L is
41  lower triangular if m <= n); the elements above the diagonal,
42  with the array TAU, represent the orthogonal matrix Q as a
43  product of elementary reflectors (see Further Details).
44 
45  LDA (input) INTEGER
46  The leading dimension of the array dA. LDA >= max(1,M).
47 
48  TAU (output) REAL array, dimension (min(M,N))
49  The scalar factors of the elementary reflectors (see Further
50  Details).
51 
52  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
53  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
54 
55  Higher performance is achieved if WORK is in pinned memory, e.g.
56  allocated using magma_malloc_pinned.
57 
58  LWORK (input) INTEGER
59  The dimension of the array WORK. LWORK >= max(1,M).
60  For optimum performance LWORK >= M*NB, where NB is the
61  optimal blocksize.
62 
63  If LWORK = -1, then a workspace query is assumed; the routine
64  only calculates the optimal size of the WORK array, returns
65  this value as the first entry of the WORK array, and no error
66  message related to LWORK is issued.
67 
68  INFO (output) INTEGER
69  = 0: successful exit
70  < 0: if INFO = -i, the i-th argument had an illegal value
71  if INFO = -10 internal GPU memory allocation failed.
72 
73  Further Details
74  ===============
75  The matrix Q is represented as a product of elementary reflectors
76 
77  Q = H(k) . . . H(2) H(1), where k = min(m,n).
78 
79  Each H(i) has the form
80 
81  H(i) = I - tau * v * v'
82 
83  where tau is a real scalar, and v is a real vector with
84  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
85  and tau in TAU(i).
86  ===================================================================== */
87 
88  #define a_ref(a_1,a_2) ( dA+(a_2)*(lda) + (a_1))
89 
90  float *dAT;
91  float c_one = MAGMA_S_ONE;
92  magma_int_t maxm, maxn, maxdim, nb;
93  magma_int_t iinfo;
94  int lquery;
95 
96  *info = 0;
97  nb = magma_get_sgelqf_nb(m);
98 
99  work[0] = MAGMA_S_MAKE( (float)(m*nb), 0 );
100  lquery = (lwork == -1);
101  if (m < 0) {
102  *info = -1;
103  } else if (n < 0) {
104  *info = -2;
105  } else if (lda < max(1,m)) {
106  *info = -4;
107  } else if (lwork < max(1,m) && ! lquery) {
108  *info = -7;
109  }
110  if (*info != 0) {
111  magma_xerbla( __func__, -(*info) );
112  return *info;
113  }
114  else if (lquery) {
115  return *info;
116  }
117 
118  /* Quick return if possible */
119  if (min(m, n) == 0) {
120  work[0] = c_one;
121  return *info;
122  }
123 
124  maxm = ((m + 31)/32)*32;
125  maxn = ((n + 31)/32)*32;
126  maxdim = max(maxm, maxn);
127 
128  int ldat = maxn;
129 
130  dAT = dA;
131 
132  if ( m == n ) {
133  ldat = lda;
134  magmablas_stranspose_inplace( m, dAT, lda );
135  }
136  else {
137  if (MAGMA_SUCCESS != magma_smalloc( &dAT, maxm*maxn ) ){
138  *info = MAGMA_ERR_DEVICE_ALLOC;
139  return *info;
140  }
141 
142  magmablas_stranspose2( dAT, ldat, dA, lda, m, n );
143  }
144 
145  magma_sgeqrf2_gpu(n, m, dAT, ldat, tau, &iinfo);
146 
147  if ( m == n ) {
148  magmablas_stranspose_inplace( m, dAT, ldat );
149  }
150  else {
151  magmablas_stranspose2( dA, lda, dAT, ldat, n, m );
152  magma_free( dAT );
153  }
154 
155  return *info;
156 } /* magma_sgelqf_gpu */
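
A sketch of staging a host matrix on the device and factoring it with magma_sgelqf_gpu (illustrative only; the wrapper name and the 32-element padding of ldda follow the conventions of the listings above rather than any requirement). WORK and LWORK obey the same rules as for magma_sgelqf.

    #include "magma.h"

    /* Stage a host matrix on the GPU, LQ-factor it there, and copy the
       result back.  WORK/LWORK follow the LWORK description above. */
    magma_int_t lq_factor_gpu( magma_int_t m, magma_int_t n,
                               float *A, magma_int_t lda, float *tau,
                               float *work, magma_int_t lwork )
    {
        magma_int_t info;
        magma_int_t ldda = ((m + 31)/32)*32;          /* padded, as in the listings */
        float *dA;

        if ( MAGMA_SUCCESS != magma_smalloc( &dA, (size_t)ldda * n ) )
            return MAGMA_ERR_DEVICE_ALLOC;

        magma_ssetmatrix( m, n, A, lda, dA, ldda );   /* host -> device */
        magma_sgelqf_gpu( m, n, dA, ldda, tau, work, lwork, &info );
        magma_sgetmatrix( m, n, dA, ldda, A, lda );   /* device -> host */

        magma_free( dA );
        return info;
    }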

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sgels3_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
float *  dA,
magma_int_t  ldda,
float *  dB,
magma_int_t  lddb,
float *  hwork,
magma_int_t  lwork,
magma_int_t *  info
)

Definition at line 14 of file sgels3_gpu.cpp.

References __func__, dT, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_sgeqrf_nb(), MAGMA_S_MAKE, MAGMA_S_ONE, magma_sgeqrf3_gpu(), magma_sgeqrs3_gpu(), magma_smalloc(), magma_smalloc_cpu(), MAGMA_SUCCESS, magma_xerbla(), max, and min.

19 {
20 /* -- MAGMA (version 1.4.0) --
21  Univ. of Tennessee, Knoxville
22  Univ. of California, Berkeley
23  Univ. of Colorado, Denver
24  August 2013
25 
26  Purpose
27  =======
28  Solves the overdetermined, least squares problem
29  min || A*X - C ||
30  using the QR factorization A.
31  The underdetermined problem (m < n) is not currently handled.
32 
33 
34  Arguments
35  =========
36  TRANS (input) CHARACTER*1
37  = 'N': the linear system involves A.
38  Only trans='N' is currently handled.
39 
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. M >= N >= 0.
45 
46  NRHS (input) INTEGER
47  The number of columns of the matrix C. NRHS >= 0.
48 
49  A (input/output) REAL array on the GPU, dimension (LDDA,N)
50  On entry, the M-by-N matrix A.
51  On exit, A is overwritten by details of its QR
52  factorization as returned by SGEQRF3.
53 
54  LDDA (input) INTEGER
55  The leading dimension of the array A, LDDA >= M.
56 
57  DB (input/output) REAL array on the GPU, dimension (LDDB,NRHS)
58  On entry, the M-by-NRHS matrix C.
59  On exit, the N-by-NRHS solution matrix X.
60 
61  LDDB (input) INTEGER
62  The leading dimension of the array DB. LDDB >= M.
63 
64  HWORK (workspace/output) REAL array, dimension MAX(1,LWORK).
65  On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.
66 
67  LWORK (input) INTEGER
68  The dimension of the array HWORK,
69  LWORK >= (M - N + NB)*(NRHS + NB) + NRHS*NB,
70  where NB is the blocksize given by magma_get_sgeqrf_nb( M ).
71 
72  If LWORK = -1, then a workspace query is assumed; the routine
73  only calculates the optimal size of the HWORK array, returns
74  this value as the first entry of the HWORK array.
75 
76  INFO (output) INTEGER
77  = 0: successful exit
78  < 0: if INFO = -i, the i-th argument had an illegal value
79  ===================================================================== */
80 
81  #define a_ref(a_1,a_2) (dA + (a_2)*(ldda) + (a_1))
82 
83  float *dT, *tau;
84  magma_int_t k;
85 
86  magma_int_t nb = magma_get_sgeqrf_nb(m);
87  magma_int_t lwkopt = (m - n + nb)*(nrhs + nb) + nrhs*nb;
88  int lquery = (lwork == -1);
89 
90  hwork[0] = MAGMA_S_MAKE( (float)lwkopt, 0. );
91 
92  *info = 0;
93  /* For now, N is the only case working */
94  if ( (trans != 'N') && (trans != 'n' ) )
95  *info = -1;
96  else if (m < 0)
97  *info = -2;
98  else if (n < 0 || m < n) /* LQ is not handled for now */
99  *info = -3;
100  else if (nrhs < 0)
101  *info = -4;
102  else if (ldda < max(1,m))
103  *info = -6;
104  else if (lddb < max(1,m))
105  *info = -8;
106  else if (lwork < lwkopt && ! lquery)
107  *info = -10;
108 
109  if (*info != 0) {
110  magma_xerbla( __func__, -(*info) );
111  return *info;
112  }
113  else if (lquery)
114  return *info;
115 
116  k = min(m,n);
117  if (k == 0) {
118  hwork[0] = MAGMA_S_ONE;
119  return *info;
120  }
121 
122  /*
123  * Allocate temporary buffers
124  */
125  int ldtwork = ( 2*k + ((n+31)/32)*32 )*nb;
126  if (nb < nrhs)
127  ldtwork = ( 2*k + ((n+31)/32)*32 )*nrhs;
128  if (MAGMA_SUCCESS != magma_smalloc( &dT, ldtwork )) {
129  *info = MAGMA_ERR_DEVICE_ALLOC;
130  return *info;
131  }
132 
133  magma_smalloc_cpu( &tau, k );
134  if ( tau == NULL ) {
135  magma_free( dT );
136  *info = MAGMA_ERR_HOST_ALLOC;
137  return *info;
138  }
139 
140  magma_sgeqrf3_gpu( m, n, dA, ldda, tau, dT, info );
141  if ( *info == 0 ) {
142  magma_sgeqrs3_gpu( m, n, nrhs,
143  dA, ldda, tau, dT,
144  dB, lddb, hwork, lwork, info );
145  }
146 
147  magma_free( dT );
148  magma_free_cpu(tau);
149  return *info;
150 }
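
A call sketch (ours, not from the MAGMA sources) that sizes HWORK from the documented formula and solves the least squares problem with dA and dB already resident on the GPU. magma_sgels_gpu below takes exactly the same arguments and workspace; per their References the two differ only in the QR kernels they call internally.

    #include <stdlib.h>
    #include "magma.h"

    /* Solve min || A*X - B ||_2 with m >= n.  dA and dB are device arrays;
       on return dB(1:n,1:nrhs) holds the solution X. */
    magma_int_t ls_solve_gpu( magma_int_t m, magma_int_t n, magma_int_t nrhs,
                              float *dA, magma_int_t ldda,
                              float *dB, magma_int_t lddb )
    {
        magma_int_t info;
        magma_int_t nb    = magma_get_sgeqrf_nb( m );
        magma_int_t lwork = (m - n + nb)*(nrhs + nb) + nrhs*nb;
        float *hwork      = (float*) malloc( lwork * sizeof(float) );

        if ( hwork == NULL )
            return MAGMA_ERR_HOST_ALLOC;

        magma_sgels3_gpu( 'N', m, n, nrhs, dA, ldda, dB, lddb,
                          hwork, lwork, &info );

        free( hwork );
        return info;
    }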

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sgels_gpu ( char  trans,
magma_int_t  m,
magma_int_t  n,
magma_int_t  nrhs,
float *  dA,
magma_int_t  ldda,
float *  dB,
magma_int_t  lddb,
float *  hwork,
magma_int_t  lwork,
magma_int_t *  info
)

Definition at line 14 of file sgels_gpu.cpp.

References __func__, dT, MAGMA_ERR_DEVICE_ALLOC, MAGMA_ERR_HOST_ALLOC, magma_free, magma_free_cpu(), magma_get_sgeqrf_nb(), MAGMA_S_MAKE, MAGMA_S_ONE, magma_sgeqrf_gpu(), magma_sgeqrs_gpu(), magma_smalloc(), magma_smalloc_cpu(), MAGMA_SUCCESS, magma_xerbla(), max, and min.

19 {
20 /* -- MAGMA (version 1.4.0) --
21  Univ. of Tennessee, Knoxville
22  Univ. of California, Berkeley
23  Univ. of Colorado, Denver
24  August 2013
25 
26  Purpose
27  =======
28  Solves the overdetermined least squares problem
29  min || A*X - C ||
30  using the QR factorization of A.
31  The underdetermined problem (m < n) is not currently handled.
32 
33 
34  Arguments
35  =========
36  TRANS (input) CHARACTER*1
37  = 'N': the linear system involves A.
38  Only trans='N' is currently handled.
39 
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. M >= N >= 0.
45 
46  NRHS (input) INTEGER
47  The number of columns of the matrix C. NRHS >= 0.
48 
49  DA (input/output) REAL array on the GPU, dimension (LDA,N)
50  On entry, the M-by-N matrix A.
51  On exit, A is overwritten by details of its QR
52  factorization as returned by SGEQRF.
53 
54  LDDA (input) INTEGER
55  The leading dimension of the array A, LDDA >= M.
56 
57  DB (input/output) REAL array on the GPU, dimension (LDDB,NRHS)
58  On entry, the M-by-NRHS matrix C.
59  On exit, the N-by-NRHS solution matrix X.
60 
61  LDDB (input) INTEGER
62  The leading dimension of the array DB. LDDB >= M.
63 
64  HWORK (workspace/output) REAL array, dimension MAX(1,LWORK).
65  On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.
66 
67  LWORK (input) INTEGER
68  The dimension of the array HWORK,
69  LWORK >= (M - N + NB)*(NRHS + NB) + NRHS*NB,
70  where NB is the blocksize given by magma_get_sgeqrf_nb( M ).
71 
72  If LWORK = -1, then a workspace query is assumed; the routine
73  only calculates the optimal size of the HWORK array, returns
74  this value as the first entry of the HWORK array.
75 
76  INFO (output) INTEGER
77  = 0: successful exit
78  < 0: if INFO = -i, the i-th argument had an illegal value
79  ===================================================================== */
80 
81  float *dT, *tau;
82  magma_int_t k;
83 
84  magma_int_t nb = magma_get_sgeqrf_nb(m);
85  magma_int_t lwkopt = (m - n + nb)*(nrhs + nb) + nrhs*nb;
86  int lquery = (lwork == -1);
87 
88  hwork[0] = MAGMA_S_MAKE( (float)lwkopt, 0. );
89 
90  *info = 0;
91  /* For now, N is the only case working */
92  if ( (trans != 'N') && (trans != 'n' ) )
93  *info = -1;
94  else if (m < 0)
95  *info = -2;
96  else if (n < 0 || m < n) /* LQ is not handled for now */
97  *info = -3;
98  else if (nrhs < 0)
99  *info = -4;
100  else if (ldda < max(1,m))
101  *info = -6;
102  else if (lddb < max(1,m))
103  *info = -8;
104  else if (lwork < lwkopt && ! lquery)
105  *info = -10;
106 
107  if (*info != 0) {
108  magma_xerbla( __func__, -(*info) );
109  return *info;
110  }
111  else if (lquery)
112  return *info;
113 
114  k = min(m,n);
115  if (k == 0) {
116  hwork[0] = MAGMA_S_ONE;
117  return *info;
118  }
119 
120  /*
121  * Allocate temporary buffers
122  */
123  int ldtwork = ( 2*k + ((n+31)/32)*32 )*nb;
124  if (nb < nrhs)
125  ldtwork = ( 2*k + ((n+31)/32)*32 )*nrhs;
126  if (MAGMA_SUCCESS != magma_smalloc( &dT, ldtwork )) {
127  *info = MAGMA_ERR_DEVICE_ALLOC;
128  return *info;
129  }
130 
131  magma_smalloc_cpu( &tau, k );
132  if ( tau == NULL ) {
133  magma_free( dT );
134  *info = MAGMA_ERR_HOST_ALLOC;
135  return *info;
136  }
137 
138  magma_sgeqrf_gpu( m, n, dA, ldda, tau, dT, info );
139 
140  if ( *info == 0 ) {
141  magma_sgeqrs_gpu( m, n, nrhs,
142  dA, ldda, tau, dT,
143  dB, lddb, hwork, lwork, info );
144  }
145 
146  magma_free( dT );
147  magma_free_cpu(tau);
148  return *info;
149 }
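
Beyond HWORK, the listing above also allocates a device scratch array dT of ( 2*min(M,N) + ceil(N/32)*32 ) * max(NB, NRHS) floats plus a host tau of length min(M,N). A small helper (hypothetical name) that reproduces this count can be useful when budgeting GPU memory before the call:

    #include <stddef.h>
    #include "magma.h"

    /* Number of floats in the dT scratch buffer allocated internally by
       magma_sgels_gpu / magma_sgels3_gpu, mirroring the ldtwork
       computation in the listing above. */
    size_t gels_dT_elements( magma_int_t m, magma_int_t n, magma_int_t nrhs )
    {
        magma_int_t k  = ( m < n ? m : n );
        magma_int_t nb = magma_get_sgeqrf_nb( m );
        magma_int_t w  = ( nb < nrhs ? nrhs : nb );   /* nb, or nrhs if larger */
        return (size_t)( 2*k + ((n + 31)/32)*32 ) * (size_t) w;
    }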

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sgeqlf ( magma_int_t  m,
magma_int_t  n,
float *  A,
magma_int_t  lda,
float *  tau,
float *  work,
magma_int_t  lwork,
magma_int_t *  info
)

Definition at line 14 of file sgeqlf.cpp.

References __func__, a_ref, da_ref, dwork, lapackf77_sgeqlf(), lapackf77_slarft(), sgehrd_data::ldda, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgeqlf_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_S_MAKE, MAGMA_S_ONE, magma_sgetmatrix, magma_sgetmatrix_async, magma_slarfb_gpu(), magma_smalloc(), magma_ssetmatrix, magma_ssetmatrix_async, MAGMA_SUCCESS, magma_xerbla(), MagmaBackward, MagmaBackwardStr, MagmaColumnwise, MagmaColumnwiseStr, MagmaLeft, MagmaLower, MagmaTrans, max, min, spanel_to_q(), and sq_to_panel().

17 {
18 /* -- MAGMA (version 1.4.0) --
19  Univ. of Tennessee, Knoxville
20  Univ. of California, Berkeley
21  Univ. of Colorado, Denver
22  August 2013
23 
24  Purpose
25  =======
26  SGEQLF computes a QL factorization of a REAL M-by-N matrix A:
27  A = Q * L.
28 
29  Arguments
30  =========
31  M (input) INTEGER
32  The number of rows of the matrix A. M >= 0.
33 
34  N (input) INTEGER
35  The number of columns of the matrix A. N >= 0.
36 
37  A (input/output) REAL array, dimension (LDA,N)
38  On entry, the M-by-N matrix A.
39  On exit, if m >= n, the lower triangle of the subarray
40  A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L;
41  if m <= n, the elements on and below the (n-m)-th
42  superdiagonal contain the M-by-N lower trapezoidal matrix L;
43  the remaining elements, with the array TAU, represent the
44  orthogonal matrix Q as a product of elementary reflectors
45  (see Further Details).
46 
47  Higher performance is achieved if A is in pinned memory, e.g.
48  allocated using magma_malloc_pinned.
49 
50  LDA (input) INTEGER
51  The leading dimension of the array A. LDA >= max(1,M).
52 
53  TAU (output) REAL array, dimension (min(M,N))
54  The scalar factors of the elementary reflectors (see Further
55  Details).
56 
57  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
58  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
59 
60  Higher performance is achieved if WORK is in pinned memory, e.g.
61  allocated using magma_malloc_pinned.
62 
63  LWORK (input) INTEGER
64  The dimension of the array WORK. LWORK >= max(1,N).
65  For optimum performance LWORK >= N*NB, where NB can be obtained
66  through magma_get_sgeqlf_nb(M).
67 
68  If LWORK = -1, then a workspace query is assumed; the routine
69  only calculates the optimal size of the WORK array, returns
70  this value as the first entry of the WORK array, and no error
71  message related to LWORK is issued by XERBLA.
72 
73  INFO (output) INTEGER
74  = 0: successful exit
75  < 0: if INFO = -i, the i-th argument had an illegal value
76  or another error occurred, such as a failed memory allocation.
77 
78  Further Details
79  ===============
80  The matrix Q is represented as a product of elementary reflectors
81 
82  Q = H(k) . . . H(2) H(1), where k = min(m,n).
83 
84  Each H(i) has the form
85 
86  H(i) = I - tau * v * v'
87 
88  where tau is a real scalar, and v is a real vector with
89  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
90  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
91  ===================================================================== */
92 
93  #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
94  #define da_ref(a_1,a_2) (da+(a_2)*ldda + (a_1))
95 
96  float *da, *dwork;
97  float c_one = MAGMA_S_ONE;
98  magma_int_t i, k, lddwork, old_i, old_ib, nb;
99  magma_int_t rows, cols;
100  magma_int_t ib, ki, kk, mu, nu, iinfo, ldda;
101  int lquery;
102 
103  nb = magma_get_sgeqlf_nb(m);
104  *info = 0;
105  lquery = (lwork == -1);
106 
107  // silence "uninitialized" warnings
108  old_ib = nb;
109  old_i = 0;
110 
111  if (m < 0) {
112  *info = -1;
113  } else if (n < 0) {
114  *info = -2;
115  } else if (lda < max(1,m)) {
116  *info = -4;
117  }
118 
119  if (*info == 0) {
120  k = min(m,n);
121  if (k == 0)
122  work[0] = c_one;
123  else {
124  work[0] = MAGMA_S_MAKE( n*nb, 0 );
125  }
126 
127  if (lwork < max(1,n) && ! lquery)
128  *info = -7;
129  }
130 
131  if (*info != 0) {
132  magma_xerbla( __func__, -(*info) );
133  return *info;
134  }
135  else if (lquery)
136  return *info;
137 
138  /* Quick return if possible */
139  if (k == 0)
140  return *info;
141 
142  lddwork = ((n+31)/32)*32;
143  ldda = ((m+31)/32)*32;
144 
145  if (MAGMA_SUCCESS != magma_smalloc( &da, (n)*ldda + nb*lddwork )) {
146  *info = MAGMA_ERR_DEVICE_ALLOC;
147  return *info;
148  }
149  dwork = da + ldda*(n);
150 
151  magma_queue_t stream[2];
152  magma_queue_create( &stream[0] );
153  magma_queue_create( &stream[1] );
154 
155  if ( (nb > 1) && (nb < k) ) {
156  /* Use blocked code initially.
157  The last kk columns are handled by the block method.
158  First, copy the matrix to the GPU, except for the last kk columns */
159  magma_ssetmatrix_async( (m), (n-nb),
160  a_ref(0, 0), lda,
161  da_ref(0, 0), ldda, stream[0] );
162 
163  ki = ((k - nb - 1) / nb) * nb;
164  kk = min(k, ki + nb);
165  for (i = k - kk + ki; i >= k -kk; i -= nb) {
166  ib = min(k-i,nb);
167 
168  if (i < k - kk + ki) {
169  /* 1. Copy asynchronously the current panel to the CPU.
170  2. Copy asynchronously the submatrix below the panel
171  to the CPU. */
172  rows = m - k + i + ib;
173  magma_sgetmatrix_async( rows, ib,
174  da_ref(0, n-k+i), ldda,
175  a_ref(0, n-k+i), lda, stream[1] );
176 
177  magma_sgetmatrix_async( (m-rows), ib,
178  da_ref(rows, n-k+i), ldda,
179  a_ref(rows, n-k+i), lda, stream[0] );
180 
181  /* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left in
182  two steps - implementing the lookahead techniques.
183  This is the main update from the lookahead techniques. */
184  rows = m - k + old_i + old_ib;
185  cols = n - k + old_i - old_ib;
186  magma_slarfb_gpu( MagmaLeft, MagmaTrans, MagmaBackward, MagmaColumnwise,
187  rows, cols, old_ib,
188  da_ref(0, cols+old_ib), ldda, dwork, lddwork,
189  da_ref(0, 0 ), ldda, dwork+old_ib, lddwork);
190  }
191 
192  magma_queue_sync( stream[1] );
193  /* Compute the QL factorization of the current block
194  A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */
195  rows = m - k + i + ib;
196  cols = n - k + i;
197  lapackf77_sgeqlf(&rows,&ib, a_ref(0,cols), &lda, tau+i, work, &lwork, &iinfo);
198 
199  if (cols > 0) {
200  /* Form the triangular factor of the block reflector
201  H = H(i+ib-1) . . . H(i+1) H(i) */
202  lapackf77_slarft( MagmaBackwardStr, MagmaColumnwiseStr,
203  &rows, &ib,
204  a_ref(0, cols), &lda, tau + i, work, &ib);
205 
206  spanel_to_q( MagmaLower, ib, a_ref(rows-ib,cols), lda, work+ib*ib);
207  magma_ssetmatrix( rows, ib,
208  a_ref(0,cols), lda,
209  da_ref(0,cols), ldda );
210  sq_to_panel( MagmaLower, ib, a_ref(rows-ib,cols), lda, work+ib*ib);
211 
212  // Send the triangular part on the GPU
213  magma_ssetmatrix( ib, ib, work, ib, dwork, lddwork );
214 
215  /* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left in
216  two steps - implementing the lookahead techniques.
217  This is the update of first ib columns. */
218  if (i-ib >= k -kk)
219  magma_slarfb_gpu( MagmaLeft, MagmaTrans, MagmaBackward, MagmaColumnwise,
220  rows, ib, ib,
221  da_ref(0, cols), ldda, dwork, lddwork,
222  da_ref(0,cols-ib), ldda, dwork+ib, lddwork);
223  else{
224  magma_slarfb_gpu( MagmaLeft, MagmaTrans, MagmaBackward, MagmaColumnwise,
225  rows, cols, ib,
226  da_ref(0, cols), ldda, dwork, lddwork,
227  da_ref(0, 0 ), ldda, dwork+ib, lddwork);
228  }
229 
230  old_i = i;
231  old_ib = ib;
232  }
233  }
234  mu = m - k + i + nb;
235  nu = n - k + i + nb;
236 
237  magma_sgetmatrix( m, nu, da_ref(0,0), ldda, a_ref(0,0), lda );
238  } else {
239  mu = m;
240  nu = n;
241  }
242 
243  /* Use unblocked code to factor the last or only block */
244  if (mu > 0 && nu > 0)
245  lapackf77_sgeqlf(&mu, &nu, a_ref(0,0), &lda, tau, work, &lwork, &iinfo);
246 
247  magma_queue_destroy( stream[0] );
248  magma_queue_destroy( stream[1] );
249  magma_free( da );
250  return *info;
251 } /* magma_sgeqlf */
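
A host-side call sketch for magma_sgeqlf (illustrative; the wrapper name is ours). WORK is sized to the documented optimum N*NB with NB taken from magma_get_sgeqlf_nb; a workspace query with LWORK = -1 would work equally well, as in the sgelqf sketch earlier.

    #include <stdlib.h>
    #include "magma.h"

    /* QL-factor an m-by-n column-major matrix A in place; tau must hold
       at least min(m,n) entries.  WORK is sized to the optimum N*NB. */
    magma_int_t ql_factor( magma_int_t m, magma_int_t n,
                           float *A, magma_int_t lda, float *tau )
    {
        magma_int_t info;
        magma_int_t nb    = magma_get_sgeqlf_nb( m );
        magma_int_t lwork = ( n > 1 ? n : 1 ) * nb;
        float *work       = (float*) malloc( lwork * sizeof(float) );

        if ( work == NULL )
            return MAGMA_ERR_HOST_ALLOC;

        magma_sgeqlf( m, n, A, lda, tau, work, lwork, &info );

        free( work );
        return info;
    }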

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sgeqp3 ( magma_int_t  m,
magma_int_t  n,
float *  a,
magma_int_t  lda,
magma_int_t *  jpvt,
float *  tau,
float *  work,
magma_int_t  lwork,
magma_int_t *  info
)

Definition at line 18 of file sgeqp3.cpp.

References __func__, A, blasf77_sswap(), cblas_snrm2(), dA, dwork, lapackf77_sgeqrf(), lapackf77_slaqp2(), lapackf77_sormqr(), sgehrd_data::ldda, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgeqp3_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_S_MAKE, magma_sgetmatrix, magma_slaqps(), magma_smalloc(), magma_ssetmatrix_async, MAGMA_SUCCESS, magma_xerbla(), MagmaLeftStr, MagmaTransStr, max, and min.

26 {
27 /* -- MAGMA (version 1.4.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  August 2013
32 
33  Purpose
34  =======
35  SGEQP3 computes a QR factorization with column pivoting of a
36  matrix A: A*P = Q*R using Level 3 BLAS.
37 
38  Arguments
39  =========
40  M (input) INTEGER
41  The number of rows of the matrix A. M >= 0.
42 
43  N (input) INTEGER
44  The number of columns of the matrix A. N >= 0.
45 
46  A (input/output) REAL array, dimension (LDA,N)
47  On entry, the M-by-N matrix A.
48  On exit, the upper triangle of the array contains the
49  min(M,N)-by-N upper trapezoidal matrix R; the elements below
50  the diagonal, together with the array TAU, represent the
51  unitary matrix Q as a product of min(M,N) elementary
52  reflectors.
53 
54  LDA (input) INTEGER
55  The leading dimension of the array A. LDA >= max(1,M).
56 
57  JPVT (input/output) INTEGER array, dimension (N)
58  On entry, if JPVT(J).ne.0, the J-th column of A is permuted
59  to the front of A*P (a leading column); if JPVT(J)=0,
60  the J-th column of A is a free column.
61  On exit, if JPVT(J)=K, then the J-th column of A*P was
62  the K-th column of A.
63 
64  TAU (output) REAL array, dimension (min(M,N))
65  The scalar factors of the elementary reflectors.
66 
67  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
68  On exit, if INFO=0, WORK(1) returns the optimal LWORK.
69 
70  LWORK (input) INTEGER
71  The dimension of the array WORK.
72  For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
73  for [cz]geqp3, LWORK >= (N+1)*NB,
74  where NB is the optimal blocksize.
75 
76  If LWORK = -1, then a workspace query is assumed; the routine
77  only calculates the optimal size of the WORK array, returns
78  this value as the first entry of the WORK array, and no error
79  message related to LWORK is issued by XERBLA.
80 
81  For [cz]geqp3 only:
82  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
83 
84  INFO (output) INTEGER
85  = 0: successful exit.
86  < 0: if INFO = -i, the i-th argument had an illegal value.
87 
88  Further Details
89  ===============
90  The matrix Q is represented as a product of elementary reflectors
91 
92  Q = H(1) H(2) . . . H(k), where k = min(m,n).
93 
94  Each H(i) has the form
95 
96  H(i) = I - tau * v * v'
97 
98  where tau is a real scalar, and v is a real vector
99  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
100  A(i+1:m,i), and tau in TAU(i).
101  ===================================================================== */
102 
103 #define A(i, j) (A + (i) + (j)*(lda ))
104 #define dA(i, j) (dwork + (i) + (j)*(ldda))
105 
106  float *dwork, *df;
107 
108  magma_int_t ione = 1;
109 
110  magma_int_t n_j, ldda, ldwork;
111  magma_int_t j, jb, na, nb, sm, sn, fjb, nfxd, minmn;
112  magma_int_t topbmn, sminmn, lwkopt, lquery;
113 
114  *info = 0;
115  lquery = (lwork == -1);
116  if (m < 0) {
117  *info = -1;
118  } else if (n < 0) {
119  *info = -2;
120  } else if (lda < max(1,m)) {
121  *info = -4;
122  }
123 
124  nb = magma_get_sgeqp3_nb(min(m, n));
125  if (*info == 0) {
126  minmn = min(m,n);
127  if (minmn == 0) {
128  lwkopt = 1;
129  } else {
130  lwkopt = (n + 1)*nb;
131 #if defined(PRECISION_d) || defined(PRECISION_s)
132  lwkopt += 2*n;
133 #endif
134  }
135  work[0] = MAGMA_S_MAKE( lwkopt, 0. );
136 
137  if (lwork < lwkopt && ! lquery) {
138  *info = -8;
139  }
140  }
141 
142  if (*info != 0) {
143  magma_xerbla( __func__, -(*info) );
144  return *info;
145  } else if (lquery) {
146  return *info;
147  }
148 
149  if (minmn == 0)
150  return *info;
151 
152 #if defined(PRECISION_d) || defined(PRECISION_s)
153  float *rwork = work + (n + 1)*nb;
154 #endif
155 
156  ldda = ((m+31)/32)*32;
157  ldwork = n*ldda + (n+1)*nb;
158  if (MAGMA_SUCCESS != magma_smalloc( &dwork, ldwork )) {
159  *info = MAGMA_ERR_DEVICE_ALLOC;
160  return *info;
161  }
162  df = dwork + n*ldda;
163  // dwork used for dA
164 
165  magma_queue_t stream;
166  magma_queue_create( &stream );
167 
168  /* Move initial columns up front.
169  * Note jpvt uses 1-based indices for historical compatibility. */
170  nfxd = 0;
171  for (j = 0; j < n; ++j) {
172  if (jpvt[j] != 0) {
173  if (j != nfxd) {
174  blasf77_sswap(&m, A(0, j), &ione, A(0, nfxd), &ione);
175  jpvt[j] = jpvt[nfxd];
176  jpvt[nfxd] = j + 1;
177  }
178  else {
179  jpvt[j] = j + 1;
180  }
181  ++nfxd;
182  }
183  else {
184  jpvt[j] = j + 1;
185  }
186  }
187 
188  /* Factorize fixed columns
189  =======================
190  Compute the QR factorization of fixed columns and update
191  remaining columns. */
192  if (nfxd > 0) {
193  na = min(m,nfxd);
194  lapackf77_sgeqrf(&m, &na, A, &lda, tau, work, &lwork, info);
195  if (na < n) {
196  n_j = n - na;
197  lapackf77_sormqr( MagmaLeftStr, MagmaTransStr, &m, &n_j, &na,
198  A, &lda, tau, A(0, na), &lda,
199  work, &lwork, info );
200  }
201  }
202 
203  /* Factorize free columns */
204  if (nfxd < minmn) {
205  sm = m - nfxd;
206  sn = n - nfxd;
207  sminmn = minmn - nfxd;
208 
209  if (nb < sminmn) {
210  j = nfxd;
211 
212  // Set the original matrix to the GPU
213  magma_ssetmatrix_async( m, sn,
214  A (0,j), lda,
215  dA(0,j), ldda, stream );
216  }
217 
218  /* Initialize partial column norms. */
219  for (j = nfxd; j < n; ++j) {
220  rwork[j] = cblas_snrm2(sm, A(nfxd, j), ione);
221  rwork[n + j] = rwork[j];
222  }
223 
224  j = nfxd;
225  if (nb < sminmn) {
226  /* Use blocked code initially. */
227  magma_queue_sync( stream );
228 
229  /* Compute factorization: while loop. */
230  topbmn = minmn - nb;
231  while(j < topbmn) {
232  jb = min(nb, topbmn - j);
233 
234  /* Factorize JB columns among columns J:N. */
235  n_j = n - j;
236 
237  if (j>nfxd) {
238  // Get panel to the CPU
239  magma_sgetmatrix( m-j, jb,
240  dA(j,j), ldda,
241  A (j,j), lda );
242 
243  // Get the rows
244  magma_sgetmatrix( jb, n_j - jb,
245  dA(j,j + jb), ldda,
246  A (j,j + jb), lda );
247  }
248 
249  magma_slaqps( m, n_j, j, jb, &fjb,
250  A (0, j), lda,
251  dA(0, j), ldda,
252  &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
253  work,
254  &work[jb], n_j,
255  &df[jb], n_j );
256 
257  j += fjb; /* fjb is actual number of columns factored */
258  }
259  }
260 
261  /* Use unblocked code to factor the last or only block. */
262  if (j < minmn) {
263  n_j = n - j;
264  if (j > nfxd) {
265  magma_sgetmatrix( m-j, n_j,
266  dA(j,j), ldda,
267  A (j,j), lda );
268  }
269  lapackf77_slaqp2(&m, &n_j, &j, A(0, j), &lda, &jpvt[j],
270  &tau[j], &rwork[j], &rwork[n+j], work );
271  }
272  }
273 
274  work[0] = MAGMA_S_MAKE( lwkopt, 0. );
275  magma_free( dwork );
276 
277  magma_queue_destroy( stream );
278 
279  return *info;
280 } /* sgeqp3 */
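
A call sketch for the column-pivoted factorization (ours, not from the MAGMA sources): JPVT is cleared so every column is free, WORK is sized to the single-precision bound (N+1)*NB + 2*N quoted above, and on return jpvt[j] holds the 1-based index of the original column that ended up in position j.

    #include <stdlib.h>
    #include "magma.h"

    /* Column-pivoted QR, A*P = Q*R.  jpvt has n entries and tau min(m,n);
       on return jpvt[j] is the 1-based original index of column j of A*P. */
    magma_int_t qr_pivot( magma_int_t m, magma_int_t n,
                          float *A, magma_int_t lda,
                          magma_int_t *jpvt, float *tau )
    {
        magma_int_t info, j;
        magma_int_t nb    = magma_get_sgeqp3_nb( m < n ? m : n );
        magma_int_t lwork = (n + 1)*nb + 2*n;   /* single-precision bound above */
        float *work       = (float*) malloc( lwork * sizeof(float) );

        if ( work == NULL )
            return MAGMA_ERR_HOST_ALLOC;

        for ( j = 0; j < n; ++j )
            jpvt[j] = 0;                        /* 0 = free column */

        magma_sgeqp3( m, n, A, lda, jpvt, tau, work, lwork, &info );

        free( work );
        return info;
    }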

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sgeqp3_gpu ( magma_int_t  m,
magma_int_t  n,
float *  A,
magma_int_t  lda,
magma_int_t *  jpvt,
float *  tau,
float *  work,
magma_int_t  lwork,
magma_int_t *  info
)

Definition at line 17 of file sgeqp3_gpu.cpp.

References __func__, A, blasf77_sswap(), magma_dcopymatrix, MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_sgeqp3_nb(), magma_scopymatrix, magma_slaqps2_gpu(), magma_smalloc(), MAGMA_SUCCESS, magma_xerbla(), magmablas_snrm2_cols(), max, and min.

25 {
26 /* -- MAGMA (version 1.4.0) --
27  Univ. of Tennessee, Knoxville
28  Univ. of California, Berkeley
29  Univ. of Colorado, Denver
30  August 2013
31 
32  Purpose
33  =======
34  SGEQP3 computes a QR factorization with column pivoting of a
35  matrix A: A*P = Q*R using Level 3 BLAS.
36 
37  Arguments
38  =========
39  M (input) INTEGER
40  The number of rows of the matrix A. M >= 0.
41 
42  N (input) INTEGER
43  The number of columns of the matrix A. N >= 0.
44 
45  A (input/output) REAL array, dimension (LDA,N)
46  On entry, the M-by-N matrix A.
47  On exit, the upper triangle of the array contains the
48  min(M,N)-by-N upper trapezoidal matrix R; the elements below
49  the diagonal, together with the array TAU, represent the
50  unitary matrix Q as a product of min(M,N) elementary
51  reflectors.
52 
53  LDA (input) INTEGER
54  The leading dimension of the array A. LDA >= max(1,M).
55 
56  JPVT (input/output) INTEGER array, dimension (N)
57  On entry, if JPVT(J).ne.0, the J-th column of A is permuted
58  to the front of A*P (a leading column); if JPVT(J)=0,
59  the J-th column of A is a free column.
60  On exit, if JPVT(J)=K, then the J-th column of A*P was
61  the K-th column of A.
62 
63  TAU (output) REAL array, dimension (min(M,N))
64  The scalar factors of the elementary reflectors.
65 
66  WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
67  On exit, if INFO=0, WORK(1) returns the optimal LWORK.
68 
69  LWORK (input) INTEGER
70  The dimension of the array WORK.
71  For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
72  for [cz]geqp3, LWORK >= (N+1)*NB,
73  where NB is the optimal blocksize.
74 
75  If LWORK = -1, then a workspace query is assumed; the routine
76  only calculates the optimal size of the WORK array, returns
77  this value as the first entry of the WORK array, and no error
78  message related to LWORK is issued by XERBLA.
79 
80  For [cz]geqp3 only:
81  RWORK (workspace) DOUBLE PRECISION array, dimension (2*N)
82 
83  INFO (output) INTEGER
84  = 0: successful exit.
85  < 0: if INFO = -i, the i-th argument had an illegal value.
86 
87  Further Details
88  ===============
89  The matrix Q is represented as a product of elementary reflectors
90 
91  Q = H(1) H(2) . . . H(k), where k = min(m,n).
92 
93  Each H(i) has the form
94 
95  H(i) = I - tau * v * v'
96 
97  where tau is a real scalar, and v is a real vector
98  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
99  A(i+1:m,i), and tau in TAU(i).
100  ===================================================================== */
101 
102 #define A(i, j) (A + (i) + (j)*(lda ))
103 
104  magma_int_t ione = 1;
105 
106  //magma_int_t na;
107  magma_int_t n_j;
108  magma_int_t j, jb, nb, sm, sn, fjb, nfxd, minmn;
109  magma_int_t topbmn, sminmn, lwkopt, lquery;
110 
111  *info = 0;
112  lquery = (lwork == -1);
113  if (m < 0) {
114  *info = -1;
115  } else if (n < 0) {
116  *info = -2;
117  } else if (lda < max(1,m)) {
118  *info = -4;
119  }
120 
121  nb = magma_get_sgeqp3_nb(min(m, n));
122  if (*info == 0) {
123  minmn = min(m,n);
124  if (minmn == 0) {
125  lwkopt = 1;
126  } else {
127  lwkopt = (n + 1)*nb;
128 #if defined(PRECISION_d) || defined(PRECISION_s)
129  lwkopt += 2*n;
130 #endif
131  }
132  //work[0] = MAGMA_S_MAKE( lwkopt, 0. );
133 
134  if (lwork < lwkopt && ! lquery) {
135  *info = -8;
136  }
137  }
138 
139  if (*info != 0) {
140  magma_xerbla( __func__, -(*info) );
141  return *info;
142  } else if (lquery) {
143  return *info;
144  }
145 
146  if (minmn == 0)
147  return *info;
148 
149 #if defined(PRECISION_d) || defined(PRECISION_s)
150  float *rwork = work + (n + 1)*nb;
151 #endif
152  float *df;
153  if (MAGMA_SUCCESS != magma_smalloc( &df, (n+1)*nb )) {
154  *info = MAGMA_ERR_DEVICE_ALLOC;
155  return *info;
156  }
157  cudaMemset( df, 0, (n+1)*nb*sizeof(float) );
158 
159  nfxd = 0;
160  /* Move initial columns up front.
161  * Note jpvt uses 1-based indices for historical compatibility. */
162  for (j = 0; j < n; ++j) {
163  if (jpvt[j] != 0) {
164  if (j != nfxd) {
165  blasf77_sswap(&m, A(0, j), &ione, A(0, nfxd), &ione);
166  jpvt[j] = jpvt[nfxd];
167  jpvt[nfxd] = j + 1;
168  }
169  else {
170  jpvt[j] = j + 1;
171  }
172  ++nfxd;
173  }
174  else {
175  jpvt[j] = j + 1;
176  }
177  }
178 
179  /* Factorize fixed columns
180  =======================
181  Compute the QR factorization of fixed columns and update
182  remaining columns.
183  if (nfxd > 0) {
184  na = min(m,nfxd);
185  lapackf77_sgeqrf(&m, &na, A, &lda, tau, work, &lwork, info);
186  if (na < n) {
187  n_j = n - na;
188  lapackf77_sormqr( MagmaLeftStr, MagmaTransStr, &m, &n_j, &na,
189  A, &lda, tau, A(0, na), &lda,
190  work, &lwork, info );
191  }
192  }*/
193 
194  /* Factorize free columns */
195  if (nfxd < minmn) {
196  sm = m - nfxd;
197  sn = n - nfxd;
198  sminmn = minmn - nfxd;
199 
200  /*if (nb < sminmn) {
201  j = nfxd;
202 
203  // Set the original matrix to the GPU
204  magma_ssetmatrix_async( m, sn,
205  A (0,j), lda,
206  dA(0,j), ldda, stream[0] );
207  }*/
208 
209  /* Initialize partial column norms. */
210  magmablas_snrm2_cols(sm, sn, A(nfxd,nfxd), lda, &rwork[nfxd]);
211 #if defined(PRECISION_d) || defined(PRECISION_z)
212  magma_dcopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
213 #else
214  magma_scopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
215 #endif
216  /*for (j = nfxd; j < n; ++j) {
217  rwork[j] = cblas_snrm2(sm, A(nfxd, j), ione);
218  rwork[n + j] = rwork[j];
219  }*/
220 
221  j = nfxd;
222  //if (nb < sminmn)
223  {
224  /* Use blocked code initially. */
225  //magma_queue_sync( stream[0] );
226 
227  /* Compute factorization: while loop. */
228  topbmn = minmn;// - nb;
229  while(j < topbmn) {
230  jb = min(nb, topbmn - j);
231 
232  /* Factorize JB columns among columns J:N. */
233  n_j = n - j;
234 
235  /*if (j>nfxd) {
236  // Get panel to the CPU
237  magma_sgetmatrix( m-j, jb,
238  dA(j,j), ldda,
239  A (j,j), lda );
240 
241  // Get the rows
242  magma_sgetmatrix( jb, n_j - jb,
243  dA(j,j + jb), ldda,
244  A (j,j + jb), lda );
245  }*/
246 
247  //magma_slaqps_gpu // this is a cpp-file
248  magma_slaqps2_gpu // this is a cuda-file
249  ( m, n_j, j, jb, &fjb,
250  A (0, j), lda,
251  &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
252  work,
253  &df[jb], n_j );
254 
255  j += fjb; /* fjb is actual number of columns factored */
256  }
257  }
258 
259  /* Use unblocked code to factor the last or only block.
260  if (j < minmn) {
261  n_j = n - j;
262  if (j > nfxd) {
263  magma_sgetmatrix( m-j, n_j,
264  dA(j,j), ldda,
265  A (j,j), lda );
266  }
267  lapackf77_slaqp2(&m, &n_j, &j, A(0, j), &lda, &jpvt[j],
268  &tau[j], &rwork[j], &rwork[n+j], work );
269  }*/
270  }
271  //work[0] = MAGMA_S_MAKE( lwkopt, 0. );
272  magma_free(df);
273 
274  return *info;
275 } /* sgeqp3 */
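
For reference, the partial column norms that magmablas_snrm2_cols produces above (and that the commented-out cblas_snrm2 loop would produce on the CPU) are the 2-norms of the trailing parts of each column. A plain host-side version of that computation, for clarity only (the BLAS routines use a scaled accumulation to avoid overflow and underflow, which this naive sum of squares does not):

    #include <math.h>
    #include <stddef.h>

    /* Reference computation for the partial column norms above: the 2-norm
       of column j of a column-major m-by-n matrix A (lda >= m), restricted
       to rows nfxd..m-1. */
    static float partial_colnorm( const float *A, int lda,
                                  int m, int nfxd, int j )
    {
        float s = 0.0f;
        for ( int i = nfxd; i < m; ++i )
            s += A[i + (size_t)j * lda] * A[i + (size_t)j * lda];
        return sqrtf( s );
    }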

Here is the call graph for this function:

Here is the caller graph for this function:

magma_int_t magma_sgeqr2_gpu ( magma_int_t  m,
magma_int_t  n,
float *  dA,
magma_int_t  lda,
float *  tau,
float *  work,
magma_int_t *  info
)

Here is the caller graph for this function:

magma_int_t magma_sgeqr2x2_gpu ( magma_int_t *  m,
magma_int_t *  n,
float *  dA,
magma_int_t *  ldda,
float *  dtau,
float *  dT,
float *  ddA,
float *  dwork,
magma_int_t *  info
)

Definition at line 14 of file sgeqr2x_gpu-v2.cpp.

References __func__, da_ref, magma_slarfbx_gpu(), magma_slarfgtx_gpu(), magma_xerbla(), magmablas_snrm2_adjust(), magmablas_snrm2_cols(), max, and min.

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  SGEQR2 computes a QR factorization of a real m by n matrix A:
28  A = Q * R.
29 
30  This expert routine requires two more arguments than the standard
31  sgeqr2, namely, dT and ddA, explained below. The storage for A is
32  also not as in the LAPACK's sgeqr2 routine (see below).
33 
34  The first is used to output the triangular
35  n x n factor T of the block reflector used in the factorization.
36  The second holds the diagonal nxn blocks of A, i.e., the diagonal
37  submatrices of R. This routine implements the left looking QR.
38 
39  Arguments
40  =========
41  M (input) INTEGER
42  The number of rows of the matrix A. M >= 0.
43 
44  N (input) INTEGER
45  The number of columns of the matrix A. N >= 0.
46 
47  A (input/output) REAL array, dimension (LDA,N)
48  On entry, the m by n matrix A.
49  On exit, the unitary matrix Q as a
50  product of elementary reflectors (see Further Details).
51 
52  the elements on and above the diagonal of the array
53  contain the min(m,n) by n upper trapezoidal matrix R (R is
54  upper triangular if m >= n); the elements below the diagonal,
55  with the array TAU, represent the unitary matrix Q as a
56  product of elementary reflectors (see Further Details).
57 
58  LDA (input) INTEGER
59  The leading dimension of the array A. LDA >= max(1,M).
60 
61  TAU (output) REAL array, dimension (min(M,N))
62  The scalar factors of the elementary reflectors (see Further
63  Details).
64 
65  dT (output) REAL array, dimension N x N.
66  Stores the triangular N x N factor T of the block reflector
67  used in the factorization. The lower triangular part is 0.
68 
69  ddA (output) REAL array, dimension N x N.
70  Stores the elements of the upper N x N diagonal block of A.
71  LAPACK stores this array in A. There are 0s below the diagonal.
72 
73  RWORK (workspace) DOUBLE_PRECISION array, dimension (3 N)
74 
75  INFO (output) INTEGER
76  = 0: successful exit
77  < 0: if INFO = -i, the i-th argument had an illegal value
78 
79  Further Details
80  ===============
81  The matrix Q is represented as a product of elementary reflectors
82 
83  Q = H(1) H(2) . . . H(k), where k = min(m,n).
84 
85  Each H(i) has the form
86 
87  H(i) = I - tau * v * v'
88 
89  where tau is a real scalar, and v is a real vector with
90  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
91  and tau in TAU(i).
92  ===================================================================== */
93 
94  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
95 
96  magma_int_t i, k;
97 
98  float *work = (float *)dwork;
99  float *dnorm = dwork + 4*(*n);
100 
101 
102  *info = 0;
103  if (*m < 0) {
104  *info = -1;
105  } else if (*n < 0) {
106  *info = -2;
107  } else if (*ldda < max(1,*m)) {
108  *info = -4;
109  }
110  if (*info != 0) {
111  magma_xerbla( __func__, -(*info) );
112  return *info;
113  }
114 
115  /* Compute the norms of the trailing columns */
116  k = min(*m,*n);
117  magmablas_snrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
118 
119  for (i = 0; i < k; ++i) {
120  /* 1. Apply H' to A(:,i) from the left
121  2. Adjust the dnorm[i] to hold the norm of A(i:m,i) */
122  if (i>0) {
123  magma_slarfbx_gpu(*m, i, da_ref(0, 0), *ldda,
124  dT, k, da_ref(0, i), work);
125  magmablas_snrm2_adjust(i, dnorm+i, da_ref(0, i));
126  }
127 
128  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
129  1. 1 is not yet put on the diagonal of A
130  2. Elements above the diagonal are copied in ddA and the ones
131  in A are set to zero
132  3. update T */
133  magma_slarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i,
134  dnorm+i, ddA + i + i*(*n), i,
135  da_ref(i,0), *ldda, dT, k, work);
136  }
137 
138  return *info;
139 } /* magma_sgeqr2 */
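
The Further Details above describe each elementary reflector as H(i) = I - tau * v * v'. Applying such a reflector to a vector never forms H explicitly; it takes one dot product and one AXPY-style update, as in this small sketch (illustrative, not a MAGMA routine):

    /* Apply one elementary reflector H = I - tau*v*v' to a vector x of
       length n, in place: x <- x - tau*(v'*x)*v. */
    static void apply_reflector( int n, const float *v, float tau, float *x )
    {
        float dot = 0.0f;
        for ( int k = 0; k < n; ++k )
            dot += v[k] * x[k];            /* v' * x */
        for ( int k = 0; k < n; ++k )
            x[k] -= tau * dot * v[k];      /* x - tau*(v'x)*v */
    }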

Here is the call graph for this function:

Here is the caller graph for this function:
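All of the sgeqr2x variants on this page return, in dT, the triangular factor T of the compact WY (block reflector) representation, so that Q = H(1) H(2) . . . H(k) = I - V T V', where V holds the Householder vectors described under Further Details. As a purely illustrative aid (this is not MAGMA code; the GPU path builds T incrementally inside magma_slarfgtx_gpu, per the "update T" comment in the listing above), the following CPU-side sketch shows the standard forward, columnwise recurrence for T, assuming V is stored column-major with an explicit unit diagonal:

#include <stddef.h>

/* Illustrative CPU sketch only (not part of MAGMA): build the k x k
   upper triangular factor T of a block reflector from the Householder
   vectors V (m x k, column-major, leading dimension ldv, unit diagonal
   stored explicitly, zeros above the diagonal) and the scalars tau,
   following the standard forward/columnwise recurrence.  Entries
   strictly below T's diagonal are not touched; the MAGMA routines
   document them as 0.                                                */
static void build_T_sketch(int m, int k,
                           const float *V, int ldv,
                           const float *tau,
                           float *T, int ldt)
{
    for (int i = 0; i < k; ++i) {
        /* w = V(:,0:i-1)' * v_i ; v_i is zero above row i, so the
           inner products only need rows i..m-1.                      */
        for (int j = 0; j < i; ++j) {
            float w = 0.0f;
            for (int r = i; r < m; ++r)
                w += V[r + (size_t)j * ldv] * V[r + (size_t)i * ldv];
            T[j + (size_t)i * ldt] = w;
        }
        /* T(0:i-1,i) = -tau[i] * T(0:i-1,0:i-1) * w, computed in place
           top-down: row j of the product only reads w[j..i-1], which
           has not been overwritten yet.                              */
        for (int j = 0; j < i; ++j) {
            float s = 0.0f;
            for (int l = j; l < i; ++l)
                s += T[j + (size_t)l * ldt] * T[l + (size_t)i * ldt];
            T[j + (size_t)i * ldt] = -tau[i] * s;
        }
        T[i + (size_t)i * ldt] = tau[i];   /* diagonal entry of T */
    }
}

With T available, a whole block of reflectors can be applied with a few matrix products, which is what the magma_slarfbx_gpu and magma_slarfb2_gpu calls in these listings rely on.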

magma_int_t magma_sgeqr2x3_gpu ( magma_int_t *  m,
magma_int_t *  n,
float *  dA,
magma_int_t *  ldda,
float *  dtau,
float *  dT,
float *  ddA,
float *  dwork,
magma_int_t *  info
)

Definition at line 54 of file sgeqr2x_gpu-v3.cpp.

References __func__, BLOCK_SIZE, da_ref, dwork, magma_slarfb2_gpu(), magma_slarfbx_gpu(), magma_slarfgtx_gpu(), magma_xerbla(), magmablas_snrm2_adjust(), magmablas_snrm2_cols(), max, and min.

58 {
59 /* -- MAGMA (version 1.4.0) --
60  Univ. of Tennessee, Knoxville
61  Univ. of California, Berkeley
62  Univ. of Colorado, Denver
63  August 2013
64 
65  Purpose
66  =======
67  SGEQR2 computes a QR factorization of a real m by n matrix A:
68  A = Q * R.
69 
70  This expert routine requires two more arguments than the standard
71  sgeqr2, namely, dT and ddA, explained below. The storage for A is
72  also not the same as in LAPACK's sgeqr2 routine (see below).
73 
74  The first is used to output the triangular
75  n x n factor T of the block reflector used in the factorization.
76  The second holds the diagonal n x n blocks of A, i.e., the diagonal
77  submatrices of R. This routine implements the left-looking QR.
78 
79  This version adds internal blocking.
80 
81  Arguments
82  =========
83  M (input) INTEGER
84  The number of rows of the matrix A. M >= 0.
85 
86  N (input) INTEGER
87  The number of columns of the matrix A. N >= 0.
88 
89  A (input/output) REAL array, dimension (LDA,N)
90  On entry, the m by n matrix A.
91  On exit, the elements below the diagonal, with the array TAU,
92  represent the unitary matrix Q as a product of elementary
93  reflectors (see Further Details).
94 
95  Note that, unlike LAPACK's sgeqr2, the upper triangular N x N
96  diagonal block of the factor R is not returned in A but in the
97  separate array ddA; the corresponding entries above the
98  diagonal of A are set to zero (see ddA below).
99 
100  LDA (input) INTEGER
101  The leading dimension of the array A. LDA >= max(1,M).
102 
103  TAU (output) REAL array, dimension (min(M,N))
104  The scalar factors of the elementary reflectors (see Further
105  Details).
106 
107  dT (output) REAL array, dimension N x N.
108  Stores the triangular N x N factor T of the block reflector
109  used in the factorization. The lower triangular part is 0.
110 
111  ddA (output) REAL array, dimension N x N.
112  Stores the elements of the upper N x N diagonal block of A.
113  LAPACK stores this array in A. There are 0s below the diagonal.
114 
115  RWORK (workspace) REAL array, dimension (3 N)
116 
117  INFO (output) INTEGER
118  = 0: successful exit
119  < 0: if INFO = -i, the i-th argument had an illegal value
120 
121  Further Details
122  ===============
123  The matrix Q is represented as a product of elementary reflectors
124 
125  Q = H(1) H(2) . . . H(k), where k = min(m,n).
126 
127  Each H(i) has the form
128 
129  H(i) = I - tau * v * v'
130 
131  where tau is a real scalar, and v is a real vector with
132  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
133  and tau in TAU(i).
134  ===================================================================== */
135 
136  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
137  #define BLOCK_SIZE 32
138 
139  magma_int_t i, k;
140 
141  float *dnorm = dwork;
142  float *work = (float *)(dwork+2*(*n));
143 
144  *info = 0;
145  if (*m < 0) {
146  *info = -1;
147  } else if (*n < 0) {
148  *info = -2;
149  } else if (*ldda < max(1,*m)) {
150  *info = -4;
151  }
152  if (*info != 0) {
153  magma_xerbla( __func__, -(*info) );
154  return *info;
155  }
156 
157  /* Compute the norms of the trailing columns */
158  k = min(*m,*n);
159  magmablas_snrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
160 
161  for (int b=0; b < k; b += BLOCK_SIZE) {
162  for (i = b; i < min(k, b+BLOCK_SIZE); ++i) {
163 
164  /* Apply H' to A(:,i) from the left */
165  if ( i-b > 0)
166  magma_slarfbx_gpu(*m-b, i-b, da_ref(b, b), *ldda,
167  dT+b+b*k, k, da_ref(b, i), work);
168 
169  /* Adjust the dnorm[i] to hold the norm of A(i:m,i) */
170  if ( i > 0 )
171  magmablas_snrm2_adjust(i, dnorm+i, da_ref(0, i));
172 
173  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
174  1. 1 is not yet put on the diagonal of A
175  2. Elements above the diagonal are copied in ddA and
176  the ones in A are set to zero
177  3. update T */
178  magma_slarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i,
179  dnorm+i, ddA + i + i*(*n), i,
180  da_ref(i,0), *ldda, dT, k, work);
181  }
182 
183  /* Apply the transformations to the trailing matrix. */
184  //magma_slarfb2_gpu( MagmaLeft, MagmaTrans, MagmaForward, MagmaColumnwise,
185  magma_slarfb2_gpu(
186  *m-b, k-i, BLOCK_SIZE,
187  da_ref(b, b), *ldda, dT+b+b*k, k,
188  da_ref(b, i), *ldda, work, k-i);
189  }
190 
191  return *info;
192 } /* magma_sgeqr2 */

Here is the call graph for this function:

Here is the caller graph for this function:
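A minimal usage sketch for magma_sgeqr2x3_gpu follows. It is not from the MAGMA distribution; example_sgeqr2x3 is a hypothetical wrapper, the Fortran-style pointer interface matches the listing above, and the buffer sizes simply follow the argument documentation (dtau of length min(m,n), dT and ddA of size n x n, workspace of size 3*n). CUDA error checking is omitted for brevity.

#include <cuda_runtime.h>
#include "magma.h"

/* Hypothetical helper: factor an m x n single-precision matrix that
   already resides on the GPU in dA (leading dimension ldda).         */
void example_sgeqr2x3(magma_int_t m, magma_int_t n,
                      float *dA, magma_int_t ldda)
{
    magma_int_t k    = (m < n) ? m : n;
    magma_int_t info = 0;
    float *dtau, *dT, *ddA, *dwork;

    cudaMalloc((void**)&dtau,  k     * sizeof(float));  /* min(M,N)    */
    cudaMalloc((void**)&dT,    n * n * sizeof(float));  /* T factor    */
    cudaMalloc((void**)&ddA,   n * n * sizeof(float));  /* diag block  */
    cudaMalloc((void**)&dwork, 3 * n * sizeof(float));  /* RWORK (3 N) */

    /* On exit dA holds the Householder vectors, dtau the scalar
       factors, dT the block-reflector T, and ddA the diagonal block
       of R (see the argument descriptions above).                    */
    magma_sgeqr2x3_gpu(&m, &n, dA, &ldda, dtau, dT, ddA, dwork, &info);

    if (info != 0) {
        /* info = -i: the i-th argument had an illegal value */
    }

    cudaFree(dwork); cudaFree(ddA); cudaFree(dT); cudaFree(dtau);
}

magma_sgeqr2x_gpu takes the same argument list, so the same allocation pattern applies; magma_sgeqr2x4_gpu additionally takes a magma_queue_t as its last argument.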

magma_int_t magma_sgeqr2x4_gpu ( magma_int_t *  m,
magma_int_t *  n,
float *  dA,
magma_int_t *  ldda,
float *  dtau,
float *  dT,
float *  ddA,
float *  dwork,
magma_int_t *  info,
magma_queue_t  stream
)

Here is the caller graph for this function:

magma_int_t magma_sgeqr2x_gpu ( magma_int_t *  m,
magma_int_t *  n,
float *  dA,
magma_int_t *  ldda,
float *  dtau,
float *  dT,
float *  ddA,
float *  dwork,
magma_int_t *  info
)

Definition at line 14 of file sgeqr2x_gpu.cpp.

References __func__, da_ref, dwork, magma_slarfgx_gpu(), magma_slarfx_gpu(), magma_xerbla(), magmablas_snrm2_cols(), max, and min.
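The listing below documents the right-looking variant: each reflector H(i) is generated and then immediately applied to all trailing columns, whereas the variants above first bring column i up to date with the previously generated reflectors (left-looking). Purely as an illustration of that ordering, here is a CPU sketch with a textbook Householder generation; it is not a transcription of MAGMA's GPU kernels, and right_looking_qr_sketch is an illustrative name only.

#include <math.h>
#include <stddef.h>

/* Illustrative CPU sketch of a right-looking QR sweep, column-major
   storage with leading dimension lda.  Householder vectors are left
   below the diagonal, beta (R's diagonal entry) on the diagonal, and
   the scalar factors in tau; a simplification of what the GPU routine
   does, since the GPU routine also maintains T and ddA.             */
static void right_looking_qr_sketch(int m, int n, float *A, int lda,
                                    float *tau)
{
    int k = (m < n) ? m : n;
    for (int i = 0; i < k; ++i) {
        float *x = &A[i + (size_t)i * lda];   /* column i, rows i..m-1 */
        int    p = m - i;

        /* Generate H(i) = I - tau*v*v' annihilating A(i+1:m-1, i). */
        float norm = 0.0f;
        for (int r = 0; r < p; ++r)
            norm += x[r] * x[r];
        norm = sqrtf(norm);

        float alpha = x[0];
        float beta  = (alpha >= 0.0f) ? -norm : norm;
        tau[i] = (beta != 0.0f) ? (beta - alpha) / beta : 0.0f;

        float scal = (alpha != beta) ? 1.0f / (alpha - beta) : 0.0f;
        for (int r = 1; r < p; ++r)
            x[r] *= scal;                      /* v(1:p-1); v(0) == 1 */
        x[0] = beta;

        /* Right-looking update: apply H(i)' to the trailing columns
           A(i:m-1, i+1:n-1) immediately.                            */
        for (int j = i + 1; j < n; ++j) {
            float *c = &A[i + (size_t)j * lda];
            float  w = c[0];                   /* implicit v(0) == 1  */
            for (int r = 1; r < p; ++r)
                w += x[r] * c[r];
            w *= tau[i];
            c[0] -= w;
            for (int r = 1; r < p; ++r)
                c[r] -= w * x[r];
        }
    }
}

In the GPU routine the trailing update is performed by magma_slarfx_gpu, and the diagonal block of R goes to ddA rather than staying in A, as described in the argument documentation.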

18 {
19 /* -- MAGMA (version 1.4.0) --
20  Univ. of Tennessee, Knoxville
21  Univ. of California, Berkeley
22  Univ. of Colorado, Denver
23  August 2013
24 
25  Purpose
26  =======
27  SGEQR2 computes a QR factorization of a real m by n matrix A:
28  A = Q * R.
29 
30  This expert routine requires two more arguments than the standard
31  sgeqr2, namely, dT and ddA, explained below. The storage for A is
32  also not the same as in LAPACK's sgeqr2 routine (see below).
33 
34  The first is used to output the triangular
35  n x n factor T of the block reflector used in the factorization.
36  The second holds the diagonal n x n blocks of A, i.e., the diagonal
37  submatrices of R.
38 
39  This version implements the right-looking QR.
40 
41  Arguments
42  =========
43  M (input) INTEGER
44  The number of rows of the matrix A. M >= 0.
45 
46  N (input) INTEGER
47  The number of columns of the matrix A. N >= 0.
48 
49  A (input/output) REAL array, dimension (LDA,N)
50  On entry, the m by n matrix A.
51  On exit, the elements below the diagonal, with the array TAU,
52  represent the unitary matrix Q as a product of elementary
53  reflectors (see Further Details).
54 
55  Note that, unlike LAPACK's sgeqr2, the upper triangular N x N
56  diagonal block of the factor R is not returned in A but in the
57  separate array ddA; the corresponding entries above the
58  diagonal of A are set to zero (see ddA below).
59 
60  LDA (input) INTEGER
61  The leading dimension of the array A. LDA >= max(1,M).
62 
63  TAU (output) REAL array, dimension (min(M,N))
64  The scalar factors of the elementary reflectors (see Further
65  Details).
66 
67  dT (output) REAL array, dimension N x N.
68  Stores the triangular N x N factor T of the block reflector
69  used in the factorization. The lower triangular part is 0.
70 
71  ddA (output) REAL array, dimension N x N.
72  Stores the elements of the upper N x N diagonal block of A.
73  LAPACK stores this array in A. There are 0s below the diagonal.
74 
75  WORK (workspace) REAL array, dimension (N)
76 
77  INFO (output) INTEGER
78  = 0: successful exit
79  < 0: if INFO = -i, the i-th argument had an illegal value
80 
81  Further Details
82  ===============
83  The matrix Q is represented as a product of elementary reflectors
84 
85  Q = H(1) H(2) . . . H(k), where k = min(m,n).
86 
87  Each H(i) has the form
88 
89  H(i) = I - tau * v * v'
90 
91  where tau is a real scalar, and v is a real vector with
92  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
93  and tau in TAU(i).
94  ===================================================================== */
95 
96  #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
97 
98  magma_int_t i, k;
99 
100  float *dnorm = dwork;
101  float *work = (float *)(dwork+2*(*n));
102 
103  *info = 0;
104  if (*m < 0) {
105  *info = -1;
106  } else if (*n < 0) {
107  *info = -2;
108  } else if (*ldda < max(1,*m)) {
109  *info = -4;
110  }
111  if (*info != 0) {
112  magma_xerbla( __func__, -(*info) );
113  return *info;
114  }
115 
116  /* Compute the norms of the trailing columns */
117  k = min(*m,*n);
118  magmablas_snrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);
119 
120  for (i = 0; i < k; ++i) {
121  /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
122  magma_slarfgx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i, dnorm+i,
123  ddA + i + i*(*n), i);
124 
125  if (i < *n) {
126  /* Apply H(i)' to A(i:m,i+1:n) from the left */
127  magma_slarfx_gpu(*m-i, *n-i-1, da_ref(i, i), dtau+i,
128  //da_ref(i, i+1), *ldda, dnorm+i+1,
129  da_ref(i, 0), *ldda, dnorm+i+1,
130  dT, i, wo