MAGMA  2.3.0 Matrix Algebra for GPU and Multicore Architectures

$$C = op(A)^{-1} B$$ or $$C = B \;op(A)^{-1}$$ where $$A$$ is triangular More...

## Functions

void magmablas_ctrsm_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
ctrsm_outofplace solves one of the matrix equations on gpu More...

void magmablas_ctrsm_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magmaFloatComplex **dX_array, magma_int_t lddx, magmaFloatComplex **dinvA_array, magma_int_t dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
ctrsm_work solves one of the matrix equations on gpu More...

void magmablas_ctrsm_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t ldda, magmaFloatComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
ctrsm solves one of the matrix equations on gpu More...

void magmablas_ctrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
ctrsm solves one of the matrix equations on gpu More...

void magmablas_ctrsm_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magmaFloatComplex **dX_array, magma_int_t *lddx, magmaFloatComplex **dinvA_array, magma_int_t *dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
ctrsm_outofplace solves one of the matrix equations on gpu More...

void magmablas_ctrsm_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magmaFloatComplex **dX_array, magma_int_t *lddx, magmaFloatComplex **dinvA_array, magma_int_t *dinvA_length, magmaFloatComplex **dA_displ, magmaFloatComplex **dB_displ, magmaFloatComplex **dX_displ, magmaFloatComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
ctrsm_work solves one of the matrix equations on gpu More...

void magmablas_ctrsm_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaFloatComplex alpha, magmaFloatComplex **dA_array, magma_int_t *ldda, magmaFloatComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

void magmablas_dtrsm_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, double **dX_array, magma_int_t lddx, double **dinvA_array, magma_int_t dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
dtrsm_outofplace solves one of the matrix equations on gpu More...

void magmablas_dtrsm_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, double **dX_array, magma_int_t lddx, double **dinvA_array, magma_int_t dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
dtrsm_work solves one of the matrix equations on gpu More...

void magmablas_dtrsm_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double **dA_array, magma_int_t ldda, double **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
dtrsm solves one of the matrix equations on gpu More...

void magmablas_dtrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
dtrsm solves one of the matrix equations on gpu More...

void magmablas_dtrsm_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, double **dX_array, magma_int_t *lddx, double **dinvA_array, magma_int_t *dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
dtrsm_outofplace solves one of the matrix equations on gpu More...

void magmablas_dtrsm_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, double **dX_array, magma_int_t *lddx, double **dinvA_array, magma_int_t *dinvA_length, double **dA_displ, double **dB_displ, double **dX_displ, double **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
dtrsm_work solves one of the matrix equations on gpu More...

void magmablas_dtrsm_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, double alpha, double **dA_array, magma_int_t *ldda, double **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

void magmablas_strsm_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, float **dX_array, magma_int_t lddx, float **dinvA_array, magma_int_t dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
strsm_outofplace solves one of the matrix equations on gpu More...

void magmablas_strsm_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, float **dX_array, magma_int_t lddx, float **dinvA_array, magma_int_t dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
strsm_work solves one of the matrix equations on gpu More...

void magmablas_strsm_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float **dA_array, magma_int_t ldda, float **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
strsm solves one of the matrix equations on gpu More...

void magmablas_strsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
strsm solves one of the matrix equations on gpu More...

void magmablas_strsm_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, float **dX_array, magma_int_t *lddx, float **dinvA_array, magma_int_t *dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
strsm_outofplace solves one of the matrix equations on gpu More...

void magmablas_strsm_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, float **dX_array, magma_int_t *lddx, float **dinvA_array, magma_int_t *dinvA_length, float **dA_displ, float **dB_displ, float **dX_displ, float **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
strsm_work solves one of the matrix equations on gpu More...

void magmablas_strsm_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, float alpha, float **dA_array, magma_int_t *ldda, float **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

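void magmablas_ztrsm_outofplace_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
ztrsm_outofplace solves one of the matrix equations on gpu More...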

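void magmablas_ztrsm_work_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magmaDoubleComplex **dX_array, magma_int_t lddx, magmaDoubleComplex **dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue)
ztrsm_work solves one of the matrix equations on gpu More...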

void magmablas_ztrsm_batched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t ldda, magmaDoubleComplex **dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue)
ztrsm solves one of the matrix equations on gpu More...

void magmablas_ztrsm_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_queue_t queue)
ztrsm solves one of the matrix equations on gpu More...

void magmablas_ztrsm_outofplace_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magmaDoubleComplex **dX_array, magma_int_t *lddx, magmaDoubleComplex **dinvA_array, magma_int_t *dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
ztrsm_outofplace solves one of the matrix equations on gpu More...

void magmablas_ztrsm_work_vbatched (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magmaDoubleComplex **dX_array, magma_int_t *lddx, magmaDoubleComplex **dinvA_array, magma_int_t *dinvA_length, magmaDoubleComplex **dA_displ, magmaDoubleComplex **dB_displ, magmaDoubleComplex **dX_displ, magmaDoubleComplex **dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)
ztrsm_work solves one of the matrix equations on gpu More...

void magmablas_ztrsm_vbatched_max_nocheck (magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t *m, magma_int_t *n, magmaDoubleComplex alpha, magmaDoubleComplex **dA_array, magma_int_t *ldda, magmaDoubleComplex **dB_array, magma_int_t *lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue)

## Detailed Description

$$C = op(A)^{-1} B$$ or $$C = B \;op(A)^{-1}$$ where $$A$$ is triangular

## Function Documentation

 void magmablas_ctrsm_outofplace_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex ** dA_array, magma_int_t ldda, magmaFloatComplex ** dB_array, magma_int_t lddb, magmaFloatComplex ** dX_array, magma_int_t lddx, magmaFloatComplex ** dinvA_array, magma_int_t dinvA_length, magmaFloatComplex ** dA_displ, magmaFloatComplex ** dB_displ, magmaFloatComplex ** dX_displ, magmaFloatComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. 
When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, where CTRTRI_BATCHED_NB = 128. [in] dinvA_length INTEGER. The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by CTRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ctrsm_work_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex ** dA_array, magma_int_t ldda, magmaFloatComplex ** dB_array, magma_int_t lddb, magmaFloatComplex ** dX_array, magma_int_t lddx, magmaFloatComplex ** dinvA_array, magma_int_t dinvA_length, magmaFloatComplex ** dA_displ, magmaFloatComplex ** dB_displ, magmaFloatComplex ** dX_displ, magmaFloatComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. 
When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, where CTRTRI_BATCHED_NB = 128. [in] dinvA_length INTEGER. The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by CTRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ctrsm_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex ** dA_array, magma_int_t ldda, magmaFloatComplex ** dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue )

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous batched version of magmablas_ctrsm. Unlike the _work and _outofplace variants, it does not take flag, dinvA, or dX workspace arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). 
[in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ctrsm_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, magmaFloatComplex alpha, magmaFloatComplex ** dA_array, magma_int_t * ldda, magmaFloatComplex ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_queue_t queue )

ctrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous variable-size batched version of magmablas_ctrsm. Unlike the _work and _outofplace variants, it does not take flag, dinvA, or dX workspace arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount + 1). 
On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ctrsm_outofplace_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, magmaFloatComplex alpha, magmaFloatComplex ** dA_array, magma_int_t * ldda, magmaFloatComplex ** dB_array, magma_int_t * lddb, magmaFloatComplex ** dX_array, magma_int_t * lddx, magmaFloatComplex ** dinvA_array, magma_int_t * dinvA_length, magmaFloatComplex ** dA_displ, magmaFloatComplex ** dB_displ, magmaFloatComplex ** dX_displ, magmaFloatComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

ctrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB, where CTRTRI_BATCHED_NB = 128. [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by CTRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] max_m INTEGER. The maximum value in m. [in] max_n INTEGER. The maximum value in n. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ctrsm_work_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, magmaFloatComplex alpha, magmaFloatComplex ** dA_array, magma_int_t * ldda, magmaFloatComplex ** dB_array, magma_int_t * lddb, magmaFloatComplex ** dX_array, magma_int_t * lddx, magmaFloatComplex ** dinvA_array, magma_int_t * dinvA_length, magmaFloatComplex ** dA_displ, magmaFloatComplex ** dB_displ, magmaFloatComplex ** dX_displ, magmaFloatComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

ctrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ctrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0. [in] alpha COMPLEX. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/CTRTRI_BATCHED_NB)*CTRTRI_BATCHED_NB*CTRTRI_BATCHED_NB. [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by CTRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] max_m INTEGER. The maximum value in m. [in] max_n INTEGER. The maximum value in n. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ctrsm_vbatched_max_nocheck ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, magmaFloatComplex alpha, magmaFloatComplex ** dA_array, magma_int_t * ldda, magmaFloatComplex ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )
 void magmablas_dtrsm_outofplace_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double ** dA_array, magma_int_t ldda, double ** dB_array, magma_int_t lddb, double ** dX_array, magma_int_t lddx, double ** dinvA_array, magma_int_t dinvA_length, double ** dA_displ, double ** dB_displ, double ** dX_displ, double ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array rather than being written over B (see the dB_array and dX_array parameters below).

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. 
On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in] dB_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB [in] dinvA_length INTEGER The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by DTRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_dtrsm_work_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, double alpha, double ** dA_array, magma_int_t ldda, double ** dB_array, magma_int_t lddb, double ** dX_array, magma_int_t lddx, double ** dinvA_array, magma_int_t dinvA_length, double ** dA_displ, double ** dB_displ, double ** dX_displ, double ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. 
On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( lddx, n ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB, where DTRTRI_BATCHED_NB = 128. [in] dinvA_length INTEGER The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by DTRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_dtrsm_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, double alpha, double ** dA_array, magma_int_t ldda, double ** dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue )

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous batched version of magmablas_dtrsm. Unlike magmablas_dtrsm_work_batched, it does not take flag, d_dinvA or dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). 
[in,out] dB_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_dtrsm_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, double alpha, double ** dA_array, magma_int_t * ldda, double ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_queue_t queue )

dtrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous variable-size batched version of magmablas_dtrsm. Unlike magmablas_dtrsm_work_vbatched, it does not take flag, d_dinvA or dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount + 1). 
On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_dtrsm_outofplace_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, double alpha, double ** dA_array, magma_int_t * ldda, double ** dB_array, magma_int_t * lddb, double ** dX_array, magma_int_t * lddx, double ** dinvA_array, magma_int_t * dinvA_length, double ** dA_displ, double ** dB_displ, double ** dX_displ, double ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

dtrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array rather than being written over B (see the dB_array and dX_array parameters below).

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in] dB_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by DTRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] max_m INTEGER The maximum value in m. [in] max_n INTEGER The maximum value in n. [in] queue magma_queue_t Queue to execute in.
 void magmablas_dtrsm_work_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, double alpha, double ** dA_array, magma_int_t * ldda, double ** dB_array, magma_int_t * lddb, double ** dX_array, magma_int_t * lddx, double ** dinvA_array, magma_int_t * dinvA_length, double ** dA_displ, double ** dB_displ, double ** dX_displ, double ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

dtrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_dtrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0. [in] alpha DOUBLE PRECISION. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(N/DTRTRI_BATCHED_NB)*DTRTRI_BATCHED_NB*DTRTRI_BATCHED_NB. [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by DTRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] max_m INTEGER. The maximum value in m. [in] max_n INTEGER. The maximum value in n. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_dtrsm_vbatched_max_nocheck ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, double alpha, double ** dA_array, magma_int_t * ldda, double ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )
 void magmablas_strsm_outofplace_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float ** dA_array, magma_int_t ldda, float ** dB_array, magma_int_t lddb, float ** dX_array, magma_int_t lddx, float ** dinvA_array, magma_int_t dinvA_length, float ** dA_displ, float ** dB_displ, float ** dX_displ, float ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array rather than being written over B (see the dB_array and dX_array parameters below).

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. 
When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in] dB_array Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. [in] dinvA_length INTEGER. The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by STRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_strsm_work_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, float alpha, float ** dA_array, magma_int_t ldda, float ** dB_array, magma_int_t lddb, float ** dX_array, magma_int_t lddx, float ** dinvA_array, magma_int_t dinvA_length, float ** dA_displ, float ** dB_displ, float ** dX_displ, float ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. 
When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( lddx, n ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, where STRTRI_BATCHED_NB = 128. [in] dinvA_length INTEGER The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by STRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_strsm_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, float alpha, float ** dA_array, magma_int_t ldda, float ** dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue )

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is the batched version of magmablas_strsm. Unlike magmablas_strsm_work_batched, it does not take flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). 
[in,out] dB_array Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_strsm_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, float alpha, float ** dA_array, magma_int_t * ldda, float ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_queue_t queue )

strsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is the variable-size batched version of magmablas_strsm. Unlike magmablas_strsm_work_vbatched, it does not take flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount + 1). 
On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_strsm_outofplace_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, float alpha, float ** dA_array, magma_int_t * ldda, float ** dB_array, magma_int_t * lddb, float ** dX_array, magma_int_t * lddx, float ** dinvA_array, magma_int_t * dinvA_length, float ** dA_displ, float ** dB_displ, float ** dX_displ, float ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

strsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array (out of place), not in dB_array.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in] dB_array Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by STRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] max_m INTEGER The maximum value in m. [in] max_n INTEGER The maximum value in n. [in] queue magma_queue_t Queue to execute in.
 void magmablas_strsm_work_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, float alpha, float ** dA_array, magma_int_t * ldda, float ** dB_array, magma_int_t * lddb, float ** dX_array, magma_int_t * lddx, float ** dinvA_array, magma_int_t * dinvA_length, float ** dA_displ, float ** dB_displ, float ** dX_displ, float ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

strsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_strsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0. [in] alpha REAL. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a REAL array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a REAL array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. lddb >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a REAL array X of dimension ( LDDX, N ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. lddx >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a REAL array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(N/STRTRI_BATCHED_NB)*STRTRI_BATCHED_NB*STRTRI_BATCHED_NB. [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by STRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] max_m INTEGER The maximum value in m. [in] max_n INTEGER The maximum value in n. [in] queue magma_queue_t Queue to execute in.
 void magmablas_strsm_vbatched_max_nocheck ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, float alpha, float ** dA_array, magma_int_t * ldda, float ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )
 void magmablas_ztrsm_outofplace_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex ** dA_array, magma_int_t ldda, magmaDoubleComplex ** dB_array, magma_int_t lddb, magmaDoubleComplex ** dX_array, magma_int_t lddx, magmaDoubleComplex ** dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex ** dA_displ, magmaDoubleComplex ** dB_displ, magmaDoubleComplex ** dX_displ, magmaDoubleComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array (out of place), not in dB_array.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. 
On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB [in] dinvA_length INTEGER The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by ZTRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_ztrsm_work_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex ** dA_array, magma_int_t ldda, magmaDoubleComplex ** dB_array, magma_int_t lddb, magmaDoubleComplex ** dX_array, magma_int_t lddx, magmaDoubleComplex ** dinvA_array, magma_int_t dinvA_length, magmaDoubleComplex ** dA_displ, magmaDoubleComplex ** dB_displ, magmaDoubleComplex ** dX_displ, magmaDoubleComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_queue_t queue )

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. 
On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( lddx, n ). On entry, should be set to 0 On exit, the solution matrix X [in] lddx INTEGER. On entry, lddx specifies the first dimension of each array X. lddx >= max( 1, m ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(m/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, If side == MagmaRight, dinvA must be of size >= ceil(n/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB, where ZTRTRI_BATCHED_NB = 128. [in] dinvA_length INTEGER The size of each workspace matrix dinvA dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER Used internally by ZTRTRI_DIAG routine [in] batchCount INTEGER The number of matrices to operate on. [in] queue magma_queue_t Queue to execute in.
 void magmablas_ztrsm_batched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex ** dA_array, magma_int_t ldda, magmaDoubleComplex ** dB_array, magma_int_t lddb, magma_int_t batchCount, magma_queue_t queue )

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm. Unlike the ztrsm_work and ztrsm_outofplace variants, it does not take the flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER. On entry, m specifies the number of rows of B. m >= 0. [in] n INTEGER. On entry, n specifies the number of columns of B. n >= 0. [in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( ldda, k ), where k is m when side = MagmaLeft and is n when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER. On entry, ldda specifies the first dimension of each array A. When side = MagmaLeft, ldda >= max( 1, m ), when side = MagmaRight, ldda >= max( 1, n ). 
[in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( lddb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER. On entry, lddb specifies the first dimension of each array B. lddb >= max( 1, m ). [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ztrsm_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, magmaDoubleComplex alpha, magmaDoubleComplex ** dA_array, magma_int_t * ldda, magmaDoubleComplex ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_queue_t queue )

ztrsm solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm. Unlike the ztrsm_work and ztrsm_outofplace variants, it does not take the flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] m INTEGER array, dimension(batchCount + 1). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount + 1). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount + 1). 
On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER array, dimension(batchCount + 1). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in] batchCount INTEGER. The number of matrices to operate on. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ztrsm_outofplace_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, magmaDoubleComplex alpha, magmaDoubleComplex ** dA_array, magma_int_t * ldda, magmaDoubleComplex ** dB_array, magma_int_t * lddb, magmaDoubleComplex ** dX_array, magma_int_t * lddx, magmaDoubleComplex ** dinvA_array, magma_int_t * dinvA_length, magmaDoubleComplex ** dA_displ, magmaDoubleComplex ** dB_displ, magmaDoubleComplex ** dX_displ, magmaDoubleComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

ztrsm_outofplace solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The solution matrix X is returned in dX_array.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of the corresponding B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of the corresponding B. N >= 0. [in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB; if side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by ZTRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] max_m INTEGER. The maximum value in m. [in] max_n INTEGER. The maximum value in n. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ztrsm_work_vbatched ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t flag, magma_int_t * m, magma_int_t * n, magmaDoubleComplex alpha, magmaDoubleComplex ** dA_array, magma_int_t * ldda, magmaDoubleComplex ** dB_array, magma_int_t * lddb, magmaDoubleComplex ** dX_array, magma_int_t * lddx, magmaDoubleComplex ** dinvA_array, magma_int_t * dinvA_length, magmaDoubleComplex ** dA_displ, magmaDoubleComplex ** dB_displ, magmaDoubleComplex ** dX_displ, magmaDoubleComplex ** dinvA_displ, magma_int_t resetozero, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )

ztrsm_work solves one of the matrix equations on gpu

op(A)*X = alpha*B,   or
X*op(A) = alpha*B,


where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of

op(A) = A,    or
op(A) = A^T,  or
op(A) = A^H.


The matrix X is overwritten on B.

This is an asynchronous version of magmablas_ztrsm with flag, d_dinvA and dX workspaces as arguments.

Parameters
 [in] side magma_side_t. On entry, side specifies whether op(A) appears on the left or right of X as follows: = MagmaLeft: op(A)*X = alpha*B. = MagmaRight: X*op(A) = alpha*B. [in] uplo magma_uplo_t. On entry, uplo specifies whether the matrix A is an upper or lower triangular matrix as follows: = MagmaUpper: A is an upper triangular matrix. = MagmaLower: A is a lower triangular matrix. [in] transA magma_trans_t. On entry, transA specifies the form of op(A) to be used in the matrix multiplication as follows: = MagmaNoTrans: op(A) = A. = MagmaTrans: op(A) = A^T. = MagmaConjTrans: op(A) = A^H. [in] diag magma_diag_t. On entry, diag specifies whether or not A is unit triangular as follows: = MagmaUnit: A is assumed to be unit triangular. = MagmaNonUnit: A is not assumed to be unit triangular. [in] flag BOOLEAN. If flag is true, invert diagonal blocks. If flag is false, assume diagonal blocks (stored in d_dinvA) are already inverted. [in] m INTEGER array, dimension(batchCount). On entry, each element M specifies the number of rows of each B. M >= 0. [in] n INTEGER array, dimension(batchCount). On entry, each element N specifies the number of columns of each B. N >= 0. [in] alpha COMPLEX_16. On entry, alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. [in] dA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of dimension ( LDDA, k ), where k is M when side = MagmaLeft and is N when side = MagmaRight. Before entry with uplo = MagmaUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with uplo = MagmaLower, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. 
Note that when diag = MagmaUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. [in] ldda INTEGER array, dimension(batchCount). On entry, each element LDDA specifies the first dimension of each array A. When side = MagmaLeft, LDDA >= max( 1, M ), when side = MagmaRight, LDDA >= max( 1, N ). [in,out] dB_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of dimension ( LDDB, N ). Before entry, the leading M by N part of the array B must contain the right-hand side matrix B. On exit, the solution matrix X. [in] lddb INTEGER array, dimension(batchCount). On entry, each element LDDB specifies the first dimension of each array B. LDDB >= max( 1, M ). [in,out] dX_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array X of dimension ( LDDX, N ). On entry, should be set to 0. On exit, the solution matrix X. [in] lddx INTEGER array, dimension(batchCount). On entry, each element LDDX specifies the first dimension of each array X. LDDX >= max( 1, M ). dinvA_array Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array dinvA, a workspace on device. If side == MagmaLeft, dinvA must be of size >= ceil(M/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB; if side == MagmaRight, dinvA must be of size >= ceil(N/ZTRTRI_BATCHED_NB)*ZTRTRI_BATCHED_NB*ZTRTRI_BATCHED_NB. [in] dinvA_length INTEGER array, dimension(batchCount). The size of each workspace matrix dinvA. dA_displ (workspace) Array of pointers, dimension (batchCount). dB_displ (workspace) Array of pointers, dimension (batchCount). dX_displ (workspace) Array of pointers, dimension (batchCount). dinvA_displ (workspace) Array of pointers, dimension (batchCount). [in] resetozero INTEGER. Used internally by ZTRTRI_DIAG routine. [in] batchCount INTEGER. The number of matrices to operate on. [in] max_m INTEGER. The maximum value in m. [in] max_n INTEGER. The maximum value in n. [in] queue magma_queue_t. Queue to execute in.
 void magmablas_ztrsm_vbatched_max_nocheck ( magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag, magma_int_t * m, magma_int_t * n, magmaDoubleComplex alpha, magmaDoubleComplex ** dA_array, magma_int_t * ldda, magmaDoubleComplex ** dB_array, magma_int_t * lddb, magma_int_t batchCount, magma_int_t max_m, magma_int_t max_n, magma_queue_t queue )