\( C = \alpha \;op(A) \;op(B) + \beta C \) More...

Functions
void	magmablas_cgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const const dA_array, magma_int_t ldda, magmaFloatComplex const const dB_array, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	CGEMM performs one of the matrix-matrix operations. More...

void	magmablas_cgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const const dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaFloatComplex const const dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	CGEMM performs one of the matrix-matrix operations. More...

void	magmablas_cgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const const dA_array, magma_int_t ldda, magmaFloatComplex const const dB_array, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	CGEMM performs one of the matrix-matrix operations. More...

void	magmablas_dgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const const dA_array, magma_int_t ldda, double const const dB_array, magma_int_t lddb, double beta, double **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	DGEMM performs one of the matrix-matrix operations. More...

void	magmablas_dgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const const dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, double const const dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, double beta, double **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	DGEMM performs one of the matrix-matrix operations. More...

void	magmablas_dgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const const dA_array, magma_int_t ldda, double const const dB_array, magma_int_t lddb, double beta, double dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	DGEMM performs one of the matrix-matrix operations. More...

void	magmablas_sgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const const dA_array, magma_int_t ldda, float const const dB_array, magma_int_t lddb, float beta, float **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	SGEMM performs one of the matrix-matrix operations. More...

void	magmablas_sgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const const dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, float const const dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, float beta, float **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	SGEMM performs one of the matrix-matrix operations. More...

void	magmablas_sgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const const dA_array, magma_int_t ldda, float const const dB_array, magma_int_t lddb, float beta, float dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	SGEMM performs one of the matrix-matrix operations. More...

void	magmablas_zgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const const dA_array, magma_int_t ldda, magmaDoubleComplex const const dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	ZGEMM performs one of the matrix-matrix operations. More...

void	magmablas_zgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const const dA_array, magma_int_t Ai, magma_int_t Aj, magma_int_t ldda, magmaDoubleComplex const const dB_array, magma_int_t Bi, magma_int_t Bj, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t Ci, magma_int_t Cj, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	ZGEMM performs one of the matrix-matrix operations. More...

void	magmablas_zgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const const dA_array, magma_int_t ldda, magmaDoubleComplex const const dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
	ZGEMM performs one of the matrix-matrix operations. More...

Detailed Description

\( C = \alpha \;op(A) \;op(B) + \beta C \)

Function Documentation

void magmablas_cgemm_batched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaFloatComplex	alpha,
		magmaFloatComplex const const	dA_array,
		magma_int_t	ldda,
		magmaFloatComplex const const	dB_array,
		magma_int_t	lddb,
		magmaFloatComplex	beta,
		magmaFloatComplex **	dC_array,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	COMPLEX On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_cgemm_batched_core	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaFloatComplex	alpha,
		magmaFloatComplex const const	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t	ldda,
		magmaFloatComplex const const	dB_array,
		magma_int_t	Bi,
		magma_int_t	Bj,
		magma_int_t	lddb,
		magmaFloatComplex	beta,
		magmaFloatComplex **	dC_array,
		magma_int_t	Ci,
		magma_int_t	Cj,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	COMPLEX On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	Ai	INTEGER Row offset for all 'A' matrices.
[in]	Aj	INTEGER Column offset for all 'A' matrices.
[in]	Bi	INTEGER Row offset for all 'B' matrices.
[in]	Bj	INTEGER Column offset for all 'B' matrices.
[in]	Ci	INTEGER Row offset for all 'C' matrices.
[in]	Cj	INTEGER Column offset for all 'C' matrices.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_cgemm_vbatched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	k,
		magmaFloatComplex	alpha,
		magmaFloatComplex const const	dA_array,
		magma_int_t *	ldda,
		magmaFloatComplex const const	dB_array,
		magma_int_t *	lddb,
		magmaFloatComplex	beta,
		magmaFloatComplex **	dC_array,
		magma_int_t *	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]	n	Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]	k	Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]	alpha	COMPLEX On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]	ldda	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]	lddb	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]	beta	COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dgemm_batched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		double	alpha,
		double const const	dA_array,
		magma_int_t	ldda,
		double const const	dB_array,
		magma_int_t	lddb,
		double	beta,
		double **	dC_array,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

DGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dgemm_batched_core	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		double	alpha,
		double const const	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t	ldda,
		double const const	dB_array,
		magma_int_t	Bi,
		magma_int_t	Bj,
		magma_int_t	lddb,
		double	beta,
		double **	dC_array,
		magma_int_t	Ci,
		magma_int_t	Cj,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

DGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	Ai	INTEGER Row offset for all 'A' matrices.
[in]	Aj	INTEGER Column offset for all 'A' matrices.
[in]	Bi	INTEGER Row offset for all 'B' matrices.
[in]	Bj	INTEGER Column offset for all 'B' matrices.
[in]	Ci	INTEGER Row offset for all 'C' matrices.
[in]	Cj	INTEGER Column offset for all 'C' matrices.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dgemm_vbatched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	k,
		double	alpha,
		double const const	dA_array,
		magma_int_t *	ldda,
		double const const	dB_array,
		magma_int_t *	lddb,
		double	beta,
		double **	dC_array,
		magma_int_t *	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

DGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]	n	Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]	k	Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]	alpha	DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]	ldda	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]	lddb	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]	beta	DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_sgemm_batched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		float	alpha,
		float const const	dA_array,
		magma_int_t	ldda,
		float const const	dB_array,
		magma_int_t	lddb,
		float	beta,
		float **	dC_array,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

SGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	REAL On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a REAL array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_sgemm_batched_core	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		float	alpha,
		float const const	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t	ldda,
		float const const	dB_array,
		magma_int_t	Bi,
		magma_int_t	Bj,
		magma_int_t	lddb,
		float	beta,
		float **	dC_array,
		magma_int_t	Ci,
		magma_int_t	Cj,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

SGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	REAL On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). REAL array of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	Ai	INTEGER Row offset for all 'A' matrices.
[in]	Aj	INTEGER Column offset for all 'A' matrices.
[in]	Bi	INTEGER Row offset for all 'B' matrices.
[in]	Bj	INTEGER Column offset for all 'B' matrices.
[in]	Ci	INTEGER Row offset for all 'C' matrices.
[in]	Cj	INTEGER Column offset for all 'C' matrices.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_sgemm_vbatched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	k,
		float	alpha,
		float const const	dA_array,
		magma_int_t *	ldda,
		float const const	dB_array,
		magma_int_t *	lddb,
		float	beta,
		float **	dC_array,
		magma_int_t *	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

SGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]	n	Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]	k	Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]	alpha	REAL On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]	ldda	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]	lddb	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]	beta	REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a REAL array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_zgemm_batched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex const const	dA_array,
		magma_int_t	ldda,
		magmaDoubleComplex const const	dB_array,
		magma_int_t	lddb,
		magmaDoubleComplex	beta,
		magmaDoubleComplex **	dC_array,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_zgemm_batched_core	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex const const	dA_array,
		magma_int_t	Ai,
		magma_int_t	Aj,
		magma_int_t	ldda,
		magmaDoubleComplex const const	dB_array,
		magma_int_t	Bi,
		magma_int_t	Bj,
		magma_int_t	lddb,
		magmaDoubleComplex	beta,
		magmaDoubleComplex **	dC_array,
		magma_int_t	Ci,
		magma_int_t	Cj,
		magma_int_t	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]	alpha	COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]	ldda	INTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]	lddb	INTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]	beta	COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	INTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]	Ai	INTEGER Row offset for all 'A' matrices.
[in]	Aj	INTEGER Column offset for all 'A' matrices.
[in]	Bi	INTEGER Row offset for all 'B' matrices.
[in]	Bj	INTEGER Column offset for all 'B' matrices.
[in]	Ci	INTEGER Row offset for all 'C' matrices.
[in]	Cj	INTEGER Column offset for all 'C' matrices.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_zgemm_vbatched	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t *	m,
		magma_int_t *	n,
		magma_int_t *	k,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex const const	dA_array,
		magma_int_t *	ldda,
		magmaDoubleComplex const const	dB_array,
		magma_int_t *	lddb,
		magmaDoubleComplex	beta,
		magmaDoubleComplex **	dC_array,
		magma_int_t *	lddc,
		magma_int_t	batchCount,
		magma_queue_t	queue
	)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	Array of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]	n	Array of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]	k	Array of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]	alpha	COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in]	dA_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]	ldda	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]	dB_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]	lddb	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]	beta	COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]	dC_array	Array of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alphaop( A )op( B ) + beta*C ).
[in]	lddc	Array of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]	batchCount	INTEGER The number of matrices to operate on.
[in]	queue	magma_queue_t Queue to execute in.

Functions

Detailed Description

Function Documentation

Parameters

Parameters

Parameters

Parameters

Parameters

Parameters

Parameters

Parameters

Parameters

Parameters

Parameters

Parameters