\( C = \alpha \;op(A) \;op(B) + \beta C \) More...

Functions
void	magma_cgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
	Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More...

void	magma_dgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc, magma_queue_t queue)
	Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More...

void	magma_hgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaHalf alpha, magmaHalf_const_ptr dA, magma_int_t ldda, magmaHalf_const_ptr dB, magma_int_t lddb, magmaHalf beta, magmaHalf_ptr dC, magma_int_t lddc, magma_queue_t queue)
	Perform FP16 matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More...

void	magma_sgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc, magma_queue_t queue)
	Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More...

void	magma_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
	Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \). More...

void	magmablas_cgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const dA, magma_int_t ldda, magmaFloatComplex const dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex *dC, magma_int_t lddc, magma_queue_t queue)
	CGEMM performs one of the matrix-matrix operations. More...

void	magmablas_cgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
	CGEMM_REDUCE performs one of the matrix-matrix operations. More...

void	magmablas_dgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const dA, magma_int_t ldda, double const dB, magma_int_t lddb, double beta, double *dC, magma_int_t lddc, magma_queue_t queue)
	DGEMM performs one of the matrix-matrix operations. More...

void	magmablas_dgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc, magma_queue_t queue)
	DGEMM_REDUCE performs one of the matrix-matrix operations. More...

void	magmablas_sgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const dA, magma_int_t ldda, float const dB, magma_int_t lddb, float beta, float *dC, magma_int_t lddc, magma_queue_t queue)
	SGEMM performs one of the matrix-matrix operations. More...

void	magmablas_sgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, float alpha, magmaFloat_const_ptr dA, magma_int_t ldda, magmaFloat_const_ptr dB, magma_int_t lddb, float beta, magmaFloat_ptr dC, magma_int_t lddc, magma_queue_t queue)
	SGEMM_REDUCE performs one of the matrix-matrix operations. More...

void	magmablas_zgemm (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const dA, magma_int_t ldda, magmaDoubleComplex const dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex *dC, magma_int_t lddc, magma_queue_t queue)
	ZGEMM performs one of the matrix-matrix operations. More...

void	magmablas_zgemm_reduce (magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc, magma_queue_t queue)
	ZGEMM_REDUCE performs one of the matrix-matrix operations. More...

Detailed Description

\( C = \alpha \;op(A) \;op(B) + \beta C \)

Function Documentation

void magma_cgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaFloatComplex	alpha,
		magmaFloatComplex_const_ptr	dA,
		magma_int_t	ldda,
		magmaFloatComplex_const_ptr	dB,
		magma_int_t	lddb,
		magmaFloatComplex	beta,
		magmaFloatComplex_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).

Parameters

[in]	transA	Operation op(A) to perform on matrix A.
[in]	transB	Operation op(B) to perform on matrix B.
[in]	m	Number of rows of C and op(A). m >= 0.
[in]	n	Number of columns of C and op(B). n >= 0.
[in]	k	Number of columns of op(A) and rows of op(B). k >= 0.
[in]	alpha	Scalar \( \alpha \)
[in]	dA	COMPLEX array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in]	ldda	Leading dimension of dA.
[in]	dB	COMPLEX array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in]	lddb	Leading dimension of dB.
[in]	beta	Scalar \( \beta \)
[in,out]	dC	COMPLEX array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]	lddc	Leading dimension of dC.
[in]	queue	magma_queue_t Queue to execute in.

void magma_dgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		double	alpha,
		magmaDouble_const_ptr	dA,
		magma_int_t	ldda,
		magmaDouble_const_ptr	dB,
		magma_int_t	lddb,
		double	beta,
		magmaDouble_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).

Parameters

[in]	transA	Operation op(A) to perform on matrix A.
[in]	transB	Operation op(B) to perform on matrix B.
[in]	m	Number of rows of C and op(A). m >= 0.
[in]	n	Number of columns of C and op(B). n >= 0.
[in]	k	Number of columns of op(A) and rows of op(B). k >= 0.
[in]	alpha	Scalar \( \alpha \)
[in]	dA	DOUBLE PRECISION array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in]	ldda	Leading dimension of dA.
[in]	dB	DOUBLE PRECISION array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in]	lddb	Leading dimension of dB.
[in]	beta	Scalar \( \beta \)
[in,out]	dC	DOUBLE PRECISION array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]	lddc	Leading dimension of dC.
[in]	queue	magma_queue_t Queue to execute in.

void magma_hgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaHalf	alpha,
		magmaHalf_const_ptr	dA,
		magma_int_t	ldda,
		magmaHalf_const_ptr	dB,
		magma_int_t	lddb,
		magmaHalf	beta,
		magmaHalf_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

Perform FP16 matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).

This routine requires CUDA 7.5 or greater.

Parameters

[in]	transA	Operation op(A) to perform on matrix A.
[in]	transB	Operation op(B) to perform on matrix B.
[in]	m	Number of rows of C and op(A). m >= 0.
[in]	n	Number of columns of C and op(B). n >= 0.
[in]	k	Number of columns of op(A) and rows of op(B). k >= 0.
[in]	alpha	Scalar \( \alpha \)
[in]	dA	HALF PRECISION array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in]	ldda	Leading dimension of dA.
[in]	dB	HALF PRECISION array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in]	lddb	Leading dimension of dB.
[in]	beta	Scalar \( \beta \)
[in,out]	dC	HALF PRECISION array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]	lddc	Leading dimension of dC.
[in]	queue	magma_queue_t Queue to execute in.

void magma_sgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		float	alpha,
		magmaFloat_const_ptr	dA,
		magma_int_t	ldda,
		magmaFloat_const_ptr	dB,
		magma_int_t	lddb,
		float	beta,
		magmaFloat_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).

Parameters

[in]	transA	Operation op(A) to perform on matrix A.
[in]	transB	Operation op(B) to perform on matrix B.
[in]	m	Number of rows of C and op(A). m >= 0.
[in]	n	Number of columns of C and op(B). n >= 0.
[in]	k	Number of columns of op(A) and rows of op(B). k >= 0.
[in]	alpha	Scalar \( \alpha \)
[in]	dA	REAL array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in]	ldda	Leading dimension of dA.
[in]	dB	REAL array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in]	lddb	Leading dimension of dB.
[in]	beta	Scalar \( \beta \)
[in,out]	dC	REAL array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]	lddc	Leading dimension of dC.
[in]	queue	magma_queue_t Queue to execute in.

void magma_zgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex_const_ptr	dA,
		magma_int_t	ldda,
		magmaDoubleComplex_const_ptr	dB,
		magma_int_t	lddb,
		magmaDoubleComplex	beta,
		magmaDoubleComplex_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

Perform matrix-matrix product, \( C = \alpha op(A) op(B) + \beta C \).

Parameters

[in]	transA	Operation op(A) to perform on matrix A.
[in]	transB	Operation op(B) to perform on matrix B.
[in]	m	Number of rows of C and op(A). m >= 0.
[in]	n	Number of columns of C and op(B). n >= 0.
[in]	k	Number of columns of op(A) and rows of op(B). k >= 0.
[in]	alpha	Scalar \( \alpha \)
[in]	dA	COMPLEX_16 array on GPU device. If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
[in]	ldda	Leading dimension of dA.
[in]	dB	COMPLEX_16 array on GPU device. If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
[in]	lddb	Leading dimension of dB.
[in]	beta	Scalar \( \beta \)
[in,out]	dC	COMPLEX_16 array on GPU device. The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
[in]	lddc	Leading dimension of dC.
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_cgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaFloatComplex	alpha,
		magmaFloatComplex const *	dA,
		magma_int_t	ldda,
		magmaFloatComplex const *	dB,
		magma_int_t	lddb,
		magmaFloatComplex	beta,
		magmaFloatComplex *	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in]	alpha	COMPLEX On entry, ALPHA specifies the scalar alpha.
[in]	dA	COMPLEX array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in]	ldda	INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	dB	COMPLEX array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in]	lddb	INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out]	dC	COMPLEX array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alphaop( dA )op( dB ) + beta*dC ).
[in]	lddc	INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_cgemm_reduce	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaFloatComplex	alpha,
		magmaFloatComplex_const_ptr	dA,
		magma_int_t	ldda,
		magmaFloatComplex_const_ptr	dB,
		magma_int_t	lddb,
		magmaFloatComplex	beta,
		magmaFloatComplex_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

CGEMM_REDUCE performs one of the matrix-matrix operations.

C := alpha*A^T*B + beta*C,

where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.

This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.

void magmablas_dgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		double	alpha,
		double const *	dA,
		magma_int_t	ldda,
		double const *	dB,
		magma_int_t	lddb,
		double	beta,
		double *	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

DGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in]	alpha	DOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in]	dA	DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in]	ldda	INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	dB	DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in]	lddb	INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out]	dC	DOUBLE PRECISION array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alphaop( dA )op( dB ) + beta*dC ).
[in]	lddc	INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_dgemm_reduce	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		double	alpha,
		magmaDouble_const_ptr	dA,
		magma_int_t	ldda,
		magmaDouble_const_ptr	dB,
		magma_int_t	lddb,
		double	beta,
		magmaDouble_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

DGEMM_REDUCE performs one of the matrix-matrix operations.

C := alpha*A^T*B + beta*C,

where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.

This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.

void magmablas_sgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		float	alpha,
		float const *	dA,
		magma_int_t	ldda,
		float const *	dB,
		magma_int_t	lddb,
		float	beta,
		float *	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

SGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in]	alpha	REAL On entry, ALPHA specifies the scalar alpha.
[in]	dA	REAL array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in]	ldda	INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	dB	REAL array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in]	lddb	INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	REAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out]	dC	REAL array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alphaop( dA )op( dB ) + beta*dC ).
[in]	lddc	INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_sgemm_reduce	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		float	alpha,
		magmaFloat_const_ptr	dA,
		magma_int_t	ldda,
		magmaFloat_const_ptr	dB,
		magma_int_t	lddb,
		float	beta,
		magmaFloat_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

SGEMM_REDUCE performs one of the matrix-matrix operations.

C := alpha*A^T*B + beta*C,

where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.

This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.

void magmablas_zgemm	(	magma_trans_t	transA,
		magma_trans_t	transB,
		magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex const *	dA,
		magma_int_t	ldda,
		magmaDoubleComplex const *	dB,
		magma_int_t	lddb,
		magmaDoubleComplex	beta,
		magmaDoubleComplex *	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters

[in]	transA	magma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( A ) = A. = MagmaTrans: op( A ) = AT. = MagmaConjTrans: op( A ) = AH.
[in]	transB	magma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows: = MagmaNoTrans: op( B ) = B. = MagmaTrans: op( B ) = BT. = MagmaConjTrans: op( B ) = BH.
[in]	m	INTEGER. On entry, M specifies the number of rows of the matrix op( dA ) and of the matrix dC. M must be at least zero.
[in]	n	INTEGER. On entry, N specifies the number of columns of the matrix op( dB ) and the number of columns of the matrix dC. N must be at least zero.
[in]	k	INTEGER. On entry, K specifies the number of columns of the matrix op( dA ) and the number of rows of the matrix op( dB ). K must be at least zero.
[in]	alpha	COMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in]	dA	COMPLEX_16 array of DIMENSION ( LDA, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array dA must contain the matrix dA, otherwise the leading k by m part of the array dA must contain the matrix dA.
[in]	ldda	INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ).
[in]	dB	COMPLEX_16 array of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array dB must contain the matrix dB, otherwise the leading n by k part of the array dB must contain the matrix dB.
[in]	lddb	INTEGER. On entry, LDB specifies the first dimension of dB as declared in the calling (sub) program. When transB = MagmaNoTrans then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ).
[in]	beta	COMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then dC need not be set on input.
[in,out]	dC	COMPLEX_16 array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array dC must contain the matrix dC, except when beta is zero, in which case dC need not be set on entry. On exit, the array dC is overwritten by the m by n matrix ( alphaop( dA )op( dB ) + beta*dC ).
[in]	lddc	INTEGER. On entry, LDC specifies the first dimension of dC as declared in the calling (sub) program. LDC must be at least max( 1, m ).
[in]	queue	magma_queue_t Queue to execute in.

void magmablas_zgemm_reduce	(	magma_int_t	m,
		magma_int_t	n,
		magma_int_t	k,
		magmaDoubleComplex	alpha,
		magmaDoubleComplex_const_ptr	dA,
		magma_int_t	ldda,
		magmaDoubleComplex_const_ptr	dB,
		magma_int_t	lddb,
		magmaDoubleComplex	beta,
		magmaDoubleComplex_ptr	dC,
		magma_int_t	lddc,
		magma_queue_t	queue
	)

ZGEMM_REDUCE performs one of the matrix-matrix operations.

C := alpha*A^T*B + beta*C,

where alpha and beta are scalars, and A, B and C are matrices, with A a k-by-m matrix, B a k-by-n matrix, and C an m-by-n matrix.

This routine is tuned for m, n << k. Typically, m and n are expected to be less than 128.

Functions

Detailed Description

Function Documentation

Parameters

Parameters

Parameters

Parameters