MAGMA  2.3.0
Matrix Algebra for GPU and Multicore Architectures
 All Classes Files Functions Friends Groups Pages
gemm: General matrix multiply: C = AB + C

\( C = \alpha \;op(A) \;op(B) + \beta C \) More...

Functions

void magmablas_cgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t ldda, magmaFloatComplex const *const *dB_array, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 CGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_cgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t ldda, magmaFloatComplex const *const *dB_array, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t lddc, magma_int_t roffA, magma_int_t coffA, magma_int_t roffB, magma_int_t coffB, magma_int_t roffC, magma_int_t coffC, magma_int_t batchCount, magma_queue_t queue)
 CGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_cgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, magmaFloatComplex alpha, magmaFloatComplex const *const *dA_array, magma_int_t *ldda, magmaFloatComplex const *const *dB_array, magma_int_t *lddb, magmaFloatComplex beta, magmaFloatComplex **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue)
 CGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_dgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const *const *dA_array, magma_int_t ldda, double const *const *dB_array, magma_int_t lddb, double beta, double **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 DGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_dgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, double const *const *dA_array, magma_int_t ldda, double const *const *dB_array, magma_int_t lddb, double beta, double **dC_array, magma_int_t lddc, magma_int_t roffA, magma_int_t coffA, magma_int_t roffB, magma_int_t coffB, magma_int_t roffC, magma_int_t coffC, magma_int_t batchCount, magma_queue_t queue)
 DGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_dgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, double alpha, double const *const *dA_array, magma_int_t *ldda, double const *const *dB_array, magma_int_t *lddb, double beta, double **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue)
 DGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_sgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const *const *dA_array, magma_int_t ldda, float const *const *dB_array, magma_int_t lddb, float beta, float **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 SGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_sgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, float alpha, float const *const *dA_array, magma_int_t ldda, float const *const *dB_array, magma_int_t lddb, float beta, float **dC_array, magma_int_t lddc, magma_int_t roffA, magma_int_t coffA, magma_int_t roffB, magma_int_t coffB, magma_int_t roffC, magma_int_t coffC, magma_int_t batchCount, magma_queue_t queue)
 SGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_sgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, float alpha, float const *const *dA_array, magma_int_t *ldda, float const *const *dB_array, magma_int_t *lddb, float beta, float **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue)
 SGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_zgemm_batched (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t batchCount, magma_queue_t queue)
 ZGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_zgemm_batched_core (magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t ldda, magmaDoubleComplex const *const *dB_array, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t lddc, magma_int_t roffA, magma_int_t coffA, magma_int_t roffB, magma_int_t coffB, magma_int_t roffC, magma_int_t coffC, magma_int_t batchCount, magma_queue_t queue)
 ZGEMM performs one of the matrix-matrix operations. More...
 
void magmablas_zgemm_vbatched (magma_trans_t transA, magma_trans_t transB, magma_int_t *m, magma_int_t *n, magma_int_t *k, magmaDoubleComplex alpha, magmaDoubleComplex const *const *dA_array, magma_int_t *ldda, magmaDoubleComplex const *const *dB_array, magma_int_t *lddb, magmaDoubleComplex beta, magmaDoubleComplex **dC_array, magma_int_t *lddc, magma_int_t batchCount, magma_queue_t queue)
 ZGEMM performs one of the matrix-matrix operations. More...
 

Detailed Description

\( C = \alpha \;op(A) \;op(B) + \beta C \)

Function Documentation

void magmablas_cgemm_batched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex const *const *  dA_array,
magma_int_t  ldda,
magmaFloatComplex const *const *  dB_array,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaCOMPLEX On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaCOMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_cgemm_batched_core ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaFloatComplex  alpha,
magmaFloatComplex const *const *  dA_array,
magma_int_t  ldda,
magmaFloatComplex const *const *  dB_array,
magma_int_t  lddb,
magmaFloatComplex  beta,
magmaFloatComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  roffA,
magma_int_t  coffA,
magma_int_t  roffB,
magma_int_t  coffB,
magma_int_t  roffC,
magma_int_t  coffC,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaCOMPLEX On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaCOMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]roffAINTEGER Row offset for all 'A' matrices.
[in]coffAINTEGER Column offset for all 'A' matrices.
[in]roffBINTEGER Row offset for all 'B' matrices.
[in]coffBINTEGER Column offset for all 'B' matrices.
[in]roffCINTEGER Row offset for all 'C' matrices.
[in]coffCINTEGER Column offset for all 'C' matrices.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_cgemm_vbatched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t *  m,
magma_int_t *  n,
magma_int_t *  k,
magmaFloatComplex  alpha,
magmaFloatComplex const *const *  dA_array,
magma_int_t *  ldda,
magmaFloatComplex const *const *  dB_array,
magma_int_t *  lddb,
magmaFloatComplex  beta,
magmaFloatComplex **  dC_array,
magma_int_t *  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

CGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mArray of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]nArray of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]kArray of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]alphaCOMPLEX On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]lddaArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]lddbArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]betaCOMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dgemm_batched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
double  alpha,
double const *const *  dA_array,
magma_int_t  ldda,
double const *const *  dB_array,
magma_int_t  lddb,
double  beta,
double **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

DGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaDOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaDOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dgemm_batched_core ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
double  alpha,
double const *const *  dA_array,
magma_int_t  ldda,
double const *const *  dB_array,
magma_int_t  lddb,
double  beta,
double **  dC_array,
magma_int_t  lddc,
magma_int_t  roffA,
magma_int_t  coffA,
magma_int_t  roffB,
magma_int_t  coffB,
magma_int_t  roffC,
magma_int_t  coffC,
magma_int_t  batchCount,
magma_queue_t  queue 
)

DGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaDOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( LDB, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaDOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]roffAINTEGER Row offset for all 'A' matrices.
[in]coffAINTEGER Column offset for all 'A' matrices.
[in]roffBINTEGER Row offset for all 'B' matrices.
[in]coffBINTEGER Column offset for all 'B' matrices.
[in]roffCINTEGER Row offset for all 'C' matrices.
[in]coffCINTEGER Column offset for all 'C' matrices.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_dgemm_vbatched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t *  m,
magma_int_t *  n,
magma_int_t *  k,
double  alpha,
double const *const *  dA_array,
magma_int_t *  ldda,
double const *const *  dB_array,
magma_int_t *  lddb,
double  beta,
double **  dC_array,
magma_int_t *  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

DGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mArray of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]nArray of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]kArray of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]alphaDOUBLE PRECISION On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]lddaArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]lddbArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]betaDOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a DOUBLE PRECISION array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_sgemm_batched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
float  alpha,
float const *const *  dA_array,
magma_int_t  ldda,
float const *const *  dB_array,
magma_int_t  lddb,
float  beta,
float **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

SGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaREAL On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaREAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a REAL array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_sgemm_batched_core ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
float  alpha,
float const *const *  dA_array,
magma_int_t  ldda,
float const *const *  dB_array,
magma_int_t  lddb,
float  beta,
float **  dC_array,
magma_int_t  lddc,
magma_int_t  roffA,
magma_int_t  coffA,
magma_int_t  roffB,
magma_int_t  coffB,
magma_int_t  roffC,
magma_int_t  coffC,
magma_int_t  batchCount,
magma_queue_t  queue 
)

SGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaREAL On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaREAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). REAL array of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, each array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]roffAINTEGER Row offset for all 'A' matrices.
[in]coffAINTEGER Column offset for all 'A' matrices.
[in]roffBINTEGER Row offset for all 'B' matrices.
[in]coffBINTEGER Column offset for all 'B' matrices.
[in]roffCINTEGER Row offset for all 'C' matrices.
[in]coffCINTEGER Column offset for all 'C' matrices.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_sgemm_vbatched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t *  m,
magma_int_t *  n,
magma_int_t *  k,
float  alpha,
float const *const *  dA_array,
magma_int_t *  ldda,
float const *const *  dB_array,
magma_int_t *  lddb,
float  beta,
float **  dC_array,
magma_int_t *  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

SGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mArray of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]nArray of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]kArray of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]alphaREAL On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a REAL array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]lddaArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a REAL array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]lddbArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]betaREAL. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a REAL array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_zgemm_batched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
magmaDoubleComplex const *const *  dB_array,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaCOMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaCOMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_zgemm_batched_core ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t  m,
magma_int_t  n,
magma_int_t  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t  ldda,
magmaDoubleComplex const *const *  dB_array,
magma_int_t  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex **  dC_array,
magma_int_t  lddc,
magma_int_t  roffA,
magma_int_t  coffA,
magma_int_t  roffB,
magma_int_t  coffB,
magma_int_t  roffC,
magma_int_t  coffC,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mINTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero.
[in]nINTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero.
[in]kINTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero.
[in]alphaCOMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( ldda, ka ), where ka is k when transA = MagmaNoTrans, and is m otherwise. Before entry with transA = MagmaNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A.
[in]lddaINTEGER. On entry, ldda specifies the first dimension of each array A as declared in the calling (sub) program. When transA = MagmaNoTrans then ldda must be at least max( 1, m ), otherwise ldda must be at least max( 1, k ).
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( lddb, kb ), where kb is n when transB = MagmaNoTrans, and is k otherwise. Before entry with transB = MagmaNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B.
[in]lddbINTEGER. On entry, lddb specifies the first dimension of each array B as declared in the calling (sub) program. When transB = MagmaNoTrans then lddb must be at least max( 1, k ), otherwise lddb must be at least max( 1, n ).
[in]betaCOMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( lddc, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcINTEGER. On entry, lddc specifies the first dimension of each array C as declared in the calling (sub) program. lddc must be at least max( 1, m ).
[in]roffAINTEGER Row offset for all 'A' matrices.
[in]coffAINTEGER Column offset for all 'A' matrices.
[in]roffBINTEGER Row offset for all 'B' matrices.
[in]coffBINTEGER Column offset for all 'B' matrices.
[in]roffCINTEGER Row offset for all 'C' matrices.
[in]coffCINTEGER Column offset for all 'C' matrices.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.
void magmablas_zgemm_vbatched ( magma_trans_t  transA,
magma_trans_t  transB,
magma_int_t *  m,
magma_int_t *  n,
magma_int_t *  k,
magmaDoubleComplex  alpha,
magmaDoubleComplex const *const *  dA_array,
magma_int_t *  ldda,
magmaDoubleComplex const *const *  dB_array,
magma_int_t *  lddb,
magmaDoubleComplex  beta,
magmaDoubleComplex **  dC_array,
magma_int_t *  lddc,
magma_int_t  batchCount,
magma_queue_t  queue 
)

ZGEMM performs one of the matrix-matrix operations.

C = alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X      or
op( X ) = X**T   or
op( X ) = X**H,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters

Parameters
[in]transAmagma_trans_t. On entry, transA specifies the form of op( A ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( A ) = A.
  • = MagmaTrans: op( A ) = A**T.
  • = MagmaConjTrans: op( A ) = A**H.
[in]transBmagma_trans_t. On entry, transB specifies the form of op( B ) to be used in the matrix multiplication as follows:
  • = MagmaNoTrans: op( B ) = B.
  • = MagmaTrans: op( B ) = B**T.
  • = MagmaConjTrans: op( B ) = B**H.
[in]mArray of integers, dimension (batchCount + 1) On entry, each INTEGER M specifies the number of rows of the corresponding matrices op( A ) and C. M must be at least zero. The last element of the array is used internally by the routine.
[in]nArray of integers, dimension (batchCount + 1). On entry, each INTEGER N specifies the number of columns of the corresponding matrix op( B ) and the number of columns of the corresponding matrix C. N must be at least zero. The last element of the array is used internally by the routine.
[in]kArray of integers, dimension (batchCount + 1). On entry, each INTEGER K specifies the number of columns of the corresponding matrix op( A ) and the number of rows of the corresponding matrix op( B ). K must be at least zero. The last element of the array is used internally by the routine.
[in]alphaCOMPLEX_16 On entry, ALPHA specifies the scalar alpha.
[in]dA_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array A of DIMENSION ( LDDA, ka ), where ka is K when transA = MagmaNoTrans, and is M otherwise. Before entry with transA = MagmaNoTrans, the leading M by K part of the array A must contain the matrix A, otherwise the leading K by M part of the array A must contain the matrix A.
[in]lddaArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDA specifies the first dimension of the corresponding matrix A as declared in the calling (sub) program. When transA = MagmaNoTrans then LDDA must be at least max( 1, M ), otherwise ldda must be at least max( 1, K ). The last element of the array is used internally by the routine.
[in]dB_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array B of DIMENSION ( LDDB, kb ), where kb is N when transB = MagmaNoTrans, and is K otherwise. Before entry with transB = MagmaNoTrans, the leading K by N part of the array B must contain the matrix B, otherwise the leading N by K part of the array B must contain the matrix B.
[in]lddbArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDB specifies the first dimension of the corresponding matrix B as declared in the calling (sub) program. When transB = MagmaNoTrans then LDDB must be at least max( 1, K ), otherwise LDDB must be at least max( 1, N ). The last element of the array is used internally by the routine.
[in]betaCOMPLEX_16. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input.
[in,out]dC_arrayArray of pointers, dimension (batchCount). Each is a COMPLEX_16 array C of DIMENSION ( LDDC, N ). Before entry, the leading M by N part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ).
[in]lddcArray of integers, dimension (batchCount + 1). On entry, each INTEGER LDDC specifies the first dimension of the corresponding matrix C as declared in the calling (sub) program. LDDC must be at least max( 1, M ). The last element of the array is used internally by the routine.
[in]batchCountINTEGER The number of matrices to operate on.
[in]queuemagma_queue_t Queue to execute in.