PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pztrmm.c File Reference
#include "common.h"
Include dependency graph for pztrmm.c:

Go to the source code of this file.

Macros

#define A(m, n)   BLKADDR(A, PLASMA_Complex64_t, m, n)
#define B(m, n)   BLKADDR(B, PLASMA_Complex64_t, m, n)

Functions

void plasma_pztrmm_quark (PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum trans, PLASMA_enum diag, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Mathieu Faverge
Date:
2010-11-15
Precisions:
normal z -> s d c

Definition in file pztrmm.c.


Macro Definition Documentation

#define A(m, n)    BLKADDR(A, PLASMA_Complex64_t, m, n)

Definition at line 17 of file pztrmm.c.

#define B(m, n)    BLKADDR(B, PLASMA_Complex64_t, m, n)

Definition at line 18 of file pztrmm.c.


Function Documentation

void plasma_pztrmm_quark ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_enum  trans,
PLASMA_enum  diag,
PLASMA_Complex64_t  alpha,
PLASMA_desc  A,
PLASMA_desc  B,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile triangular matrix-matrix multiplication - dynamic scheduling

Definition at line 269 of file pztrmm.c.

References A, B, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLeft, PlasmaNoTrans, PlasmaUpper, plasma_context_struct::quark, QUARK_CORE_zgemm(), QUARK_CORE_ztrmm(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /*
     * Body of plasma_pztrmm_quark(): tile triangular matrix-matrix multiply,
     * submitted as QUARK tasks.
     *
     * For every tile B(m, n) one QUARK_CORE_ztrmm task is submitted with the
     * matching diagonal tile of A, followed by QUARK_CORE_zgemm tasks that
     * accumulate (beta = 1) the contributions of the off-diagonal tiles of A
     * into the same B tile.
     *
     * Parameters (as used below):
     *   side, uplo, trans, diag  forwarded to the ztrmm kernel; side/uplo/trans
     *                            also select which of the eight loop nests runs.
     *   alpha                    scalar forwarded to both kernels.
     *   A, B                     tile descriptors; B is updated in place.
     *   sequence                 nothing is submitted unless its status is
     *                            PLASMA_SUCCESS; its quark_sequence is attached
     *                            to every task through task_flags.
     *   request                  not referenced in this routine.
     *
     * The tile-row / tile-column traversal direction differs per case so that,
     * in submission order, the B tiles read by the zgemm tasks have not yet
     * been scheduled for overwrite by their own ztrmm.
     *
     * NOTE(review): the call heads, their leading enum arguments and the
     * declarations of plasma, task_flags and zone were absent from the
     * extracted listing and have been restored; confirm against the PLASMA
     * 2.4.5 pztrmm.c source.
     */
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int lda, ldak, ldb, ldbk;
    int tempkm, tempkn, tempmm, tempnn;

    /* beta = 1 for the zgemm updates: accumulate into B(m, n). */
    PLASMA_Complex64_t zone = (PLASMA_Complex64_t)1.0;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    if (side == PlasmaLeft) {
        if (uplo == PlasmaUpper) {
            /*
             *  PlasmaLeft / PlasmaUpper / PlasmaNoTrans
             */
            if (trans == PlasmaNoTrans) {
                for (m = 0; m < B.mt; m++) {
                    /* Last tile row/column may be partial. */
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldb = BLKLDD(B, m);
                    lda = BLKLDD(A, m);
                    for (n = 0; n < B.nt; n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(m, m), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        for (k = m+1; k < A.mt; k++) {
                            tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                            ldbk = BLKLDD(B, k);
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                trans, PlasmaNoTrans,
                                tempmm, tempnn, tempkn, A.mb,
                                alpha, A(m, k), lda,
                                       B(k, n), ldbk,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
            /*
             *  PlasmaLeft / PlasmaUpper / Plasma[Conj]Trans
             */
            else {
                for (m = B.mt-1; m > -1; m--) {
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldb = BLKLDD(B, m);
                    lda = BLKLDD(A, m);
                    for (n = 0; n < B.nt; n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(m, m), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        /* k < m <= mt-1, so tile k is never the last one:
                         * full tile sizes are used directly. */
                        for (k = 0; k < m; k++) {
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                trans, PlasmaNoTrans,
                                tempmm, tempnn, B.mb, A.mb,
                                alpha, A(k, m), A.mb,
                                       B(k, n), B.mb,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
        }
        else {
            /*
             *  PlasmaLeft / PlasmaLower / PlasmaNoTrans
             */
            if (trans == PlasmaNoTrans) {
                for (m = B.mt-1; m > -1; m--) {
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldb = BLKLDD(B, m);
                    lda = BLKLDD(A, m);
                    for (n = 0; n < B.nt; n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(m, m), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        for (k = 0; k < m; k++) {
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                trans, PlasmaNoTrans,
                                tempmm, tempnn, B.mb, A.mb,
                                alpha, A(m, k), lda,
                                       B(k, n), B.mb,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
            /*
             *  PlasmaLeft / PlasmaLower / Plasma[Conj]Trans
             */
            else {
                for (m = 0; m < B.mt; m++) {
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldb = BLKLDD(B, m);
                    lda = BLKLDD(A, m);
                    for (n = 0; n < B.nt; n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(m, m), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        for (k = m+1; k < A.mt; k++) {
                            tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                            ldak = BLKLDD(A, k);
                            ldbk = BLKLDD(B, k);
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                trans, PlasmaNoTrans,
                                tempmm, tempnn, tempkm, A.mb,
                                alpha, A(k, m), ldak,
                                       B(k, n), ldbk,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
        }
    }
    else {
        if (uplo == PlasmaUpper) {
            /*
             *  PlasmaRight / PlasmaUpper / PlasmaNoTrans
             */
            if (trans == PlasmaNoTrans) {
                for (n = B.nt-1; n > -1; n--) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    lda = BLKLDD(A, n);
                    for (m = 0; m < B.mt; m++) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldb = BLKLDD(B, m);
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(n, n), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        for (k = 0; k < n; k++) {
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                PlasmaNoTrans, trans,
                                tempmm, tempnn, B.mb, A.mb,
                                alpha, B(m, k), ldb,
                                       A(k, n), A.mb,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
            /*
             *  PlasmaRight / PlasmaUpper / Plasma[Conj]Trans
             */
            else {
                for (n = 0; n < B.nt; n++) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    lda = BLKLDD(A, n);
                    for (m = 0; m < B.mt; m++) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldb = BLKLDD(B, m);
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(n, n), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        for (k = n+1; k < A.mt; k++) {
                            tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                PlasmaNoTrans, trans,
                                tempmm, tempnn, tempkn, A.mb,
                                alpha, B(m, k), ldb,
                                       A(n, k), lda,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
        }
        else {
            /*
             *  PlasmaRight / PlasmaLower / PlasmaNoTrans
             */
            if (trans == PlasmaNoTrans) {
                for (n = 0; n < B.nt; n++) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    lda = BLKLDD(A, n);
                    for (m = 0; m < B.mt; m++) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldb = BLKLDD(B, m);
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(n, n), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        for (k = n+1; k < A.mt; k++) {
                            tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                            ldak = BLKLDD(A, k);
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                PlasmaNoTrans, trans,
                                tempmm, tempnn, tempkn, A.mb,
                                alpha, B(m, k), ldb,
                                       A(k, n), ldak,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
            /*
             *  PlasmaRight / PlasmaLower / Plasma[Conj]Trans
             */
            else {
                for (n = B.nt-1; n > -1; n--) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    lda = BLKLDD(A, n);
                    for (m = 0; m < B.mt; m++) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldb = BLKLDD(B, m);
                        QUARK_CORE_ztrmm(
                            plasma->quark, &task_flags,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A.mb,
                            alpha, A(n, n), lda,  /* lda * tempkm */
                                   B(m, n), ldb); /* ldb * tempnn */

                        for (k = 0; k < n; k++) {
                            QUARK_CORE_zgemm(
                                plasma->quark, &task_flags,
                                PlasmaNoTrans, trans,
                                tempmm, tempnn, B.mb, A.mb,
                                alpha, B(m, k), ldb,
                                       A(n, k), lda,
                                zone,  B(m, n), ldb);
                        }
                    }
                }
            }
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function: