PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.
- Version:
- 2.4.5
- Author:
- Mathieu Faverge
- Date:
- 2010-11-15 (precisions: normal z -> s d c)
Definition in file pztrmm.c.
Parallel tile triangular matrix-matrix multiplication - dynamic scheduling
Definition at line 269 of file pztrmm.c.
References A, B, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLeft, PlasmaNoTrans, PlasmaUpper, plasma_context_struct::quark, QUARK_CORE_zgemm(), QUARK_CORE_ztrmm(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.
{
/*
 * Body of the dynamically scheduled parallel tile triangular matrix-matrix
 * multiplication routine.
 *
 * NOTE(review): this listing is an incomplete extract. The function
 * signature, the declaration/initialisation of plasma, task_flags, zone and
 * of the leading dimensions, every if-condition that pairs with the else
 * branches below, and the names and trailing arguments of the task-insertion
 * calls are all absent. Going by the page's references list, the calls are
 * presumably QUARK_CORE_ztrmm() (diagonal tile) and QUARK_CORE_zgemm()
 * (off-diagonal tiles), and the branches presumably select on PlasmaLeft,
 * PlasmaUpper and PlasmaNoTrans -- confirm against the original file.
 *
 * What the visible code does show: eight loop nests. Each one walks the
 * tiles B(m, n) in a fixed order, issues one call fragment that references
 * a diagonal tile of A, then runs an inner k loop over the remaining tiles.
 * Nests 1-4 use m as the outer index, nests 5-8 use n.
 */
int k, m, n;
int lda, ldak, ldb, ldbk;
int tempkm, tempkn, tempmm, tempnn;
/* NOTE(review): as extracted this return is unconditional, which would make
 * everything below unreachable. Its guard was presumably lost in extraction
 * (the references list mentions plasma_sequence_t::status and
 * PLASMA_SUCCESS) -- confirm against the original file. */
return;
/* Nest 1: m ascending, inner k runs from m+1 upward and reads A(m, k). */
for (m = 0; m < B.
mt; m++) {
/* tempmm: height of tile row m of B -- the remainder B.m - m*B.mb for the
 * last tile row, the full tile height B.mb otherwise. */
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
/* tempnn: width of tile column n of B -- the remainder B.n - n*B.nb for the
 * last tile column, the full tile width B.nb otherwise. */
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* Call fragment on the diagonal tile A(m, m); the callee name and the
 * remaining arguments are missing from the extract. */
plasma->
quark, &task_flags,
alpha, A(m, m), lda,
/* NOTE(review): the loop is bounded by A.mt while tempkn below tests
 * A.nt-1; the two agree only if A has as many tile rows as tile columns --
 * TODO confirm that A is always square here. */
for (k = m+1; k < A.
mt; k++) {
/* tempkn: width of tile column k of A, with the same last-tile remainder
 * rule as above. */
tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
/* Call fragment on the off-diagonal tile A(m, k); callee name and trailing
 * arguments are missing from the extract. */
plasma->
quark, &task_flags,
tempmm, tempnn, tempkn, A.
mb,
alpha, A(m, k), lda,
}
}
}
}
/* NOTE(review): the if-statement matching this else is not in the extract. */
else {
/* Nest 2: m descending from the last tile row of B down to 0, inner k runs
 * over 0 .. m-1. */
for (m = B.
mt-1; m > -1; m--) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* Call fragment on the diagonal tile A(m, m); callee name missing. */
plasma->
quark, &task_flags,
alpha, A(m, m), lda,
for (k = 0; k < m; k++) {
/* Call fragment whose visible arguments end with zone, B(m, n), ldb. The
 * A and B operand lines that precede them in the sibling nests are missing
 * here, as is the callee name. */
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
zone, B(m, n), ldb);
}
}
}
}
}
/* NOTE(review): the if-statement matching this else is not in the extract. */
else {
/* Nest 3: m descending, inner k runs over 0 .. m-1 and reads A(m, k). */
for (m = B.
mt-1; m > -1; m--) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* Call fragment on the diagonal tile A(m, m); callee name missing. */
plasma->
quark, &task_flags,
alpha, A(m, m), lda,
for (k = 0; k < m; k++) {
/* Call fragment using the full tile size B.mb for the k extent, the tile
 * A(m, k) scaled by alpha, and B(m, n) paired with zone. The line between
 * them is missing from the extract. */
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
alpha, A(m, k), lda,
zone, B(m, n), ldb);
}
}
}
}
/* NOTE(review): the if-statement matching this else is not in the extract. */
else {
/* Nest 4: m ascending, inner k runs from m+1 upward and reads A(k, m). */
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* Call fragment on the diagonal tile A(m, m); callee name missing. */
plasma->
quark, &task_flags,
alpha, A(m, m), lda,
for (k = m+1; k < A.
mt; k++) {
/* tempkm: height of tile row k of A -- the remainder A.m - k*A.mb for the
 * last tile row, the full tile height A.mb otherwise. */
tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
/* Call fragment on the tile A(k, m), addressed with ldak rather than lda.
 * NOTE(review): the assignment of ldak is not visible in this extract. */
plasma->
quark, &task_flags,
tempmm, tempnn, tempkm, A.
mb,
alpha, A(k, m), ldak,
}
}
}
}
}
}
/* NOTE(review): the if-statement matching this else is not in the extract.
 * From here on n is the outer index and the diagonal tile is A(n, n). */
else {
/* Nest 5: n descending from the last tile column of B down to 0, inner k
 * runs over 0 .. n-1 and reads B(m, k). */
for (n = B.
nt-1; n > -1; n--) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* Call fragment on the diagonal tile A(n, n); callee name missing. */
plasma->
quark, &task_flags,
alpha, A(n, n), lda,
for (k = 0; k < n; k++) {
/* Call fragment reading B(m, k) scaled by alpha and B(m, n) paired with
 * zone. The operand line between them is missing from the extract. */
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
alpha, B(m, k), ldb,
zone, B(m, n), ldb);
}
}
}
}
/* NOTE(review): the if-statement matching this else is not in the extract. */
else {
/* Nest 6: n ascending, inner k runs from n+1 upward and reads B(m, k). */
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* Call fragment on the diagonal tile A(n, n); callee name missing. */
plasma->
quark, &task_flags,
alpha, A(n, n), lda,
/* NOTE(review): bounded by A.mt while tempkn tests A.nt-1, as in nest 1. */
for (k = n+1; k < A.
mt; k++) {
tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
/* Call fragment reading B(m, k); the remaining operands are missing. */
plasma->
quark, &task_flags,
tempmm, tempnn, tempkn, A.
mb,
alpha, B(m, k), ldb,
}
}
}
}
}
/* NOTE(review): the if-statement matching this else is not in the extract. */
else {
/* Nest 7: n ascending, inner k runs from n+1 upward and reads B(m, k). The
 * visible text is identical to nest 6; whatever distinguishes the two is in
 * the lines missing from the extract. */
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* Call fragment on the diagonal tile A(n, n); callee name missing. */
plasma->
quark, &task_flags,
alpha, A(n, n), lda,
for (k = n+1; k < A.
mt; k++) {
tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
/* Call fragment reading B(m, k); the remaining operands are missing. */
plasma->
quark, &task_flags,
tempmm, tempnn, tempkn, A.
mb,
alpha, B(m, k), ldb,
}
}
}
}
/* NOTE(review): the if-statement matching this else is not in the extract. */
else {
/* Nest 8: n descending, inner k runs over 0 .. n-1 and reads B(m, k). */
for (n = B.
nt-1; n > -1; n--) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* Call fragment on the diagonal tile A(n, n); callee name missing. */
plasma->
quark, &task_flags,
alpha, A(n, n), lda,
for (k = 0; k < n; k++) {
/* Call fragment reading B(m, k); the remaining operands are missing. */
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
alpha, B(m, k), ldb,
}
}
}
}
}
}
}