PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_zttmqr.c File Reference
#include "common.h"
Include dependency graph for core_zttmqr.c:

Go to the source code of this file.

Functions

int CORE_zttmqr (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *V, int LDV, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *WORK, int LDWORK)
void QUARK_CORE_zttmqr (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, PLASMA_Complex64_t *A1, int lda1, PLASMA_Complex64_t *A2, int lda2, PLASMA_Complex64_t *V, int ldv, PLASMA_Complex64_t *T, int ldt)
void CORE_zttmqr_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Dulceneia Becker
Date:
2010-11-15 normal z -> c d s

Definition in file core_zttmqr.c.


Function Documentation

int CORE_zttmqr ( int  side,
int  trans,
int  M1,
int  N1,
int  M2,
int  N2,
int  K,
int  IB,
PLASMA_Complex64_t A1,
int  LDA1,
PLASMA_Complex64_t A2,
int  LDA2,
PLASMA_Complex64_t V,
int  LDV,
PLASMA_Complex64_t T,
int  LDT,
PLASMA_Complex64_t WORK,
int  LDWORK 
)

CORE_zttmqr overwrites the general complex M1-by-N1 tile A1 and M2-by-N2 tile A2 (N1 == N2) with

                  SIDE = 'L'        SIDE = 'R'

TRANS = 'N': Q * | A1 | | A1 | * Q | A2 | | A2 |

TRANS = 'C': Q**H * | A1 | | A1 | * Q**H | A2 | | A2 |

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CORE_zttqrt.

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**H from the Left;
  • PlasmaRight : apply Q or Q**H from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaConjTrans : ConjTranspose, apply Q**H.
[in]M1The number of rows of the tile A1. M1 >= 0.
[in]N1The number of columns of the tile A1. N1 >= 0.
[in]M2The number of rows of the tile A2. M2 >= 0.
[in]N2The number of columns of the tile A2. N2 >= 0.
[in]KThe number of elementary reflectors whose product defines the matrix Q.
[in]IBThe inner-blocking size. IB >= 0.
[in,out]A1On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]A2On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]VThe i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_ZTTQRT in the first k rows of its array argument V.
[in]LDVThe leading dimension of the array V. LDV >= max(1,K).
[out]TThe IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[out]WORKWorkspace array of size LDWORK-by-N1.
[in]LDWORKThe dimension of the array WORK. LDWORK >= max(1,IB).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 116 of file core_zttmqr.c.

References CORE_zparfb(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaConjTrans, PlasmaForward, PlasmaLeft, PlasmaNoTrans, and PlasmaRight.

{
int i, i1, i3;
int NQ, NW;
int kb, l;
int ic = 0;
int jc = 0;
int mi1 = M1;
int mi2 = M2;
int ni1 = N1;
int ni2 = N2;
/* Check input arguments */
if ((side != PlasmaLeft) && (side != PlasmaRight)) {
coreblas_error(1, "Illegal value of side");
return -1;
}
/* NQ is the order of Q */
if (side == PlasmaLeft) {
NQ = M2;
NW = IB;
}
else {
NQ = N2;
NW = M1;
}
coreblas_error(2, "Illegal value of trans");
return -2;
}
if (M1 < 0) {
coreblas_error(3, "Illegal value of M1");
return -3;
}
if (N1 < 0) {
coreblas_error(4, "Illegal value of N1");
return -4;
}
if ( (M2 < 0) ||
( (M2 != M1) && (side == PlasmaRight) ) ){
coreblas_error(5, "Illegal value of M2");
return -5;
}
if ( (N2 < 0) ||
( (N2 != N1) && (side == PlasmaLeft) ) ){
coreblas_error(6, "Illegal value of N2");
return -6;
}
if ((K < 0) ||
( (side == PlasmaLeft) && (K > M1) ) ||
( (side == PlasmaRight) && (K > N1) ) ) {
coreblas_error(7, "Illegal value of K");
return -7;
}
if (IB < 0) {
coreblas_error(8, "Illegal value of IB");
return -8;
}
if (LDA1 < max(1,M1)){
coreblas_error(10, "Illegal value of LDA1");
return -10;
}
if (LDA2 < max(1,M2)){
coreblas_error(12, "Illegal value of LDA2");
return -12;
}
if (LDV < max(1,NQ)){
coreblas_error(14, "Illegal value of LDV");
return -14;
}
if (LDT < max(1,IB)){
coreblas_error(16, "Illegal value of LDT");
return -16;
}
if (LDWORK < max(1,NW)){
coreblas_error(18, "Illegal value of LDWORK");
return -18;
}
/* Quick return */
if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
if (((side == PlasmaLeft) && (trans != PlasmaNoTrans))
|| ((side == PlasmaRight) && (trans == PlasmaNoTrans))) {
i1 = 0;
i3 = IB;
}
else {
i1 = ( ( K-1 ) / IB )*IB;
i3 = -IB;
}
for (i = i1; (i > -1) && (i < K); i+=i3) {
kb = min(IB, K-i);
if (side == PlasmaLeft) {
mi1 = kb;
mi2 = min(i+kb, M2);
l = min(kb, max(0, M2-i));
ic = i;
}
else {
ni1 = kb;
ni2 = min(i+kb, N2);
l = min(kb, max(0, N2-i));
jc = i;
}
/*
* Apply H or H' (NOTE: CORE_zparfb used to be CORE_zttrfb)
*/
mi1, ni1, mi2, ni2, kb, l,
&A1[LDA1*jc+ic], LDA1,
A2, LDA2,
&V[LDV*i], LDV,
&T[LDT*i], LDT,
WORK, LDWORK);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_zttmqr_quark ( Quark quark)

Definition at line 291 of file core_zttmqr.c.

References CORE_zttmqr(), quark_unpack_args_18, side, T, trans, and V.

{
int side;
int trans;
int m1;
int n1;
int m2;
int n2;
int k;
int ib;
int lda1;
int lda2;
int ldv;
int ldt;
int ldwork;
quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
CORE_zttmqr(side, trans, m1, n1, m2, n2, k, ib,
A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_zttmqr ( Quark quark,
Quark_Task_Flags task_flags,
int  side,
int  trans,
int  m1,
int  n1,
int  m2,
int  n2,
int  k,
int  ib,
int  nb,
PLASMA_Complex64_t A1,
int  lda1,
PLASMA_Complex64_t A2,
int  lda2,
PLASMA_Complex64_t V,
int  ldv,
PLASMA_Complex64_t T,
int  ldt 
)

Definition at line 251 of file core_zttmqr.c.

References CORE_zttmqr_quark(), DAG_CORE_TTMQR, INOUT, INPUT, PlasmaLeft, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
int ldwork = side == PlasmaLeft ? ib : nb;
sizeof(PLASMA_enum), &side, VALUE,
sizeof(PLASMA_enum), &trans, VALUE,
sizeof(int), &m1, VALUE,
sizeof(int), &n1, VALUE,
sizeof(int), &m2, VALUE,
sizeof(int), &n2, VALUE,
sizeof(int), &k, VALUE,
sizeof(int), &ib, VALUE,
sizeof(PLASMA_Complex64_t)*nb*nb, A1, INOUT,
sizeof(int), &lda1, VALUE,
sizeof(PLASMA_Complex64_t)*nb*nb, A2, INOUT,
sizeof(int), &lda2, VALUE,
sizeof(int), &ldv, VALUE,
sizeof(PLASMA_Complex64_t)*ib*nb, T, INPUT,
sizeof(int), &ldt, VALUE,
sizeof(PLASMA_Complex64_t)*ib*nb, NULL, SCRATCH,
sizeof(int), &ldwork, VALUE,
0);
}

Here is the call graph for this function:

Here is the caller graph for this function: