PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_sormlq.c File Reference
#include <lapacke.h>
#include "common.h"
Include dependency graph for core_sormlq.c:

Go to the source code of this file.

Functions

int CORE_sormlq (int side, int trans, int M, int N, int K, int IB, float *A, int LDA, float *T, int LDT, float *C, int LDC, float *WORK, int LDWORK)
void QUARK_CORE_sormlq (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m, int n, int k, int ib, int nb, float *A, int lda, float *T, int ldt, float *C, int ldc)
void CORE_sormlq_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Jakub Kurzak
Dulceneia Becker
Date:
2010-11-15 s Tue Nov 22 14:35:20 2011

Definition in file core_sormlq.c.


Function Documentation

int CORE_sormlq ( int  side,
int  trans,
int  M,
int  N,
int  K,
int  IB,
float *  A,
int  LDA,
float *  T,
int  LDT,
float *  C,
int  LDC,
float *  WORK,
int  LDWORK 
)

CORE_sormlq overwrites the general complex M-by-N tile C with

              SIDE = 'L'     SIDE = 'R'

TRANS = 'N': Q * C C * Q TRANS = 'C': Q**T * C C * Q**T

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(k) . . . H(2) H(1)

as returned by CORE_sgelqt. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'.

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**T from the Left;
  • PlasmaRight : apply Q or Q**T from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaTrans : Transpose, apply Q**T.
[in]MThe number of rows of the tile C. M >= 0.
[in]NThe number of columns of the tile C. N >= 0.
[in]KThe number of elementary reflectors whose product defines the matrix Q. If SIDE = PlasmaLeft, M >= K >= 0; if SIDE = PlasmaRight, N >= K >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in]ADimension: (LDA,M) if SIDE = PlasmaLeft, (LDA,N) if SIDE = PlasmaRight, The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_sgelqt in the first k rows of its array argument A.
[in]LDAThe leading dimension of the array A. LDA >= max(1,K).
[out]TThe IB-by-K triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[in,out]COn entry, the M-by-N tile C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
[in]LDCThe leading dimension of the array C. LDC >= max(1,M).
[in,out]WORKOn exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
[in]LDWORKThe dimension of the array WORK. If SIDE = PlasmaLeft, LDWORK >= max(1,N); if SIDE = PlasmaRight, LDWORK >= max(1,M).
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 108 of file core_sormlq.c.

References coreblas_error, lapack_const, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, and PlasmaTrans.

{
int i, kb;
int i1, i3;
int nq, nw;
int ic = 0;
int jc = 0;
int ni = N;
int mi = M;
/* Check input arguments */
if ((side != PlasmaLeft) && (side != PlasmaRight)) {
coreblas_error(1, "Illegal value of side");
return -1;
}
/*
* NQ is the order of Q and NW is the minimum dimension of WORK
*/
if (side == PlasmaLeft) {
nq = M;
nw = N;
}
else {
nq = N;
nw = M;
}
if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
coreblas_error(2, "Illegal value of trans");
return -2;
}
if (M < 0) {
coreblas_error(3, "Illegal value of M");
return -3;
}
if (N < 0) {
coreblas_error(4, "Illegal value of N");
return -4;
}
if ((K < 0) || (K > nq)) {
coreblas_error(5, "Illegal value of K");
return -5;
}
if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
coreblas_error(6, "Illegal value of IB");
return -6;
}
if ((LDA < max(1,K)) && (K > 0)) {
coreblas_error(8, "Illegal value of LDA");
return -8;
}
if ((LDC < max(1,M)) && (M > 0)) {
coreblas_error(12, "Illegal value of LDC");
return -12;
}
if ((LDWORK < max(1,nw)) && (nw > 0)) {
coreblas_error(14, "Illegal value of LDWORK");
return -14;
}
/* Quick return */
if ((M == 0) || (N == 0) || (K == 0))
if (((side == PlasmaLeft) && (trans == PlasmaNoTrans))
|| ((side == PlasmaRight) && (trans != PlasmaNoTrans))) {
i1 = 0;
i3 = IB;
}
else {
i1 = ( ( K-1 ) / IB )*IB;
i3 = -IB;
}
if( trans == PlasmaNoTrans) {
}
else {
}
for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
kb = min(IB, K-i);
if (side == PlasmaLeft) {
/*
* H or H' is applied to C(i:m,1:n)
*/
mi = M - i;
ic = i;
}
else {
/*
* H or H' is applied to C(1:m,i:n)
*/
ni = N - i;
jc = i;
}
/*
* Apply H or H'
*/
LAPACKE_slarfb_work(LAPACK_COL_MAJOR,
mi, ni, kb,
&A[LDA*i+i], LDA,
&T[LDT*i], LDT,
&C[LDC*jc+ic], LDC,
WORK, LDWORK);
}
}

Here is the caller graph for this function:

void CORE_sormlq_quark ( Quark quark)

Definition at line 264 of file core_sormlq.c.

References A, C, CORE_sormlq(), quark_unpack_args_14, side, T, and trans.

{
int side;
int trans;
int m;
int n;
int k;
int ib;
float *A;
int lda;
float *T;
int ldt;
float *C;
int ldc;
float *WORK;
int ldwork;
quark_unpack_args_14(quark, side, trans, m, n, k, ib,
A, lda, T, ldt, C, ldc, WORK, ldwork);
CORE_sormlq(side, trans, m, n, k, ib,
A, lda, T, ldt, C, ldc, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_sormlq ( Quark quark,
Quark_Task_Flags task_flags,
int  side,
int  trans,
int  m,
int  n,
int  k,
int  ib,
int  nb,
float *  A,
int  lda,
float *  T,
int  ldt,
float *  C,
int  ldc 
)

Definition at line 231 of file core_sormlq.c.

References CORE_sormlq_quark(), DAG_CORE_UNMLQ, INOUT, INPUT, QUARK_Insert_Task(), QUARK_REGION_U, SCRATCH, and VALUE.

{
sizeof(PLASMA_enum), &side, VALUE,
sizeof(PLASMA_enum), &trans, VALUE,
sizeof(int), &m, VALUE,
sizeof(int), &n, VALUE,
sizeof(int), &k, VALUE,
sizeof(int), &ib, VALUE,
sizeof(float)*nb*nb, A, INPUT | QUARK_REGION_U,
sizeof(int), &lda, VALUE,
sizeof(float)*ib*nb, T, INPUT,
sizeof(int), &ldt, VALUE,
sizeof(float)*nb*nb, C, INOUT,
sizeof(int), &ldc, VALUE,
sizeof(float)*ib*nb, NULL, SCRATCH,
sizeof(int), &nb, VALUE,
0);
}

Here is the call graph for this function:

Here is the caller graph for this function: