PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dttlqt.c File Reference
#include <lapacke.h>
#include "common.h"
Include dependency graph for core_dttlqt.c:

Go to the source code of this file.

Macros

#define REAL

Functions

int CORE_dttlqt (int M, int N, int IB, double *A1, int LDA1, double *A2, int LDA2, double *T, int LDT, double *TAU, double *WORK)
void QUARK_CORE_dttlqt (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, double *A1, int lda1, double *A2, int lda2, double *T, int ldt)
void CORE_dttlqt_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Dulceneia Becker
Date:
2010-11-15 (generated for precision d on Tue Nov 22 14:35:17 2011)

Definition in file core_dttlqt.c.


Macro Definition Documentation

#define REAL

Definition at line 20 of file core_dttlqt.c.


Function Documentation

int CORE_dttlqt ( int  M,
int  N,
int  IB,
double *  A1,
int  LDA1,
double *  A2,
int  LDA2,
double *  T,
int  LDT,
double *  TAU,
double *  WORK 
)

CORE_dttlqt computes an LQ factorization of a rectangular matrix formed by coupling side-by-side a real M-by-M lower triangular tile A1 and a real M-by-N lower triangular tile A2:

| A1 A2 | = L * Q

The tile Q is represented as a product of elementary reflectors

Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).

Each H(i) has the form

H(i) = I - tau * v * v'

where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A2(i,1:n), and tau in TAU(i).

Parameters:
[in] M: The number of rows of the tiles A1 and A2. M >= 0. Also the number of columns of the tile A1.
[in] N: The number of columns of the tile A2. N >= 0.
[in] IB: The inner-blocking size. IB >= 0.
[in,out] A1: On entry, the M-by-M tile A1. On exit, the elements on and below the diagonal of the array contain the M-by-M lower trapezoidal tile L; the elements above the diagonal are not referenced.
[in] LDA1: The leading dimension of the array A1. LDA1 >= max(1,M).
[in,out] A2: On entry, the M-by-N lower triangular tile A2. On exit, the elements on and below the diagonal of the array, together with the array TAU, represent the orthogonal tile Q as a product of elementary reflectors (see Further Details).
[in] LDA2: The leading dimension of the array A2. LDA2 >= max(1,M).
[out] T: The IB-by-N triangular factor T of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in] LDT: The leading dimension of the array T. LDT >= IB.
[out] TAU: The scalar factors of the elementary reflectors (see Further Details).
[in,out] WORK: Workspace array.
Returns:
Return values:
PLASMA_SUCCESS: successful exit
<0: if -i, the i-th argument had an illegal value

Definition at line 100 of file core_dttlqt.c.

References cblas_daxpy(), cblas_dcopy(), cblas_dgemv(), cblas_dger(), cblas_dtrmv(), CblasColMajor, CORE_dlaset(), CORE_dparfb(), CORE_dpemv(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaForward, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaRowwise, PlasmaUpper, and PlasmaUpperLower.

{
/*
 * Body of CORE_dttlqt: LQ factorization of the coupled tiles [ A1 A2 ],
 * processed in row panels of at most IB rows. For every row a Householder
 * reflector is generated, applied to the remaining rows of the panel, and
 * one column of the block-reflector factor T is built; after each panel the
 * block reflector is applied to the rows below it.
 *
 * On an illegal argument the function reports it through coreblas_error and
 * returns minus the argument position (-1, -2, -3 or -7 in the checks shown).
 *
 * NOTE(review): this listing is incomplete. Several calls appear only as an
 * argument list with no line naming the callee, the statement guarded by the
 * quick-return test is absent, and no final return statement is shown. The
 * callee names suggested in the notes below are guesses based on the
 * "References" list printed for this function and on the shape of the
 * argument lists -- confirm each one against core_dttlqt.c.
 */
/* Unit and zero scalars passed to the BLAS-style calls below. */
static double zone = 1.0;
static double zzero = 0.0;
#ifdef COMPLEX
/* Only needed by the conjugation calls, which are themselves COMPLEX-only. */
static int ione = 1;
#endif
double alpha;
int i, j, l, ii, sb, mi, ni;
/* Check input arguments */
if (M < 0) {
coreblas_error(1, "Illegal value of M");
return -1;
}
if (N < 0) {
coreblas_error(2, "Illegal value of N");
return -2;
}
if (IB < 0) {
coreblas_error(3, "Illegal value of IB");
return -3;
}
/* LDA2 is only validated when M > 0. No check of LDA1 or LDT is visible here. */
if ((LDA2 < max(1,M)) && (M > 0)) {
coreblas_error(7, "Illegal value of LDA2");
return -7;
}
/* Quick return */
/* NOTE(review): the statement controlled by this test is missing from the
 * listing; presumably `return PLASMA_SUCCESS;` -- confirm. */
if ((M == 0) || (N == 0) || (IB == 0))
/* TODO: Need to check why some cases require
* this to not have uninitialized values */
/* NOTE(review): the start of this call is missing; presumably
 * CORE_dlaset(...) filling T with zeros -- confirm callee and leading
 * arguments. */
0., 0., T, LDT);
/* ii: first row of the current panel; sb: number of rows in that panel. */
for(ii = 0; ii < M; ii += IB) {
sb = min(M-ii, IB);
for(i = 0; i < sb; i++) {
/* j: global row index; mi: rows left below row j inside the panel;
 * ni: number of leading A2 columns taking part for row j. */
j = ii + i;
mi = sb-i-1;
ni = min( j + 1, N);
/*
* Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ).
*/
#ifdef COMPLEX
LAPACKE_dlacgv_work(ni, &A2[j], LDA2);
LAPACKE_dlacgv_work(ione, &A1[LDA1*j+j], LDA1);
#endif
/* Reflector of order ni+1: pivot is A1(j,j), the vector is row j of A2
 * (stride LDA2), and the scalar factor is written to TAU[j]. */
LAPACKE_dlarfg_work(ni+1, &A1[LDA1*j+j], &A2[j], LDA2, &TAU[j]);
if (mi > 0) {
/*
* Apply H( j-1 ) to A( j:II+IB-1, j-1:M ) from the right.
*/
/* NOTE(review): callee line missing; the (n, x, incx, y, incy) shape
 * suggests cblas_dcopy -- copies A1(j+1:j+mi, j) into WORK. Confirm. */
mi,
&A1[LDA1*j+(j+1)], 1,
WORK, 1);
/* NOTE(review): callee line and leading arguments missing; the shape
 * suggests cblas_dgemv accumulating A2(j+1:, 0:ni) * A2(j, 0:ni) into
 * WORK. Confirm. */
mi, ni,
(zone), &A2[j+1], LDA2,
&A2[j], LDA2,
(zone), WORK, 1);
alpha = -(TAU[j]);
/* NOTE(review): callee line missing; the shape suggests cblas_daxpy
 * adding alpha * WORK to A1(j+1:j+mi, j). Confirm. */
mi, (alpha),
WORK, 1,
&A1[LDA1*j+j+1], 1);
/* NOTE(review): callee line missing; the shape suggests cblas_dger, a
 * rank-1 update of A2(j+1:, 0:ni) with WORK and row j of A2. Confirm. */
CblasColMajor, mi, ni,
(alpha), WORK, 1,
&A2[j], LDA2,
&A2[j+1], LDA2);
}
/*
* Calculate T.
*/
if (i > 0 ) {
l = min(i, max(0, N-ii));
alpha = -(TAU[j]);
/* NOTE(review): callee line missing; presumably CORE_dpemv, writing its
 * result into column j of T (&T[LDT*j]). Confirm. */
PlasmaNoTrans, PlasmaRowwise,
i , min(j, N), l,
alpha, &A2[ii], LDA2,
&A2[j], LDA2,
zzero, &T[LDT*j], 1,
WORK);
/* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
/* NOTE(review): callee line and some arguments missing; presumably
 * cblas_dtrmv (PlasmaUpper and PlasmaNonUnit appear in the References
 * list). Confirm. */
(CBLAS_TRANSPOSE)PlasmaNoTrans,
i, &T[LDT*ii], LDT,
&T[LDT*j], 1);
}
#ifdef COMPLEX
LAPACKE_dlacgv_work(ni, &A2[j], LDA2 );
LAPACKE_dlacgv_work(ione, &A1[LDA1*j+j], LDA1 );
#endif
/* Entry (i, j) of T receives the scalar factor of reflector j. */
T[LDT*j+i] = TAU[j];
}
/* Apply Q to the rest of the matrix to the right */
if (M > ii+sb) {
/* mi: rows below the current panel; ni: A2 columns touched by the panel. */
mi = M-(ii+sb);
ni = min(ii+sb, N);
l = min(sb, max(0, ni-ii));
/* NOTE(review): callee line and some arguments missing; presumably
 * CORE_dparfb (PlasmaForward also appears in the References list).
 * Confirm. */
PlasmaRight, PlasmaNoTrans,
mi, IB, mi, ni, sb, l,
&A1[LDA1*ii+ii+sb], LDA1,
&A2[ii+sb], LDA2,
&A2[ii], LDA2,
&T[LDT*ii], LDT,
WORK, M);
}
}
/* NOTE(review): no return statement is shown for the success path;
 * presumably `return PLASMA_SUCCESS;` -- confirm. */
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_dttlqt_quark ( Quark *  quark )

Definition at line 273 of file core_dttlqt.c.

References CORE_dttlqt(), quark_unpack_args_11, T, and TAU.

{
/*
 * Task entry point: pulls the eleven kernel arguments out of the QUARK task
 * and forwards them, in the same order, to CORE_dttlqt.
 */
int m;
int n;
int ib;
double *A1;
int lda1;
double *A2;
int lda2;
double *T;
int ldt;
double *TAU;
double *WORK;
/* Presumably fills the locals above from the arguments packed into the
 * task -- quark_unpack_args_11 is defined elsewhere; confirm. */
quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
/* The status returned by CORE_dttlqt is not inspected here. */
CORE_dttlqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_dttlqt ( Quark *  quark,
Quark_Task_Flags *  task_flags,
int  m,
int  n,
int  ib,
int  nb,
double *  A1,
int  lda1,
double *  A2,
int  lda2,
double *  T,
int  ldt 
)

Definition at line 244 of file core_dttlqt.c.

References CORE_dttlqt_quark(), DAG_CORE_TTLQT, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, SCRATCH, and VALUE.

{
/*
 * NOTE(review): the opening of the call is missing from this listing. The
 * "References" list for this function names QUARK_Insert_Task(),
 * CORE_dttlqt_quark() and DAG_CORE_TTLQT, so this is presumably a task
 * insertion for CORE_dttlqt_quark -- confirm against core_dttlqt.c.
 *
 * What is visible is a list of (size, pointer, flags) triples ended by 0.
 * There are eleven triples, matching the eleven arguments unpacked by
 * CORE_dttlqt_quark.
 */
sizeof(int), &m, VALUE,
sizeof(int), &n, VALUE,
sizeof(int), &ib, VALUE,
sizeof(double)*nb*nb, A1, INOUT|QUARK_REGION_D|QUARK_REGION_L,
sizeof(int), &lda1, VALUE,
sizeof(double)*nb*nb, A2, INOUT|QUARK_REGION_D|QUARK_REGION_L|LOCALITY,
sizeof(int), &lda2, VALUE,
sizeof(double)*ib*nb, T, OUTPUT,
sizeof(int), &ldt, VALUE,
/* Two scratch entries with no caller-supplied buffer; by position they
 * presumably become TAU (nb doubles) and WORK (ib*nb doubles) -- confirm. */
sizeof(double)*nb, NULL, SCRATCH,
sizeof(double)*ib*nb, NULL, SCRATCH,
0);
}

Here is the call graph for this function:

Here is the caller graph for this function: