#include <lapacke.h>
#include "common.h"

Include dependency graph for core_ctsmlq_hetra1.c:

Macros
#define	COMPLEX

Functions
int	CORE_ctsmlq_hetra1 (int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, PLASMA_Complex32_t *A1, int lda1, PLASMA_Complex32_t *A2, int lda2, PLASMA_Complex32_t *V, int ldv, PLASMA_Complex32_t *T, int ldt, PLASMA_Complex32_t *WORK, int ldwork)
void	QUARK_CORE_ctsmlq_hetra1 (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, PLASMA_Complex32_t *A1, int lda1, PLASMA_Complex32_t *A2, int lda2, PLASMA_Complex32_t *V, int ldv, PLASMA_Complex32_t *T, int ldt)
void	CORE_ctsmlq_hetra1_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:: 2.4.5

Author:: Hatem Ltaief; Mathieu Faverge; Azzam Haidar

Date:: 2010-11-15 c Tue Nov 22 14:35:23 2011

Definition in file core_ctsmlq_hetra1.c.

Macro Definition Documentation

#define COMPLEX

Definition at line 20 of file core_ctsmlq_hetra1.c.

Function Documentation

int CORE_ctsmlq_hetra1	(	int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		PLASMA_Complex32_t *	A1,
		int	lda1,
		PLASMA_Complex32_t *	A2,
		int	lda2,
		PLASMA_Complex32_t *	V,
		int	ldv,
		PLASMA_Complex32_t *	T,
		int	ldt,
		PLASMA_Complex32_t *	WORK,
		int	ldwork
	)

CORE_ctsmlq_hetra1: see CORE_ctsmlq

This kernel applies a Right transformation on | A1' A2 | and does not handle the transpose of A1. It therefore needs to explicitly transpose A1 before and after the application of the block of reflectors. It can be further optimized by changing the underlying kernel ztsrfb accordingly.

Parameters:

[in]	side	PlasmaLeft : apply Q or QH from the Left; PlasmaRight : apply Q or QH from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaConjTrans : ConjTranspose, apply Q**H.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	N2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_CTSLQT in the first k rows of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[in]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-M1 if side == PlasmaLeft, or LDWORK-by-IB if side == PlasmaRight.
[in]	LDWORK	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft; LDWORK >= max(1,N1) if side == PlasmaRight.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 125 of file core_ctsmlq_hetra1.c.

References CORE_ctsmlq(), coreblas_error, and PLASMA_SUCCESS.

{
    /*
     * Conjugate-transposes the square tile A1 in place, applies CORE_ctsmlq
     * to (A1, A2) with the caller's arguments unchanged, then
     * conjugate-transposes A1 again so it is handed back in its original
     * orientation.
     *
     * Returns -3 if m1 != n1 (the in-place transposition needs a square
     * tile); otherwise returns whatever CORE_ctsmlq returned, so an argument
     * error detected by the inner kernel is no longer reported as success.
     */
    int i, j;
    int info;
    PLASMA_Complex32_t tmp;

    if ( (m1 != n1) ) {
        coreblas_error(3, "Illegal value of M1, N1");
        return -3;
    }

    /* in-place conjugate transposition of A1 */
    for (j = 0; j < n1; j++){
        /* a diagonal entry stays in place and is only conjugated */
        A1[j + j*lda1] = conjf(A1[j + j*lda1]);
        for (i = j+1; i < m1; i++){
            /* swap A1(i,j) and A1(j,i), conjugating both; a local
             * temporary is used so WORK is not touched before
             * CORE_ctsmlq has had a chance to validate it */
            tmp = A1[i + j*lda1];
            A1[i + j*lda1] = conjf(A1[j + i*lda1]);
            A1[j + i*lda1] = conjf(tmp);
        }
    }

    info = CORE_ctsmlq(side, trans, m1, n1, m2, n2, k, ib,
                       A1, lda1, A2, lda2,
                       V,  ldv,  T,  ldt,
                       WORK, ldwork);

    /* in-place conjugate transposition of A1; done even when the kernel
     * failed so that A1 is always returned in its original orientation */
    for (j = 0; j < n1; j++){
        A1[j + j*lda1] = conjf(A1[j + j*lda1]);
        for (i = j+1; i < m1; i++){
            tmp = A1[i + j*lda1];
            A1[i + j*lda1] = conjf(A1[j + i*lda1]);
            A1[j + i*lda1] = conjf(tmp);
        }
    }

    return info;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ctsmlq_hetra1_quark ( Quark * quark )

This kernel applies a Right transformation on | A1' A2 | and does not handle the transpose of A1. It therefore needs to explicitly transpose A1 before and after the application of the block of reflectors. It can be further optimized by changing the underlying kernel ztsrfb accordingly.

Definition at line 218 of file core_ctsmlq_hetra1.c.

References CORE_ctsmlq_hetra1(), quark_unpack_args_18, side, T, trans, and V.

{
    /*
     * QUARK task entry point: unpacks the 18 task arguments from the quark
     * handle and forwards them, unchanged and in the same order, to
     * CORE_ctsmlq_hetra1. The kernel's return value is not inspected.
     */

    /* tile, reflector, triangular-factor and workspace pointers */
    PLASMA_Complex32_t *A1, *A2, *V, *T, *WORK;

    /* operation selectors */
    int side, trans;

    /* tile dimensions, reflector count and inner blocking size */
    int m1, n1, m2, n2;
    int k, ib;

    /* leading dimensions */
    int lda1, lda2, ldv, ldt, ldwork;

    quark_unpack_args_18(quark,
                         side, trans,
                         m1, n1, m2, n2,
                         k, ib,
                         A1, lda1,
                         A2, lda2,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    CORE_ctsmlq_hetra1(side, trans,
                       m1, n1, m2, n2,
                       k, ib,
                       A1, lda1,
                       A2, lda2,
                       V, ldv,
                       T, ldt,
                       WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ctsmlq_hetra1	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	side,
		int	trans,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	k,
		int	ib,
		int	nb,
		PLASMA_Complex32_t *	A1,
		int	lda1,
		PLASMA_Complex32_t *	A2,
		int	lda2,
		PLASMA_Complex32_t *	V,
		int	ldv,
		PLASMA_Complex32_t *	T,
		int	ldt
	)

Definition at line 174 of file core_ctsmlq_hetra1.c.

References CORE_ctsmlq_hetra1_quark(), INOUT, INPUT, PlasmaLeft, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Inserts a CORE_ctsmlq_hetra1_quark task into the QUARK scheduler.
     * The 18 arguments are listed in the order the task function unpacks
     * them; the list is terminated by a trailing 0.
     */
    int work_ld;

    /* leading dimension handed to the task for its workspace:
     * ib when applying from the left, nb otherwise */
    if (side == PlasmaLeft) {
        work_ld = ib;
    } else {
        work_ld = nb;
    }

    QUARK_Insert_Task(quark, CORE_ctsmlq_hetra1_quark, task_flags,
        /* operation selectors, passed by value */
        sizeof(PLASMA_enum),                &side,  VALUE,
        sizeof(PLASMA_enum),                &trans, VALUE,
        /* tile dimensions, reflector count, inner blocking size */
        sizeof(int),                        &m1,    VALUE,
        sizeof(int),                        &n1,    VALUE,
        sizeof(int),                        &m2,    VALUE,
        sizeof(int),                        &n2,    VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        /* A1: INOUT, with the QUARK_REGION_U and QUARK_REGION_D flags */
        sizeof(PLASMA_Complex32_t)*nb*nb,    A1,            INOUT|QUARK_REGION_U|QUARK_REGION_D,
        sizeof(int),                        &lda1,  VALUE,
        /* A2: INOUT */
        sizeof(PLASMA_Complex32_t)*nb*nb,    A2,            INOUT,
        sizeof(int),                        &lda2,  VALUE,
        /* V and T: INPUT only */
        sizeof(PLASMA_Complex32_t)*nb*nb,    V,             INPUT,
        sizeof(int),                        &ldv,   VALUE,
        sizeof(PLASMA_Complex32_t)*ib*nb,    T,             INPUT,
        sizeof(int),                        &ldt,   VALUE,
        /* ib*nb-element SCRATCH workspace; no pointer is supplied here */
        sizeof(PLASMA_Complex32_t)*ib*nb,    NULL,          SCRATCH,
        sizeof(int),                        &work_ld, VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation