PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ztstrf.c File Reference
#include "common.h"
#include <cblas.h>
#include <math.h>
Include dependency graph for core_ztstrf.c:

Go to the source code of this file.

Functions

int CORE_ztstrf (int M, int N, int IB, int NB, PLASMA_Complex64_t *U, int LDU, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *L, int LDL, int *IPIV, PLASMA_Complex64_t *WORK, int LDWORK, int *INFO)
void QUARK_CORE_ztstrf (Quark *quark, Quark_Task_Flags *task_flags, int m, int n, int ib, int nb, PLASMA_Complex64_t *U, int ldu, PLASMA_Complex64_t *A, int lda, PLASMA_Complex64_t *L, int ldl, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request, PLASMA_bool check_info, int iinfo)
void CORE_ztstrf_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Jakub Kurzak
Date:
2010-11-15 normal z -> c d s

Definition in file core_ztstrf.c.


Function Documentation

int CORE_ztstrf ( int  M,
int  N,
int  IB,
int  NB,
PLASMA_Complex64_t U,
int  LDU,
PLASMA_Complex64_t A,
int  LDA,
PLASMA_Complex64_t L,
int  LDL,
int *  IPIV,
PLASMA_Complex64_t WORK,
int  LDWORK,
int *  INFO 
)

CORE_ztstrf computes an LU factorization of a complex matrix formed by an upper triangular NB-by-N tile U on top of a M-by-N tile A using partial pivoting with row interchanges.

This is the right-looking Level 2.5 BLAS version of the algorithm.

Parameters:
[in]MThe number of rows of the tile A. M >= 0.
[in]NThe number of columns of the tile A. N >= 0.
[in]IBThe inner-blocking size. IB >= 0.
[in]NB
[in,out]UOn entry, the NB-by-N upper triangular tile. On exit, the new factor U from the factorization
[in]LDUThe leading dimension of the array U. LDU >= max(1,NB).
[in,out]AOn entry, the M-by-N tile to be factored. On exit, the factor L from the factorization
[in]LDAThe leading dimension of the array A. LDA >= max(1,M).
[in,out]LOn entry, the IB-by-N lower triangular tile. On exit, the interchanged rows form the tile A in case of pivoting.
[in]LDLThe leading dimension of the array L. LDL >= max(1,IB).
[out]IPIVThe pivot indices; for 1 <= i <= min(M,N), row i of the tile U was interchanged with row IPIV(i) of the tile A.
[in,out]WORK
[in]LDWORKThe dimension of the array WORK.
[out]INFO
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if INFO = -k, the k-th argument had an illegal value
>0if INFO = k, U(k,k) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

Definition at line 99 of file core_ztstrf.c.

References cabs(), cblas_izamax(), CBLAS_SADDR, cblas_zcopy(), cblas_zgeru(), cblas_zscal(), cblas_zswap(), CblasColMajor, CORE_zssssm(), coreblas_error, max, min, and PLASMA_SUCCESS.

{
static PLASMA_Complex64_t zzero = 0.0;
static PLASMA_Complex64_t mzone =-1.0;
int i, j, ii, sb;
int im, ip;
/* Check input arguments */
*INFO = 0;
if (M < 0) {
coreblas_error(1, "Illegal value of M");
return -1;
}
if (N < 0) {
coreblas_error(2, "Illegal value of N");
return -2;
}
if (IB < 0) {
coreblas_error(3, "Illegal value of IB");
return -3;
}
if ((LDU < max(1,NB)) && (NB > 0)) {
coreblas_error(6, "Illegal value of LDU");
return -6;
}
if ((LDA < max(1,M)) && (M > 0)) {
coreblas_error(8, "Illegal value of LDA");
return -8;
}
if ((LDL < max(1,IB)) && (IB > 0)) {
coreblas_error(10, "Illegal value of LDL");
return -10;
}
/* Quick return */
if ((M == 0) || (N == 0) || (IB == 0))
/* Set L to 0 */
memset(L, 0, LDL*N*sizeof(PLASMA_Complex64_t));
ip = 0;
for (ii = 0; ii < N; ii += IB) {
sb = min(N-ii, IB);
for (i = 0; i < sb; i++) {
im = cblas_izamax(M, &A[LDA*(ii+i)], 1);
IPIV[ip] = ii+i+1;
if (cabs(A[LDA*(ii+i)+im]) > cabs(U[LDU*(ii+i)+ii+i])) {
/*
* Swap behind.
*/
cblas_zswap(i, &L[LDL*ii+i], LDL, &WORK[im], LDWORK );
/*
* Swap ahead.
*/
cblas_zswap(sb-i, &U[LDU*(ii+i)+ii+i], LDU, &A[LDA*(ii+i)+im], LDA );
/*
* Set IPIV.
*/
IPIV[ip] = NB + im + 1;
for (j = 0; j < i; j++) {
A[LDA*(ii+j)+im] = zzero;
}
}
if ((*INFO == 0) && (cabs(A[LDA*(ii+i)+im]) == zzero)
&& (cabs(U[LDU*(ii+i)+ii+i]) == zzero)) {
*INFO = ii+i+1;
}
alpha = ((PLASMA_Complex64_t)1. / U[LDU*(ii+i)+ii+i]);
cblas_zscal(M, CBLAS_SADDR(alpha), &A[LDA*(ii+i)], 1);
cblas_zcopy(M, &A[LDA*(ii+i)], 1, &WORK[LDWORK*i], 1);
CblasColMajor, M, sb-i-1,
CBLAS_SADDR(mzone), &A[LDA*(ii+i)], 1,
&U[LDU*(ii+i+1)+ii+i], LDU,
&A[LDA*(ii+i+1)], LDA );
ip = ip+1;
}
/*
* Apply the subpanel to the rest of the panel.
*/
if(ii+i < N) {
for(j = ii; j < ii+sb; j++) {
if (IPIV[j] <= NB) {
IPIV[j] = IPIV[j] - ii;
}
}
NB, N-(ii+sb), M, N-(ii+sb), sb, sb,
&U[LDU*(ii+sb)+ii], LDU,
&A[LDA*(ii+sb)], LDA,
&L[LDL*ii], LDL,
WORK, LDWORK, &IPIV[ii]);
for(j = ii; j < ii+sb; j++) {
if (IPIV[j] <= NB) {
IPIV[j] = IPIV[j] + ii;
}
}
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ztstrf_quark ( Quark quark)

Definition at line 258 of file core_ztstrf.c.

References A, CORE_ztstrf(), IPIV, L, plasma_sequence_flush(), PLASMA_SUCCESS, and quark_unpack_args_17.

{
int m;
int n;
int ib;
int nb;
int ldu;
int lda;
int ldl;
int *IPIV;
int ldwork;
PLASMA_sequence *sequence;
PLASMA_request *request;
PLASMA_bool check_info;
int iinfo;
int info;
quark_unpack_args_17(quark, m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, sequence, request, check_info, iinfo);
CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
if (info != PLASMA_SUCCESS && check_info)
plasma_sequence_flush(quark, sequence, request, iinfo + info);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ztstrf ( Quark quark,
Quark_Task_Flags task_flags,
int  m,
int  n,
int  ib,
int  nb,
PLASMA_Complex64_t U,
int  ldu,
PLASMA_Complex64_t A,
int  lda,
PLASMA_Complex64_t L,
int  ldl,
int *  IPIV,
PLASMA_sequence sequence,
PLASMA_request request,
PLASMA_bool  check_info,
int  iinfo 
)

Definition at line 220 of file core_ztstrf.c.

References CORE_ztstrf_quark(), DAG_CORE_TSTRF, INOUT, LOCALITY, OUTPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
sizeof(int), &m, VALUE,
sizeof(int), &n, VALUE,
sizeof(int), &ib, VALUE,
sizeof(int), &nb, VALUE,
sizeof(int), &ldu, VALUE,
sizeof(PLASMA_Complex64_t)*nb*nb, A, INOUT | LOCALITY,
sizeof(int), &lda, VALUE,
sizeof(PLASMA_Complex64_t)*ib*nb, L, OUTPUT,
sizeof(int), &ldl, VALUE,
sizeof(int)*nb, IPIV, OUTPUT,
sizeof(PLASMA_Complex64_t)*ib*nb, NULL, SCRATCH,
sizeof(int), &nb, VALUE,
sizeof(PLASMA_sequence*), &sequence, VALUE,
sizeof(PLASMA_request*), &request, VALUE,
sizeof(PLASMA_bool), &check_info, VALUE,
sizeof(int), &iinfo, VALUE,
0);
}

Here is the call graph for this function:

Here is the caller graph for this function: