PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pclange.c File Reference
#include <stdlib.h>
#include <math.h>
#include "common.h"
Include dependency graph for pclange.c:

Go to the source code of this file.

Macros

/* Pointer to element (i, j) of tile (m, n) of the descriptor named A: the value
 * of BLKADDR(A, PLASMA_Complex32_t, m, n) advanced by j*ldt + i elements, i.e.
 * column-major addressing inside the tile with leading dimension ldt.
 * NOTE(review): BLKADDR is defined elsewhere; presumably it yields the tile's
 * base address - confirm against common.h. */
#define A(m, n, i, j, ldt)   (BLKADDR(A, PLASMA_Complex32_t, m, n)+((j)*(ldt)+(i)))

Functions

void plasma_pclange (plasma_context_t *plasma)
void plasma_pclange_quark (PLASMA_enum norm, PLASMA_desc A, float *work, float *result, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Emmanuel Agullo
Mathieu Faverge
Date:
2010-11-15 (generated c Tue Nov 22 14:35:39 2011)

Definition in file pclange.c.


Macro Definition Documentation

#define A (   m,
  n,
  i,
  j,
  ldt 
)    (BLKADDR(A, PLASMA_Complex32_t, m, n)+((j)*(ldt)+(i)))

Definition at line 20 of file pclange.c.


Function Documentation

void plasma_pclange ( plasma_context_t * plasma)

Definition at line 24 of file pclange.c.

References A, BLKLDD, CORE_clange(), CORE_scasum(), CORE_slange(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, max, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, norm, plasma_desc_t::nt, plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, plasma_unpack_args_6, PlasmaColumnwise, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, PlasmaOneNorm, PlasmaRealDouble, PlasmaRowwise, PlasmaUpperLower, ss_cond_set, ss_cond_wait, ss_finalize, and ss_init.

{
/*
 * Norm of the tiled matrix described by A, computed cooperatively by
 * PLASMA_SIZE ranks.  Each rank stores a partial maximum in
 * work[PLASMA_RANK]; the partials are then merged pairwise with max() and
 * rank 0 copies work[0] into *result.
 *
 * Arguments are not taken directly: norm, A, work, result, sequence and
 * request are obtained through plasma_unpack_args_6().
 *
 * NOTE(review): this listing appears to be incomplete.  No declaration of
 * `norm` or `A` is visible, the switch below has no `case` labels, some
 * call openings are missing, and ss_init / ss_cond_set / ss_finalize are
 * listed as references of this function but never appear in the body.
 * Verify every such spot against the real pclange.c before relying on it.
 */
float *work;
float *result;
PLASMA_sequence *sequence;
PLASMA_request *request;
int m, n;
int next_m;
int next_n;
int ldam;
int step, lrank;
int X, X1, X2, Y, Y1, Y2;
float* lwork;
float normtmp, normtmp2;
plasma_unpack_args_6(norm, A, work, result, sequence, request);
*result = 0.0;
/* Only rank 0 clears the per-rank partial results (one float per rank). */
if (PLASMA_RANK == 0)
memset(work, 0, PLASMA_SIZE*sizeof(float));
switch (norm) {
/*
* PlasmaMaxNorm
*
* NOTE(review): the `case` label is not visible here, and `m` is read by the
* loop below without any visible assignment - presumably an initialisation
* line was dropped from the listing; confirm.
*/
n = 0;
/* Fold the linear tile index held in m into (m, n) tile coordinates by
 * wrapping over the A.mt tile rows. */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt;
}
while (n < A.nt) {
/* Pre-compute the tile this rank handles next: PLASMA_SIZE tiles further on,
 * wrapped over A.mt in the same way as above. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt;
}
/* Row range [X1, X2) used inside tile row m: the first tile row starts at
 * A.i % A.mb, the last one ends at (A.i+A.m-1) % A.mb + 1, any other tile row
 * uses all A.mb rows.  X is the resulting row count. */
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
/* Same computation for the column range [Y1, Y2) of tile column n. */
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
ldam = BLKLDD(A, m);
/* Max norm of the X-by-Y part of this tile, kept as a running maximum in the
 * slot owned by this rank. */
CORE_clange(PlasmaMaxNorm, X, Y, A(m, n, X1, Y1, ldam), ldam, NULL, &normtmp);
if (normtmp > work[PLASMA_RANK])
work[PLASMA_RANK] = normtmp;
m = next_m;
n = next_n;
}
break;
/*
* PlasmaOneNorm
*
* NOTE(review): the `case` label is not visible.  `n` is neither initialised
* for this branch nor advanced inside the while loop in the lines shown -
* presumably dropped from the listing; confirm.
*/
normtmp2 = 0.0;
/* NOTE(review): lwork is a float buffer of A.nb entries but is requested with
 * the PlasmaRealDouble type tag - confirm this is intended. */
lwork = (float*)plasma_private_alloc(plasma, A.nb, PlasmaRealDouble);
while (n < A.nt) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* lwork is cleared once per tile column and then passed to the call inside
 * the loop over all tile rows m. */
memset(lwork, 0, A.nb*sizeof(float));
for (m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
/* NOTE(review): the next three lines are the tail of a call whose opening is
 * missing from the listing - presumably CORE_scasum(PlasmaColumnwise,
 * PlasmaUpperLower, ...), going by the reference list; confirm. */
X, Y,
A(m, n, X1, Y1, ldam), ldam,
lwork);
}
/* Largest of the Y entries of lwork for this tile column; keep the running
 * maximum over the tile columns visited by this rank. */
CORE_slange(PlasmaMaxNorm, Y, 1, lwork, 1, NULL, &normtmp);
if (normtmp > normtmp2)
normtmp2 = normtmp;
}
work[PLASMA_RANK] = normtmp2;
plasma_private_free(plasma, lwork);
break;
/*
* PlasmaInfNorm
*
* Mirror image of the branch above with the roles of rows and columns swapped.
* NOTE(review): same gaps as above - no `case` label, `m` neither initialised
* nor advanced in the visible lines; confirm.
*/
normtmp2 = 0.0;
/* NOTE(review): float buffer of A.mb entries requested with PlasmaRealDouble -
 * confirm this is intended. */
lwork = (float*)plasma_private_alloc(plasma, A.mb, PlasmaRealDouble);
while (m < A.mt) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
memset(lwork, 0, A.mb*sizeof(float));
for (n = 0; n < A.nt; n++) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* NOTE(review): tail of a call whose opening is missing from the listing -
 * presumably CORE_scasum(PlasmaRowwise, PlasmaUpperLower, ...); confirm. */
X, Y,
A(m, n, X1, Y1, ldam), ldam,
lwork);
}
/* Largest of the X entries of lwork for this tile row. */
CORE_slange(PlasmaMaxNorm, X, 1, lwork, 1, NULL, &normtmp);
if (normtmp > normtmp2)
normtmp2 = normtmp;
}
work[PLASMA_RANK] = normtmp2;
plasma_private_free(plasma, lwork);
break;
/*
* PlasmaFrobeniusNorm - not implemented
*/
default:;
}
if (norm != PlasmaFrobeniusNorm) {
/* Pairwise max-reduction of work[] across ranks.  While lrank is even and the
 * partner rank PLASMA_RANK+step exists, wait on the partner, fold its partial
 * into this rank's slot, then halve lrank and double step. */
step = 1;
lrank = PLASMA_RANK;
while ( (lrank%2 == 0) && (PLASMA_RANK+step < PLASMA_SIZE) ) {
ss_cond_wait(PLASMA_RANK+step, 0, step);
work[PLASMA_RANK] = max(work[PLASMA_RANK], work[PLASMA_RANK+step]);
lrank = lrank >> 1;
step = step << 1;
}
/* NOTE(review): as shown, this loop only updates the locals lrank and step
 * and has no other visible effect.  ss_cond_set is listed among the
 * references, so the signalling calls were presumably dropped from the
 * listing; confirm. */
if (PLASMA_RANK > 0) {
while( lrank != 0 ) {
if (lrank%2 == 1) {
lrank = 0;
} else {
lrank = lrank >> 1;
step = step << 1;
}
}
}
/* Rank 0 publishes the merged value. */
if (PLASMA_RANK == 0)
*result = work[0];
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pclange_quark ( PLASMA_enum  norm,
PLASMA_desc  A,
float *  work,
float *  result,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Definition at line 202 of file pclange.c.

References BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), plasma_shared_alloc(), PLASMA_SUCCESS, PlasmaColumnwise, PlasmaFrobeniusNorm, PlasmaInfNorm, PlasmaMaxNorm, PlasmaOneNorm, PlasmaRealDouble, PlasmaRowwise, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_clange_f1(), QUARK_CORE_free(), QUARK_CORE_scasum_f1(), QUARK_CORE_slange(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/*
 * Task-based (QUARK) variant of the norm computation.  For the selected norm
 * it allocates a shared float workspace, loops over every tile (m, n) of A,
 * issues one further step over the whole workspace with `result` as its last
 * argument, and finally passes the workspace to QUARK_CORE_free().
 *
 * Returns immediately, before touching *result, when sequence->status is not
 * PLASMA_SUCCESS.
 *
 * NOTE(review): this listing appears to be incomplete.  `plasma` and
 * `task_flags` are used without a visible declaration, the switch has no
 * `case` labels, and several argument lists have lost the line that opens
 * the call.  The reference list names QUARK_CORE_clange_f1,
 * QUARK_CORE_scasum_f1 and QUARK_CORE_slange, which are presumably the
 * missing callees; verify against the real pclange.c.
 */
int X, X1, X2, Y, Y1, Y2;
int ldam;
int m, n;
int szeW;
float* lwork;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the QUARK sequence of `sequence` to the task flags used below. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
*result = 0.0;
switch ( norm ) {
/*
* PlasmaMaxNorm
*
* Workspace: one float per tile (A.mt*A.nt entries); tile (m, n) is given the
* slot lwork[A.mt*n+m].
* NOTE(review): float buffer requested with the PlasmaRealDouble type tag -
* confirm this is intended.
*/
szeW = A.mt*A.nt;
lwork = (float*)plasma_shared_alloc(plasma, szeW, PlasmaRealDouble);
memset(lwork, 0, szeW*sizeof(float));
for(m = 0; m < A.mt; m++) {
/* Row range [X1, X2) used inside tile row m: the first tile row starts at
 * A.i % A.mb, the last one ends at (A.i+A.m-1) % A.mb + 1, any other tile row
 * uses all A.mb rows. */
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
for(n = 0; n < A.nt; n++) {
/* Same computation for the column range [Y1, Y2) of tile column n. */
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* NOTE(review): tail of a call whose opening line is missing - presumably
 * QUARK_CORE_clange_f1(...); confirm. */
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
0, &(lwork[A.mt*n+m]),
lwork, szeW);
}
}
/* NOTE(review): tail of a call whose opening line is missing - presumably
 * QUARK_CORE_slange(...) over the szeW per-tile values, writing to result;
 * confirm. */
plasma->quark, &task_flags,
lwork, A.mt, szeW,
0, result);
/* NOTE(review): the size passed here uses sizeof(PLASMA_Complex32_t) although
 * lwork holds floats - confirm what QUARK_CORE_free expects. */
QUARK_CORE_free(plasma->quark, &task_flags, lwork, szeW*sizeof(PLASMA_Complex32_t));
break;
/*
* PlasmaOneNorm
*
* Workspace: A.n+1 floats; tile column n is given the A.nb-long slice that
* starts at lwork[n*A.nb+1].
* NOTE(review): float buffer requested with PlasmaRealDouble - confirm.
*/
lwork = (float*)plasma_shared_alloc(plasma, (A.n+1), PlasmaRealDouble);
memset(lwork, 0, (A.n+1)*sizeof(float));
for(m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
for(n = 0; n < A.nt; n++) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* NOTE(review): tail of a call whose opening line is missing - presumably
 * QUARK_CORE_scasum_f1(...); confirm. */
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
&(lwork[n*A.nb+1]), A.nb,
lwork, A.n);
}
}
/* NOTE(review): tail of a call whose opening line is missing - presumably
 * QUARK_CORE_slange(...), here with PlasmaMaxNorm over the A.n+1 entries of
 * lwork and result as destination; confirm. */
plasma->quark, &task_flags,
PlasmaMaxNorm, A.n+1, 1,
lwork, 1, A.n+1,
0, result);
/* NOTE(review): size uses sizeof(PLASMA_Complex32_t) for a float buffer -
 * confirm. */
QUARK_CORE_free(plasma->quark, &task_flags, lwork, (A.n+1)*sizeof(PLASMA_Complex32_t));
break;
/*
* PlasmaInfNorm
*
* Workspace: A.m+1 floats; tile row m is given the A.mb-long slice that starts
* at lwork[m*A.mb+1].
* NOTE(review): float buffer requested with PlasmaRealDouble - confirm.
*/
lwork = (float*)plasma_shared_alloc(plasma, (A.m+1), PlasmaRealDouble);
memset(lwork, 0, (A.m+1)*sizeof(float));
for(m = 0; m < A.mt; m++) {
X1 = m == 0 ? A.i %A.mb : 0;
X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
X = X2 - X1;
ldam = BLKLDD(A, m);
for(n = 0; n < A.nt; n++) {
Y1 = n == 0 ? A.j %A.nb : 0;
Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y = Y2 - Y1;
/* NOTE(review): tail of a call whose opening line is missing - presumably
 * QUARK_CORE_scasum_f1(...); confirm. */
plasma->quark, &task_flags,
A(m, n, X1, Y1, ldam), ldam, ldam*Y,
&(lwork[m*A.mb+1]), A.mb,
lwork, A.m);
}
}
/* NOTE(review): tail of a call whose opening line is missing - presumably
 * QUARK_CORE_slange(...), here with PlasmaMaxNorm over the A.m+1 entries of
 * lwork and result as destination; confirm. */
plasma->quark, &task_flags,
PlasmaMaxNorm, A.m+1, 1,
lwork, 1, A.m+1,
0, result);
/* NOTE(review): size uses sizeof(PLASMA_Complex32_t) for a float buffer -
 * confirm. */
QUARK_CORE_free(plasma->quark, &task_flags, lwork, (A.m+1)*sizeof(PLASMA_Complex32_t));
break;
/*
* PlasmaFrobeniusNorm - not implemented
*/
default:;
}
}

Here is the call graph for this function: