PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
control.h File Reference

Functions

int pthread_getconcurrency (void)
int pthread_setconcurrency (int)
void plasma_barrier_init (plasma_context_t *plasma)
void plasma_barrier (plasma_context_t *plasma)
void * plasma_parallel_section (void *plasma)
int plasma_setaffinity (int rank)
int plasma_unsetaffinity ()
int plasma_yield ()
void plasma_topology_init ()
void plasma_topology_finalize ()
int plasma_get_numthreads ()
int plasma_get_numthreads_numa ()
int plasma_get_affthreads (int *coresbind)
int PLASMA_Init (int cores)
int PLASMA_Init_Affinity (int cores, int *bindtab)
int PLASMA_Finalize ()

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Jakub Kurzak
Date:
2010-11-15

Definition in file control.h.


Function Documentation

void plasma_barrier ( plasma_context_t * plasma)

Busy-waiting barrier

Definition at line 59 of file control.c.

References plasma_context_struct::barrier_id, plasma_context_struct::barrier_nblocked_thrds, plasma_context_struct::barrier_synccond, plasma_context_struct::barrier_synclock, PLASMA_RANK, PLASMA_SIZE, pthread_cond_broadcast(), pthread_cond_wait(), pthread_mutex_lock(), and pthread_mutex_unlock().

{
#ifdef BUSY_WAITING
    int core;

    if (PLASMA_RANK == 0) {
        /* Rank 0 waits for all other cores to check in, then releases them */
        for (core = 1; core < PLASMA_SIZE; core++)
            while (plasma->barrier_in[core] == 0);

        for (core = 1; core < PLASMA_SIZE; core++)
            plasma->barrier_in[core] = 0;

        for (core = 1; core < PLASMA_SIZE; core++)
            plasma->barrier_out[core] = 1;
    }
    else
    {
        plasma->barrier_in[PLASMA_RANK] = 1;
        while (plasma->barrier_out[PLASMA_RANK] == 0);
        plasma->barrier_out[PLASMA_RANK] = 0;
    }
#else
    /* Generation-count barrier over barrier_synclock/barrier_synccond (see References):
       the last arrival advances barrier_id and wakes the waiting threads */
    int id;

    pthread_mutex_lock(&(plasma->barrier_synclock));
    id = plasma->barrier_id;
    plasma->barrier_nblocked_thrds++;
    if (plasma->barrier_nblocked_thrds == PLASMA_SIZE) {
        plasma->barrier_nblocked_thrds = 0;
        plasma->barrier_id++;
        pthread_cond_broadcast(&(plasma->barrier_synccond));
    }
    while (id == plasma->barrier_id)
        pthread_cond_wait(&(plasma->barrier_synccond), &(plasma->barrier_synclock));
    pthread_mutex_unlock(&(plasma->barrier_synclock));
#endif
}
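
The non-busy-waiting branch is the classic generation-count barrier: each arriving thread records the current barrier_id, the last arrival advances it and broadcasts, and everyone else waits for the change. Below is a self-contained sketch of the same technique with plain POSIX threads; demo_barrier_t and its functions are illustrative names only, not part of PLASMA.

    #include <pthread.h>

    /* Hypothetical stand-alone generation-count barrier (not PLASMA API). */
    typedef struct {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        int             nthreads;   /* number of participating threads     */
        int             nblocked;   /* threads currently waiting           */
        int             generation; /* incremented once per full barrier   */
    } demo_barrier_t;

    static void demo_barrier_init(demo_barrier_t *b, int nthreads)
    {
        pthread_mutex_init(&b->lock, NULL);
        pthread_cond_init(&b->cond, NULL);
        b->nthreads   = nthreads;
        b->nblocked   = 0;
        b->generation = 0;
    }

    static void demo_barrier_wait(demo_barrier_t *b)
    {
        int gen;

        pthread_mutex_lock(&b->lock);
        gen = b->generation;
        if (++b->nblocked == b->nthreads) {
            /* Last arrival: start a new generation and wake everyone. */
            b->nblocked = 0;
            b->generation++;
            pthread_cond_broadcast(&b->cond);
        } else {
            /* Wait for the generation to change; robust to spurious wakeups. */
            while (gen == b->generation)
                pthread_cond_wait(&b->cond, &b->lock);
        }
        pthread_mutex_unlock(&b->lock);
    }

PLASMA's version differs only in that the counter and generation live inside plasma_context_t.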

void plasma_barrier_init ( plasma_context_t * plasma)

Internal routines

Busy-waiting barrier initialization

Definition at line 28 of file control.c.

References plasma_context_struct::barrier_id, plasma_context_struct::barrier_nblocked_thrds, plasma_context_struct::barrier_synccond, plasma_context_struct::barrier_synclock, CONTEXT_THREADS_MAX, pthread_cond_init(), and pthread_mutex_init().

{
#ifdef BUSY_WAITING
    int core;

    for (core = 0; core < CONTEXT_THREADS_MAX; core++) {
        plasma->barrier_in[core] = 0;
        plasma->barrier_out[core] = 0;
    }
#else
    plasma->barrier_id = 0;
    plasma->barrier_nblocked_thrds = 0;
    pthread_mutex_init(&(plasma->barrier_synclock), NULL);
    pthread_cond_init( &(plasma->barrier_synccond), NULL);
#endif
}

int PLASMA_Finalize ( )

PLASMA_Finalize - Finalize PLASMA.

Returns:
Return values:
    PLASMA_SUCCESS    successful exit

Definition at line 293 of file control.c.

{
    int core;
    int status;
    void *exitcodep;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Finalize()", "PLASMA not initialized");
    }

    /* Terminate the dynamic scheduler */
    plasma_dynamic_sync();

    /* Free quark structures */
    QUARK_Free(plasma->quark);

    /* Set termination action */
    pthread_mutex_lock(&plasma->action_mutex);
    plasma->action = PLASMA_ACT_FINALIZE;
    pthread_mutex_unlock(&plasma->action_mutex);
    pthread_cond_broadcast(&plasma->action_condt);

    /* Barrier and clear action */
    plasma_barrier(plasma);
    plasma->action = PLASMA_ACT_STAND_BY;

    // Join threads
    for (core = 1; core < plasma->world_size; core++) {
        status = pthread_join(plasma->thread_id[core], &exitcodep);
        if (status != 0) {
            plasma_fatal_error("PLASMA_Finalize", "pthread_join() failed");
            return status;
        }
    }

    /* Destroy thread attributes */
    status = pthread_attr_destroy(&plasma->thread_attr);
    if (status != 0)
        plasma_fatal_error("PLASMA_Finalize", "pthread_attr_destroy() failed");

    /* Destroy topology */
    plasma_topology_finalize();

    status = plasma_context_remove(plasma, pthread_self());
    if (status != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Finalize", "plasma_context_remove() failed");
        return status;
    }

    /* Restore the concurrency */
    /* actually this is not ideal: we should set the concurrency only
     * if it is not already set, and restore it only if we changed it */
    pthread_setconcurrency( 0 );

    return PLASMA_SUCCESS;
}
int plasma_get_affthreads ( int *  coresbind)

Definition at line 334 of file plasmaos.c.

References CONTEXT_THREADS_MAX, PLASMA_CLEANENV, plasma_error(), PLASMA_GETENV, and PLASMA_SUCCESS.

{
    char *envstr = NULL;
    int i;

    /* Env variable does not exist, we search the system number of core */
    PLASMA_GETENV("PLASMA_AFF_THREADS", envstr);
    if ( envstr == NULL) {
        for (i = 0; i < CONTEXT_THREADS_MAX; i++)
            coresbind[i] = i % sys_corenbr;
    }
    else {
        char *endptr;
        int wrap = 0;
        int nbr = 0;
        long int val;

        /* We use the content of the PLASMA_AFF_THREADS env. variable */
        for (i = 0; i < CONTEXT_THREADS_MAX; i++) {
            if (!wrap) {
                val = strtol(envstr, &endptr, 10);
                if (endptr != envptr ? 0 : 0, endptr != envstr) {
                    coresbind[i] = (int)val;
                    envstr = endptr;
                }
                else {
                    /* there must be at least one entry */
                    if (i < 1) {
                        plasma_error("plasma_get_affthreads", "PLASMA_AFF_THREADS should have at least one entry => everything will be bind on core 0");
                        coresbind[i] = 0;
                        i++;
                    }
                    /* there are no more values in the string */
                    /* the remaining threads are bound with a round-robin policy over this array */
                    wrap = 1;
                    nbr = i;
                    coresbind[i] = coresbind[0];
                }
            }
            else {
                coresbind[i] = coresbind[i % nbr];
            }
        }
    }
    PLASMA_CLEANENV(envstr);
    return PLASMA_SUCCESS;
}
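
Because the list is parsed with strtol, entries are expected as whitespace-separated integers, and once the list runs out the remaining threads wrap over it round-robin. A hedged usage sketch, assuming "plasma.h" is the public header declaring PLASMA_Init and PLASMA_Finalize:

    #include <stdlib.h>
    #include "plasma.h"   /* assumed public header */

    int main(void)
    {
        /* Bind list "0 2 4": threads 0,1,2 are expected to go to cores 0,2,4
           and threads 3,4,5 wrap around to 0,2,4 again (round-robin).        */
        setenv("PLASMA_AFF_THREADS", "0 2 4", 1);

        PLASMA_Init(6);             /* 6 threads, bound as 0,2,4,0,2,4 */
        /* ... computational routines ... */
        PLASMA_Finalize();
        return 0;
    }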

int plasma_get_numthreads ( )

Check for an integer in an environment variable, returning the integer value or a provided default value

Definition at line 283 of file plasmaos.c.

References PLASMA_CLEANENV, and PLASMA_GETENV.

{
    char *envstr = NULL;
    char *endptr;
    long int thrdnbr = -1;
    extern int errno;

    /* Env variable does not exist, we search the system number of core */
    PLASMA_GETENV("PLASMA_NUM_THREADS", envstr);
    if ( envstr == NULL ) {
        thrdnbr = sys_corenbr;
    } else {
        /* Convert to long, checking for errors */
        thrdnbr = strtol(envstr, &endptr, 10);
        if ((errno == ERANGE) || ((thrdnbr==0) && (endptr==envstr))) {
            PLASMA_CLEANENV(envstr);
            return -1;
        }
    }
    PLASMA_CLEANENV(envstr);
    return (int)thrdnbr;
}
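
The same strtol-based validation works for any integer environment variable. Below is a stand-alone sketch of the idiom; getenv_int is a hypothetical helper, not part of PLASMA:

    #include <errno.h>
    #include <stdlib.h>

    /* Hypothetical helper: return the integer value of an environment
       variable, or `fallback` if it is unset or not a valid integer.  */
    static int getenv_int(const char *name, int fallback)
    {
        char *endptr;
        const char *str = getenv(name);
        long val;

        if (str == NULL)
            return fallback;

        errno = 0;
        val = strtol(str, &endptr, 10);
        if (errno == ERANGE || endptr == str)   /* overflow or no digits parsed */
            return fallback;

        return (int)val;
    }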

int plasma_get_numthreads_numa ( )

Definition at line 306 of file plasmaos.c.

References PLASMA_CLEANENV, and PLASMA_GETENV.

{
    char *envstr = NULL;
    char *endptr;
    long int thrdnbr = -1;
    extern int errno;

    /* Env variable does not exist, we search the system number of core */
    PLASMA_GETENV("PLASMA_NUM_THREADS_NUMA", envstr);
    if ( envstr != NULL ) {
        /* Convert to long, checking for errors */
        thrdnbr = strtol(envstr, &endptr, 10);
        if ((errno == ERANGE) || ((thrdnbr==0) && (endptr==envstr))) {
            PLASMA_CLEANENV(envstr);
            return -1;
        }
    } else {
#ifdef PLASMA_HWLOC
        thrdnbr = plasma_getnuma_size();
#else
        thrdnbr = 1;
#endif
    }
    PLASMA_CLEANENV(envstr);
    return (int)thrdnbr;
}

int PLASMA_Init ( int  cores)

User routines

PLASMA_Init - Initialize PLASMA.

Parameters:
    [in] cores    Number of cores to use (threads to launch). If cores = 0, cores = PLASMA_NUM_THREADS if it is set, the system number of cores otherwise.

Returns:
Return values:
    PLASMA_SUCCESS    successful exit

Definition at line 153 of file control.c.

{
    return PLASMA_Init_Affinity(cores, NULL);
}
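
A minimal initialization/finalization sketch, assuming "plasma.h" is the public header that declares these routines and PLASMA_SUCCESS:

    #include <stdio.h>
    #include "plasma.h"   /* assumed public header */

    int main(void)
    {
        /* Launch 4 worker threads; passing 0 would fall back to
           PLASMA_NUM_THREADS or the number of online cores.      */
        int info = PLASMA_Init(4);
        if (info != PLASMA_SUCCESS) {
            fprintf(stderr, "PLASMA_Init failed: %d\n", info);
            return 1;
        }

        /* ... call PLASMA computational routines here ... */

        PLASMA_Finalize();
        return 0;
    }
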
int PLASMA_Init_Affinity ( int  cores, int *  coresbind )

PLASMA_Init_Affinity - Initialize PLASMA.

Parameters:
    [in] cores        Number of cores to use (threads to launch). If cores = 0, cores = PLASMA_NUM_THREADS if it is set, the system number of cores otherwise.
    [in] coresbind    Array specifying where to bind each thread. Each thread i is bound to coresbind[hwloc(i)] if hwloc is provided, or to coresbind[i] otherwise. If coresbind = NULL, coresbind = PLASMA_AFF_THREADS if it is set, the identity function otherwise.

Returns:
Return values:
    PLASMA_SUCCESS    successful exit

Definition at line 184 of file control.c.

{
    plasma_context_t *plasma;
    int status;
    int core;

    /* Create context and insert in the context map */
    plasma = plasma_context_create();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_create() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    status = plasma_context_insert(plasma, pthread_self());
    if (status != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_insert() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    /* Init number of cores and topology */
    plasma_topology_init();

    /* Set number of cores */
    if ( cores < 1 ) {
        plasma->world_size = plasma_get_numthreads();
        if ( plasma->world_size == -1 ) {
            plasma->world_size = 1;
            plasma_warning("PLASMA_Init", "Could not find the number of cores: the thread number is set to 1");
        }
    }
    else
        plasma->world_size = cores;

    if (plasma->world_size <= 0) {
        plasma_fatal_error("PLASMA_Init", "failed to get system size");
        return PLASMA_ERR_NOT_FOUND;
    }
    /* Check if not more cores than the hard limit */
    if (plasma->world_size > CONTEXT_THREADS_MAX) {
        plasma_fatal_error("PLASMA_Init", "not supporting so many cores");
        return PLASMA_ERR_INTERNAL_LIMIT;
    }

    /* Get the size of each NUMA node */
    plasma->group_size = plasma_get_numthreads_numa();
    while ( ((plasma->world_size)%(plasma->group_size)) != 0 )
        (plasma->group_size)--;

    /* Initialize barrier */
    plasma_barrier_init(plasma);

    /* Initialize default thread attributes */
    status = pthread_attr_init(&plasma->thread_attr);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_init() failed");
        return status;
    }
    /* Set scope to system */
    status = pthread_attr_setscope(&plasma->thread_attr, PTHREAD_SCOPE_SYSTEM);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_setscope() failed");
        return status;
    }
    /* Set concurrency */
    status = pthread_setconcurrency(plasma->world_size);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_setconcurrency() failed");
        return status;
    }

    /* Launch threads */
    memset(plasma->thread_id, 0, CONTEXT_THREADS_MAX*sizeof(pthread_t));
    if (coresbind != NULL) {
        memcpy(plasma->thread_bind, coresbind, plasma->world_size*sizeof(int));
    }
    else {
        plasma_get_affthreads(plasma->thread_bind);
    }
    /* Assign rank and thread ID for the master */
    plasma->thread_rank[0] = 0;
    plasma->thread_id[0] = pthread_self();

    for (core = 1; core < plasma->world_size; core++) {
        plasma->thread_rank[core] = core;
        pthread_create(
            &plasma->thread_id[core],
            &plasma->thread_attr,
            plasma_parallel_section,
            (void*)plasma);
    }

    /* Set thread affinity for the master */
    plasma_setaffinity(plasma->thread_bind[0]);

    /* Initialize the dynamic scheduler */
    plasma->quark = QUARK_Setup(plasma->world_size);

    plasma_barrier(plasma);
    return PLASMA_SUCCESS;
}
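
A brief sketch of initialization with an explicit binding table; the header name and the core numbers are assumptions made for illustration:

    #include "plasma.h"   /* assumed public header */

    int main(void)
    {
        /* Pin 4 worker threads to cores 0, 1, 2, 3 explicitly rather than
           relying on PLASMA_AFF_THREADS or the identity mapping.           */
        int bindtab[4] = { 0, 1, 2, 3 };

        PLASMA_Init_Affinity(4, bindtab);
        /* ... computational routines ... */
        PLASMA_Finalize();
        return 0;
    }
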
void* plasma_parallel_section ( void *  plasma_ptr)

Main thread control

Definition at line 100 of file control.c.

References plasma_context_struct::action, plasma_context_struct::action_condt, plasma_context_struct::action_mutex, plasma_context_struct::parallel_func_ptr, PLASMA_ACT_DYNAMIC, PLASMA_ACT_FINALIZE, PLASMA_ACT_PARALLEL, PLASMA_ACT_STAND_BY, plasma_barrier(), plasma_fatal_error(), plasma_rank(), plasma_setaffinity(), pthread_cond_wait(), pthread_mutex_lock(), pthread_mutex_unlock(), plasma_context_struct::quark, QUARK_Worker_Loop(), and plasma_context_struct::thread_bind.

{
    plasma_context_t *plasma = (plasma_context_t*)plasma_ptr;
    PLASMA_enum action;

    /* Set thread affinity for the worker */
    plasma_setaffinity(plasma->thread_bind[plasma_rank(plasma)]);

    plasma_barrier(plasma);
    while(1) {
        pthread_mutex_lock(&plasma->action_mutex);
        while ((action = plasma->action) == PLASMA_ACT_STAND_BY)
            pthread_cond_wait(&plasma->action_condt, &plasma->action_mutex);
        pthread_mutex_unlock(&plasma->action_mutex);
        plasma_barrier(plasma);

        switch (action) {
            case PLASMA_ACT_PARALLEL:
                plasma->parallel_func_ptr(plasma);
                break;
            case PLASMA_ACT_DYNAMIC:
                QUARK_Worker_Loop(plasma->quark, plasma_rank(plasma));
                break;
            case PLASMA_ACT_FINALIZE:
                return NULL;
            default:
                plasma_fatal_error("plasma_parallel_section", "undefined action");
                return NULL;
        }
        plasma_barrier(plasma);
    }
    return NULL;
}
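
The loop above is a stand-by/dispatch worker: each worker sleeps on action_condt until the master publishes an action other than PLASMA_ACT_STAND_BY, runs it, and re-synchronizes through plasma_barrier. A simplified, self-contained sketch of the same pattern follows; ctx_t, ACT_* and publish are illustrative names, and the sketch lets the worker consume the action itself where PLASMA instead paces master and workers with plasma_barrier.

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Hypothetical action codes and context; not PLASMA symbols. */
    typedef enum { ACT_STAND_BY, ACT_WORK, ACT_FINALIZE } action_t;

    typedef struct {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        action_t        action;
    } ctx_t;

    /* Worker: sleep until an action other than stand-by is published,
       consume it, dispatch it, and loop until ACT_FINALIZE arrives.   */
    static void *worker_main(void *arg)
    {
        ctx_t *ctx = (ctx_t*)arg;
        for (;;) {
            action_t action;

            pthread_mutex_lock(&ctx->lock);
            while ((action = ctx->action) == ACT_STAND_BY)
                pthread_cond_wait(&ctx->cond, &ctx->lock);
            ctx->action = ACT_STAND_BY;            /* consume the published action */
            pthread_mutex_unlock(&ctx->lock);

            if (action == ACT_FINALIZE)
                return NULL;
            printf("worker: running requested work\n");
        }
    }

    /* Master: publish an action and wake any waiting worker. */
    static void publish(ctx_t *ctx, action_t action)
    {
        pthread_mutex_lock(&ctx->lock);
        ctx->action = action;
        pthread_mutex_unlock(&ctx->lock);
        pthread_cond_broadcast(&ctx->cond);
    }

    int main(void)
    {
        ctx_t ctx = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, ACT_STAND_BY };
        pthread_t tid;

        pthread_create(&tid, NULL, worker_main, &ctx);
        publish(&ctx, ACT_WORK);
        sleep(1);                  /* crude pacing; PLASMA uses plasma_barrier instead */
        publish(&ctx, ACT_FINALIZE);
        pthread_join(tid, NULL);
        return 0;
    }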

int plasma_setaffinity ( int  rank)

This routine will set affinity for the calling thread that has rank 'rank'. Ranks start with 0.

If there are multiple instances of PLASMA then affinity will be wrong: all ranks 0 will be pinned to core 0.

Also, affinity is not restored when PLASMA_Finalize() is called.

Definition at line 110 of file plasmaos.c.

References PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_UNEXPECTED, and PLASMA_SUCCESS.

{
#ifndef PLASMA_AFFINITY_DISABLE
#if (defined PLASMA_OS_LINUX)
    {
        cpu_set_t set;
        CPU_ZERO( &set );
        CPU_SET( rank, &set );

#if (defined HAVE_OLD_SCHED_SETAFFINITY)
        if( sched_setaffinity( 0, &set ) < 0 )
#else /* HAVE_OLD_SCHED_SETAFFINITY */
        if( sched_setaffinity( 0, sizeof(set), &set) < 0 )
#endif /* HAVE_OLD_SCHED_SETAFFINITY */
        {
            return PLASMA_ERR_UNEXPECTED;
        }
        return PLASMA_SUCCESS;
    }
#elif (defined PLASMA_OS_MACOS)
    {
        thread_affinity_policy_data_t ap;
        int ret;

        ap.affinity_tag = 1; /* non-null affinity tag */
        ret = thread_policy_set( mach_thread_self(),
                                 THREAD_AFFINITY_POLICY,
                                 (integer_t*) &ap,
                                 THREAD_AFFINITY_POLICY_COUNT
            );
        if(ret != 0)
            return PLASMA_ERR_UNEXPECTED;

        return PLASMA_SUCCESS;
    }
#elif (defined PLASMA_OS_WINDOWS)
    {
        DWORD mask = 1 << rank;

        if( SetThreadAffinityMask(GetCurrentThread(), mask) == 0)
            return PLASMA_ERR_UNEXPECTED;

        return PLASMA_SUCCESS;
    }
#elif (defined PLASMA_OS_AIX)
    {
        tid_t self_ktid = thread_self ();
        bindprocessor(BINDTHREAD, self_ktid, rank);
        return PLASMA_SUCCESS;
    }
#else
    return PLASMA_ERR_NOT_SUPPORTED;
#endif
#endif /* PLASMA_AFFINITY_DISABLE */
    return PLASMA_SUCCESS;
}
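
On Linux the resulting mask can be read back with sched_getaffinity; a minimal stand-alone sketch, not using PLASMA internals:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        cpu_set_t set;
        CPU_ZERO(&set);
        CPU_SET(0, &set);                    /* pin the calling thread to core 0 */
        if (sched_setaffinity(0, sizeof(set), &set) != 0) {
            perror("sched_setaffinity");
            return 1;
        }

        CPU_ZERO(&set);
        if (sched_getaffinity(0, sizeof(set), &set) == 0)
            printf("core 0 in affinity mask: %s\n", CPU_ISSET(0, &set) ? "yes" : "no");
        return 0;
    }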

void plasma_topology_finalize ( )

Definition at line 97 of file plasmaos.c.

References plasma_unsetaffinity().

void plasma_topology_init ( )

Definition at line 61 of file plasmaos.c.

References pthread_mutex_lock(), and pthread_mutex_unlock().

{
    pthread_mutex_lock(&mutextopo);
    if ( !topo_initialized ) {
#if (defined PLASMA_OS_LINUX) || (defined PLASMA_OS_AIX)
        sys_corenbr = sysconf(_SC_NPROCESSORS_ONLN);
#elif (defined PLASMA_OS_MACOS)
        int mib[4];
        int cpu;
        size_t len = sizeof(cpu);

        /* set the mib for hw.ncpu */
        mib[0] = CTL_HW;
        mib[1] = HW_AVAILCPU;

        /* get the number of CPUs from the system */
        sysctl(mib, 2, &cpu, &len, NULL, 0);
        if( cpu < 1 ) {
            mib[1] = HW_NCPU;
            sysctl( mib, 2, &cpu, &len, NULL, 0 );
        }
        if( cpu < 1 ) {
            cpu = 1;
        }
        sys_corenbr = cpu;
#elif (defined PLASMA_OS_WINDOWS)
        SYSTEM_INFO sysinfo;
        GetSystemInfo(&sysinfo);
        sys_corenbr = sysinfo.dwNumberOfProcessors;
#endif
    }
    pthread_mutex_unlock(&mutextopo);
}

int plasma_unsetaffinity ( )

int plasma_yield ( )

A thread can yield the CPU when it has nothing to do, so that another thread, possibly of lower priority (for example one performing I/O), can run.

Definition at line 248 of file plasmaos.c.

References PLASMA_ERR_NOT_SUPPORTED.

{
#if (defined PLASMA_OS_LINUX) || (defined PLASMA_OS_MACOS) || (defined PLASMA_OS_AIX)
    return sched_yield();
#elif PLASMA_OS_WINDOWS
    return SleepEx(0,0);
#else
    return PLASMA_ERR_NOT_SUPPORTED;
#endif
}
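
Typical use is a polling loop that yields between checks so a spinning thread does not monopolize a core. A minimal sketch using the underlying POSIX call; wait_for_flag is a hypothetical helper, not part of PLASMA:

    #include <sched.h>
    #include <stdatomic.h>

    /* Spin until `flag` becomes nonzero, yielding the CPU between checks
       so other runnable threads (e.g. the one that will set the flag)
       get scheduled sooner.                                              */
    static void wait_for_flag(atomic_int *flag)
    {
        while (atomic_load(flag) == 0)
            sched_yield();
    }
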
int pthread_getconcurrency ( void  )
int pthread_setconcurrency ( int  )

Definition at line 264 of file plasmawinthread.c.

{
    return 0;
}
