This graph shows which files directly or indirectly include this file:

Functions
int	pthread_getconcurrency (void)
int	pthread_setconcurrency (int)
void	plasma_barrier_init (plasma_context_t *plasma)
void	plasma_barrier (plasma_context_t *plasma)
void *	plasma_parallel_section (void *plasma)
int	plasma_setaffinity (int rank)
int	plasma_unsetaffinity ()
int	plasma_yield ()
void	plasma_topology_init ()
void	plasma_topology_finalize ()
int	plasma_get_numthreads ()
int	plasma_get_numthreads_numa ()
int	plasma_get_affthreads (int *coresbind)
int	PLASMA_Init (int cores)
int	PLASMA_Init_Affinity (int cores, int *bindtab)
int	PLASMA_Finalize ()

Detailed Description

PLASMA auxiliary routines PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:: 2.4.5

Author:: Jakub Kurzak

Date:: 2010-11-15

Definition in file control.h.

Function Documentation

void plasma_barrier ( plasma_context_t * plasma )

Busy-waiting barrier

Definition at line 59 of file control.c.

References plasma_context_struct::barrier_id, plasma_context_struct::barrier_nblocked_thrds, plasma_context_struct::barrier_synccond, plasma_context_struct::barrier_synclock, PLASMA_RANK, PLASMA_SIZE, pthread_cond_broadcast(), pthread_cond_wait(), pthread_mutex_lock(), and pthread_mutex_unlock().

{
#ifdef BUSY_WAITING
    int core;
    if (PLASMA_RANK == 0) {
        for (core = 1; core < PLASMA_SIZE; core++)
            while (plasma->barrier_in[core] == 0);
        for (core = 1; core < PLASMA_SIZE; core++)
            plasma->barrier_in[core] = 0;
        for (core = 1; core < PLASMA_SIZE; core++)
            plasma->barrier_out[core] = 1;
    }
    else
    {
        plasma->barrier_in[PLASMA_RANK] = 1;
        while (plasma->barrier_out[PLASMA_RANK] == 0);
        plasma->barrier_out[PLASMA_RANK] = 0;
    }
#else
    int id;
    pthread_mutex_lock(&(plasma->barrier_synclock));
    id = plasma->barrier_id;
    plasma->barrier_nblocked_thrds++;
    if (plasma->barrier_nblocked_thrds == PLASMA_SIZE) {
        plasma->barrier_nblocked_thrds = 0;
        plasma->barrier_id++;
        pthread_cond_broadcast(&(plasma->barrier_synccond));
    }
    while (id == plasma->barrier_id)
        pthread_cond_wait(&(plasma->barrier_synccond), &(plasma->barrier_synclock));
    pthread_mutex_unlock(&(plasma->barrier_synclock));
#endif
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_barrier_init ( plasma_context_t * plasma )

Internal routines

Busy-waiting barrier initialization

Definition at line 28 of file control.c.

References plasma_context_struct::barrier_id, plasma_context_struct::barrier_nblocked_thrds, plasma_context_struct::barrier_synccond, plasma_context_struct::barrier_synclock, CONTEXT_THREADS_MAX, pthread_cond_init(), and pthread_mutex_init().

{
#ifdef BUSY_WAITING
    int core;
    for (core = 0; core < CONTEXT_THREADS_MAX; core++) {
        plasma->barrier_in[core] = 0;
        plasma->barrier_out[core] = 0;
    }
#else
    plasma->barrier_id = 0;
    plasma->barrier_nblocked_thrds = 0;
    pthread_mutex_init(&(plasma->barrier_synclock), NULL);
    pthread_cond_init( &(plasma->barrier_synccond), NULL);
#endif
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_Finalize ( )

PLASMA_Finalize - Finalize PLASMA.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Definition at line 293 of file control.c.

{
    int core;
    int status;
    void *exitcodep;
    plasma_context_t *plasma;
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Finalize()", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Terminate the dynamic scheduler */
    plasma_dynamic_sync();
    /* Free quark structures */
    QUARK_Free(plasma->quark);
    /* Set termination action */
    pthread_mutex_lock(&plasma->action_mutex);
    plasma->action = PLASMA_ACT_FINALIZE;
    pthread_mutex_unlock(&plasma->action_mutex);
    pthread_cond_broadcast(&plasma->action_condt);
    /* Barrier and clear action */
    plasma_barrier(plasma);
    plasma->action = PLASMA_ACT_STAND_BY;
    // Join threads
    for (core = 1; core < plasma->world_size; core++) {
        status = pthread_join(plasma->thread_id[core], &exitcodep);
        if (status != 0) {
            plasma_fatal_error("PLASMA_Finalize", "pthread_join() failed");
            return status;
        }
    }
    plasma_barrier_finalize(plasma);
    /* Destroy thread attributes */
    status = pthread_attr_destroy(&plasma->thread_attr);
    if (status != 0)
        plasma_fatal_error("PLASMA_Finalize", "pthread_attr_destroy() failed");
    /* Destroy topology */
    plasma_topology_finalize();
    status = plasma_context_remove(plasma, pthread_self());
    if (status != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Finalize", "plasma_context_remove() failed");
        return status;
    }
    /* Restore the concurency */
    /* actually it's really bad, we shoulde set the concurrency only
     * if it's not already done and restore it only we had change it */
    pthread_setconcurrency( 0 );
    return PLASMA_SUCCESS;
}

int plasma_get_affthreads ( int * coresbind )

Definition at line 334 of file plasmaos.c.

References CONTEXT_THREADS_MAX, PLASMA_CLEANENV, plasma_error(), PLASMA_GETENV, and PLASMA_SUCCESS.

                                          {
    char *envstr = NULL;
    int i;
    /* Env variable does not exist, we search the system number of core */
    PLASMA_GETENV("PLASMA_AFF_THREADS", envstr);
    if ( envstr == NULL) {
        for (i = 0; i < CONTEXT_THREADS_MAX; i++)
            coresbind[i] = i % sys_corenbr;
    }
    else {
        char *endptr;
        int   wrap = 0;
        int   nbr  = 0;
        long int val;
        /* We use the content of the PLASMA_AFF_THREADS env. variable */
        for (i = 0; i < CONTEXT_THREADS_MAX; i++) {
            if (!wrap) {
                val = strtol(envstr, &endptr, 10);
                if (endptr != envstr) {
                    coresbind[i] = (int)val;
                    envstr = endptr;
                }
                else {
                    /* there must be at least one entry */
                    if (i < 1) {
                        plasma_error("plasma_get_affthreads", "PLASMA_AFF_THREADS should have at least one entry => everything will be bind on core 0");
                        coresbind[i] = 0;
                        i++;
                    }
                    
                    /* there is no more values in the string                                 */
                    /* the next threads are binded with a round robin policy over this array */
                    wrap = 1;
                    nbr = i;
                    coresbind[i] = coresbind[0];
                }
            }
            else {
                coresbind[i] = coresbind[i % nbr];
            }
        }
    }
    PLASMA_CLEANENV(envstr);
    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int plasma_get_numthreads ( )

Check for an integer in an environment variable, returning the integer value or a provided default value

Definition at line 283 of file plasmaos.c.

References PLASMA_CLEANENV, and PLASMA_GETENV.

{
    char    *envstr  = NULL;
    char    *endptr;
    long int thrdnbr = -1;
    extern int errno;
    
    /* Env variable does not exist, we search the system number of core */
    PLASMA_GETENV("PLASMA_NUM_THREADS", envstr);
    if ( envstr == NULL ) {
        thrdnbr = sys_corenbr;
    } else {
        /* Convert to long, checking for errors */
        thrdnbr = strtol(envstr, &endptr, 10);
        if ((errno == ERANGE) || ((thrdnbr==0) && (endptr==envstr))) {
            PLASMA_CLEANENV(envstr);
            return -1;
        }
    } 
    PLASMA_CLEANENV(envstr);
    return (int)thrdnbr;
}

Here is the caller graph for this function:

int plasma_get_numthreads_numa ( )

Definition at line 306 of file plasmaos.c.

References PLASMA_CLEANENV, and PLASMA_GETENV.

{
    char    *envstr  = NULL;
    char    *endptr;
    long int thrdnbr = -1;
    extern int errno;
    
    /* Env variable does not exist, we search the system number of core */
    PLASMA_GETENV("PLASMA_NUM_THREADS_NUMA", envstr);
    if ( envstr != NULL ) {
        /* Convert to long, checking for errors */
        thrdnbr = strtol(envstr, &endptr, 10);
        if ((errno == ERANGE) || ((thrdnbr==0) && (endptr==envstr))) {
            PLASMA_CLEANENV(envstr);
            return -1;
        }
    } else {
#ifdef PLASMA_HWLOC
      thrdnbr = plasma_getnuma_size();
#else
      thrdnbr = 1;
#endif
    } 
    PLASMA_CLEANENV(envstr);
    return (int)thrdnbr;
}

Here is the caller graph for this function:

int PLASMA_Init ( int cores )

User routines

PLASMA_Init - Initialize PLASMA.

Parameters:

[in] cores Number of cores to use (threads to launch). If cores = 0, cores = PLASMA_NUM_THREADS if it is set, the system number of core otherwise.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Definition at line 153 of file control.c.

{
    return PLASMA_Init_Affinity(cores, NULL);
}

int PLASMA_Init_Affinity	(	int	cores,
		int *	coresbind
	)

PLASMA_Init_Affinity - Initialize PLASMA.

Parameters:

[in]	cores	Number of cores to use (threads to launch). If cores = 0, cores = PLASMA_NUM_THREADS if it is set, the system number of core otherwise.
[in]	coresbind	Array to specify where to bind each thread. Each thread i is binded to coresbind[hwloc(i)] if hwloc is provided, or to coresbind[i] otherwise. If coresbind = NULL, coresbind = PLASMA_AFF_THREADS if it is set, the identity function otherwise.

Returns:

Return values:

PLASMA_SUCCESS successful exit

Definition at line 184 of file control.c.

{
    plasma_context_t *plasma;
    int status;
    int core;
    /* Create context and insert in the context map */
    plasma = plasma_context_create();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_create() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    status = plasma_context_insert(plasma, pthread_self());
    if (status != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_insert() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    /* Init number of cores and topology */
    plasma_topology_init();
    /* Set number of cores */
    if ( cores < 1 ) {
        plasma->world_size = plasma_get_numthreads();
        if ( plasma->world_size == -1 ) {
            plasma->world_size = 1;
            plasma_warning("PLASMA_Init", "Could not find the number of cores: the thread number is set to 1");
        }
    }
    else
      plasma->world_size = cores;
    if (plasma->world_size <= 0) {
        plasma_fatal_error("PLASMA_Init", "failed to get system size");
        return PLASMA_ERR_NOT_FOUND;
    }
    /* Check if not more cores than the hard limit */
    if (plasma->world_size > CONTEXT_THREADS_MAX) {
        plasma_fatal_error("PLASMA_Init", "not supporting so many cores");
        return PLASMA_ERR_INTERNAL_LIMIT;
    }
    /* Get the size of each NUMA node */
    plasma->group_size = plasma_get_numthreads_numa();
    while ( ((plasma->world_size)%(plasma->group_size)) != 0 ) 
        (plasma->group_size)--;
    /* Initialize barrier */
    plasma_barrier_init(plasma);
    /* Initialize default thread attributes */
    status = pthread_attr_init(&plasma->thread_attr);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_init() failed");
        return status;
    }
    /* Set scope to system */
    status = pthread_attr_setscope(&plasma->thread_attr, PTHREAD_SCOPE_SYSTEM);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_setscope() failed");
        return status;
    }
    /* Set concurrency */
    status = pthread_setconcurrency(plasma->world_size);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_setconcurrency() failed");
        return status;
    }
    /*  Launch threads */
    memset(plasma->thread_id,   0, CONTEXT_THREADS_MAX*sizeof(pthread_t));
    if (coresbind != NULL) {
        memcpy(plasma->thread_bind, coresbind, plasma->world_size*sizeof(int));
    }
    else {
        plasma_get_affthreads(plasma->thread_bind);
    }
    /* Assign rank and thread ID for the master */
    plasma->thread_rank[0] = 0;
    plasma->thread_id[0] = pthread_self();
    for (core = 1; core < plasma->world_size; core++) {
        plasma->thread_rank[core] = core;
        pthread_create(
            &plasma->thread_id[core],
            &plasma->thread_attr,
             plasma_parallel_section,
             (void*)plasma);
    }
    /* Set thread affinity for the master */
    plasma_setaffinity(plasma->thread_bind[0]);
    /* Initialize the dynamic scheduler */
    plasma->quark =  QUARK_Setup(plasma->world_size);
    plasma_barrier(plasma);
    return PLASMA_SUCCESS;
}

void* plasma_parallel_section ( void * plasma_ptr )

Main thread control

Definition at line 100 of file control.c.

References plasma_context_struct::action, plasma_context_struct::action_condt, plasma_context_struct::action_mutex, plasma_context_struct::parallel_func_ptr, PLASMA_ACT_DYNAMIC, PLASMA_ACT_FINALIZE, PLASMA_ACT_PARALLEL, PLASMA_ACT_STAND_BY, plasma_barrier(), plasma_fatal_error(), plasma_rank(), plasma_setaffinity(), pthread_cond_wait(), pthread_mutex_lock(), pthread_mutex_unlock(), plasma_context_struct::quark, QUARK_Worker_Loop(), and plasma_context_struct::thread_bind.

{
    plasma_context_t *plasma = (plasma_context_t*)(plasma_ptr);
    PLASMA_enum action;
    /* Set thread affinity for the worker */
    plasma_setaffinity(plasma->thread_bind[plasma_rank(plasma)]);
    plasma_barrier(plasma);
    while(1) {
        pthread_mutex_lock(&plasma->action_mutex);
        while ((action = plasma->action) == PLASMA_ACT_STAND_BY)
            pthread_cond_wait(&plasma->action_condt, &plasma->action_mutex);
        pthread_mutex_unlock(&plasma->action_mutex);
        plasma_barrier(plasma);
        switch (action) {
            case PLASMA_ACT_PARALLEL:
                plasma->parallel_func_ptr(plasma);
                break;
            case PLASMA_ACT_DYNAMIC:
                QUARK_Worker_Loop(plasma->quark, plasma_rank(plasma));
                break;
            case PLASMA_ACT_FINALIZE:
                return NULL;
            default:
                plasma_fatal_error("plasma_parallel_section", "undefined action");
                return NULL;
        }
        plasma_barrier(plasma);
    }
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int plasma_setaffinity ( int rank )

This routine will set affinity for the calling thread that has rank 'rank'. Ranks start with 0.

If there are multiple instances of PLASMA then affinity will be wrong: all ranks 0 will be pinned to core 0.

Also, affinity is not resotred when PLASMA_Finalize() is called.

Definition at line 110 of file plasmaos.c.

References PLASMA_ERR_NOT_SUPPORTED, PLASMA_ERR_UNEXPECTED, and PLASMA_SUCCESS.

                                 {
#ifndef PLASMA_AFFINITY_DISABLE
#if (defined PLASMA_OS_LINUX)
    {
        cpu_set_t set;
        CPU_ZERO( &set );
        CPU_SET( rank, &set );
        
#if (defined HAVE_OLD_SCHED_SETAFFINITY)
        if( sched_setaffinity( 0, &set ) < 0 )
#else /* HAVE_OLD_SCHED_SETAFFINITY */
        if( sched_setaffinity( 0, sizeof(set), &set) < 0 )
#endif /* HAVE_OLD_SCHED_SETAFFINITY */
            {
                return PLASMA_ERR_UNEXPECTED;
            }
        return PLASMA_SUCCESS;
    }
#elif (defined PLASMA_OS_MACOS)
    {
        thread_affinity_policy_data_t ap;
        int                           ret;
        
        ap.affinity_tag = 1; /* non-null affinity tag */
        ret = thread_policy_set( mach_thread_self(),
                                 THREAD_AFFINITY_POLICY,
                                 (integer_t*) &ap,
                                 THREAD_AFFINITY_POLICY_COUNT
            );
        if(ret != 0) 
            return PLASMA_ERR_UNEXPECTED;
        return PLASMA_SUCCESS;
    }
#elif (defined PLASMA_OS_WINDOWS)
    {
        DWORD mask = 1 << rank;
        if( SetThreadAffinityMask(GetCurrentThread(), mask) == 0)
            return PLASMA_ERR_UNEXPECTED;
        
        return PLASMA_SUCCESS;
    }
#elif (defined PLASMA_OS_AIX)
    {
        tid_t self_ktid = thread_self ();
        bindprocessor(BINDTHREAD, self_ktid, rank);
        return PLASMA_SUCCESS;
    }
#else
    return PLASMA_ERR_NOT_SUPPORTED;
#endif
#endif /* PLASMA_AFFINITY_DISABLE */
}

Here is the caller graph for this function:

void plasma_topology_finalize ( )

Definition at line 97 of file plasmaos.c.

References plasma_unsetaffinity().

                               {
    plasma_unsetaffinity();
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_topology_init ( )

Definition at line 61 of file plasmaos.c.

References pthread_mutex_lock(), and pthread_mutex_unlock().

                           {
    pthread_mutex_lock(&mutextopo);
    if ( !topo_initialized ) {
#if (defined PLASMA_OS_LINUX) || (defined PLASMA_OS_AIX)
        sys_corenbr = sysconf(_SC_NPROCESSORS_ONLN);
#elif (defined PLASMA_OS_MACOS)
        int mib[4];
        int cpu;
        size_t len = sizeof(cpu);
        
        /* set the mib for hw.ncpu */
        mib[0] = CTL_HW;
        mib[1] = HW_AVAILCPU;
        /* get the number of CPUs from the system */
        sysctl(mib, 2, &cpu, &len, NULL, 0);
        if( cpu < 1 ) {
            mib[1] = HW_NCPU;
            sysctl( mib, 2, &cpu, &len, NULL, 0 );
        }
        if( cpu < 1 ) {
            cpu = 1;
        }
        sys_corenbr = cpu;
#elif (defined PLASMA_OS_WINDOWS)
        SYSTEM_INFO sysinfo;
        GetSystemInfo(&sysinfo);
        sys_corenbr = sysinfo.dwNumberOfProcessors;
#endif
    }
    pthread_mutex_unlock(&mutextopo);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int plasma_unsetaffinity ( )

Here is the caller graph for this function:

int plasma_yield ( )

A thread can unlock the CPU if it has nothing to do to let another thread of less priority running for example for I/O.

Definition at line 248 of file plasmaos.c.

References PLASMA_ERR_NOT_SUPPORTED.

                   {
#if (defined PLASMA_OS_LINUX) || (defined PLASMA_OS_MACOS) || (defined PLASMA_OS_AIX)
    return sched_yield();
#elif PLASMA_OS_WINDOWS
    return SleepEx(0,0);
#else
    return PLASMA_ERR_NOT_SUPPORTED;
#endif
}

int pthread_getconcurrency ( void )

int pthread_setconcurrency ( int )

Definition at line 264 of file plasmawinthread.c.

                                                                   {
  pthread_conclevel = level;
  return 0;
}

Here is the caller graph for this function:

Functions

Detailed Description

Function Documentation