MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
quark.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <pthread.h>
#include "icl_list.h"
#include "icl_hash.h"
#include "bsd_queue.h"
#include "bsd_tree.h"
#include "quark.h"
#include "quark_unpack_args.h"
Include dependency graph for quark.c:

Go to the source code of this file.

Data Structures

struct  quark_s
 
struct  Quark_sequence_s
 
struct  worker_s
 
struct  quark_task_s
 
struct  dependency_s
 
struct  scratch_s
 
struct  address_set_node_s
 
struct  ll_list_node_s
 
struct  completed_tasks_node_s
 
struct  task_priority_tree_node_s
 

Macros

#define inline   __inline
 
#define fopen(ppfile, name, mode)   *ppfile = fopen(name, mode)
 
#define ULLONG_MAX   18446744073709551615ULL
 
#define DIRECTION_MASK   0x07
 
#define tasklevel_width_max_level   5000
 
#define DEPCOLOR   "black"
 
#define ANTIDEPCOLOR   "red"
 
#define GATHERVDEPCOLOR   "green"
 
#define DOT_DAG_FILENAME   "dot_dag_file.dot"
 
#define dot_dag_level_update(parent_level, child_level, quark)
 
#define dot_dag_print_edge(parentid, childid, color)
 

Typedefs

typedef struct worker_s Worker
 
typedef struct quark_task_s Task
 
typedef struct dependency_s Dependency
 
typedef struct scratch_s Scratch
 
typedef struct address_set_node_s Address_Set_Node
 
typedef struct ll_list_node_s ll_list_node_t
 
typedef struct ll_list_head_s ll_list_head_t
 
typedef struct
completed_tasks_node_s 
completed_tasks_node_t
 
typedef struct
completed_tasks_head_s 
completed_tasks_head_t
 
typedef struct
task_priority_tree_node_s 
task_priority_tree_node_t
 
typedef struct
task_priority_tree_head_s 
task_priority_tree_head_t
 

Enumerations

enum  task_status {
  NOTREADY, QUEUED, RUNNING, DONE,
  CANCELLED
}
 
enum  task_num { DGETRF, DTSTRF, DGESSM, DSSSM }
 
enum  bool { FALSE, TRUE }
 

Functions

int quark_getenv_int (char *name, int defval)
 
 LIST_HEAD (ll_list_head_s, ll_list_node_s)
 
 TAILQ_HEAD (completed_tasks_head_s, completed_tasks_node_s)
 
 RB_HEAD (task_priority_tree_head_s, task_priority_tree_node_s)
 
static int compare_task_priority_tree_nodes (task_priority_tree_node_t *n1, task_priority_tree_node_t *n2)
 
 RB_GENERATE (task_priority_tree_head_s, task_priority_tree_node_s, n_entry, compare_task_priority_tree_nodes)
 
static Taskquark_task_new ()
 
static void task_delete (Quark *quark, Task *task)
 
static Workerworker_new (Quark *quark, int rank)
 
static void worker_delete (Worker *worker)
 
static int quark_revolve_robin (Quark *quark)
 
static void quark_insert_task_dependencies (Quark *quark, Task *task)
 
static void quark_check_and_queue_ready_task (Quark *quark, Task *task)
 
static void work_set_affinity_and_call_main_loop (Worker *worker)
 
static void work_main_loop (Worker *worker)
 
static Scratchscratch_new (void *arg_ptr, int arg_size, icl_list_t *task_args_list_node_ptr)
 
static void scratch_allocate (Task *task)
 
static void scratch_deallocate (Task *task)
 
static void address_set_node_delete (Quark *quark, Address_Set_Node *address_set_node)
 
static void worker_remove_completed_task_enqueue_for_later_processing (Quark *quark, Task *task, int worker_rank)
 
static void remove_completed_task_and_check_for_ready (Quark *quark, Task *task, int worker_rank)
 
static void process_completed_tasks (Quark *quark)
 
int quark_setaffinity (int rank)
 
void quark_topology_init ()
 
void quark_topology_finalize ()
 
int quark_get_numthreads ()
 
int * quark_get_affthreads ()
 
int quark_yield ()
 
static int pthread_mutex_lock_asn (pthread_mutex_t *mtx)
 
static int pthread_mutex_trylock_asn (pthread_mutex_t *mtx)
 
static int pthread_mutex_unlock_asn (pthread_mutex_t *mtx)
 
static int pthread_mutex_lock_ready_list (pthread_mutex_t *mtx)
 
static int pthread_mutex_trylock_ready_list (pthread_mutex_t *mtx)
 
static int pthread_mutex_unlock_ready_list (pthread_mutex_t *mtx)
 
static int pthread_mutex_lock_wrap (pthread_mutex_t *mtx)
 
static int pthread_mutex_unlock_wrap (pthread_mutex_t *mtx)
 
static int pthread_mutex_lock_completed_tasks (pthread_mutex_t *mtx)
 
static int pthread_mutex_trylock_completed_tasks (pthread_mutex_t *mtx)
 
static int pthread_mutex_unlock_completed_tasks (pthread_mutex_t *mtx)
 
static int pthread_cond_wait_ready_list (pthread_cond_t *cond, pthread_mutex_t *mtx)
 
int QUARK_Thread_Rank (Quark *quark)
 
void * QUARK_Args_List (Quark *quark)
 
void * QUARK_Args_Pop (void *args_list, void **last_arg)
 
static unsigned int fnv_hash_function (void *key, int len)
 
static unsigned int address_hash_function (void *address)
 
static int address_key_compare (void *addr1, void *addr2)
 
static unsigned int ullong_hash_function (void *key)
 
static int ullong_key_compare (void *key1, void *key2)
 
static char * arg_dup (char *arg, int size)
 
static Dependencydependency_new (void *addr, long long size, quark_direction_t dir, bool loc, Task *task, bool accumulator, bool gatherv, icl_list_t *task_args_list_node_ptr)
 
QuarkQUARK_Setup (int num_threads)
 
QuarkQUARK_New (int num_threads)
 
void QUARK_Barrier (Quark *quark)
 
void QUARK_Waitall (Quark *quark)
 
void QUARK_Free (Quark *quark)
 
void QUARK_Delete (Quark *quark)
 
Taskquark_set_task_flags_in_task_structure (Quark *quark, Task *task, Quark_Task_Flags *task_flags)
 
TaskQUARK_Task_Init (Quark *quark, void(*function)(Quark *), Quark_Task_Flags *task_flags)
 
void QUARK_Task_Pack_Arg (Quark *quark, Task *task, int arg_size, void *arg_ptr, int arg_flags)
 
unsigned long long QUARK_Insert_Task_Packed (Quark *quark, Task *task)
 
unsigned long long QUARK_Insert_Task (Quark *quark, void(*function)(Quark *), Quark_Task_Flags *task_flags,...)
 
unsigned long long QUARK_Execute_Task (Quark *quark, void(*function)(Quark *), Quark_Task_Flags *task_flags,...)
 
int QUARK_Cancel_Task (Quark *quark, unsigned long long taskid)
 
static Address_Set_Nodeaddress_set_node_new (void *address, int size)
 
void quark_avoid_war_dependencies (Quark *quark, Address_Set_Node *asn_old, Task *parent_task)
 
static void address_set_node_initial_gatherv_check_and_launch (Quark *quark, Address_Set_Node *address_set_node, Dependency *completed_dep, int worker_rank)
 
static void address_set_node_accumulator_find_prepend (Quark *quark, Address_Set_Node *address_set_node)
 
static void address_set_node_initial_input_check_and_launch (Quark *quark, Address_Set_Node *address_set_node, Dependency *completed_dep, int worker_rank)
 
static void address_set_node_initial_output_check_and_launch (Quark *quark, Address_Set_Node *address_set_node, Dependency *completed_dep, int worker_rank)
 
void QUARK_Worker_Loop (Quark *quark, int thread_rank)
 
Quark_SequenceQUARK_Sequence_Create (Quark *quark)
 
int QUARK_Sequence_Cancel (Quark *quark, Quark_Sequence *sequence)
 
Quark_SequenceQUARK_Sequence_Destroy (Quark *quark, Quark_Sequence *sequence)
 
int QUARK_Sequence_Wait (Quark *quark, Quark_Sequence *sequence)
 
Quark_SequenceQUARK_Get_Sequence (Quark *quark)
 
char * QUARK_Get_Task_Label (Quark *quark)
 
Quark_Task_FlagsQUARK_Task_Flag_Set (Quark_Task_Flags *task_flags, int flag, intptr_t val)
 

Variables

static char * quark_task_default_label = " "
 
static char * quark_task_default_color = "white"
 
FILE * dot_dag_file
 

Detailed Description

QUARK (QUeuing And Runtime for Kernels) provides a runtime enviornment for the dynamic execution of precedence-constrained tasks.

QUARK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version
2.3.1
Author
Asim YarKhan
Date
2010-11-15

Definition in file quark.c.

Macro Definition Documentation

#define ANTIDEPCOLOR   "red"

Definition at line 294 of file quark.c.

#define DEPCOLOR   "black"

Definition at line 293 of file quark.c.

#define DIRECTION_MASK   0x07

Definition at line 91 of file quark.c.

#define DOT_DAG_FILENAME   "dot_dag_file.dot"

Definition at line 296 of file quark.c.

#define dot_dag_level_update (   parent_level,
  child_level,
  quark 
)
Value:
if ( quark->dot_dag_enable ) { \
pthread_mutex_lock_wrap( &quark->dot_dag_mutex ); \
child_level = (parent_level+1 < child_level ? child_level : parent_level+1 ); \
pthread_mutex_unlock_wrap( &quark->dot_dag_mutex ); }
static int pthread_mutex_unlock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:278
static int pthread_mutex_lock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:277

Definition at line 298 of file quark.c.

#define dot_dag_print_edge (   parentid,
  childid,
  color 
)
Value:
if ( quark->dot_dag_enable && parentid!=0 ) { \
pthread_mutex_lock_wrap( &quark->dot_dag_mutex ); \
fprintf(dot_dag_file, "t%lld->t%lld [color=\"%s\"];\n", parentid, childid, color); \
pthread_mutex_unlock_wrap( &quark->dot_dag_mutex ); \
}
FILE * dot_dag_file
Definition: quark.c:297
static int pthread_mutex_unlock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:278
static int pthread_mutex_lock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:277

Definition at line 303 of file quark.c.

#define fopen (   ppfile,
  name,
  mode 
)    *ppfile = fopen(name, mode)

Definition at line 65 of file quark.c.

#define GATHERVDEPCOLOR   "green"

Definition at line 295 of file quark.c.

#define inline   __inline

Definition at line 57 of file quark.c.

#define tasklevel_width_max_level   5000

Definition at line 115 of file quark.c.

#define ULLONG_MAX   18446744073709551615ULL

Definition at line 85 of file quark.c.

Typedef Documentation

typedef struct completed_tasks_head_s completed_tasks_head_t

Definition at line 214 of file quark.c.

typedef struct dependency_s Dependency
typedef struct ll_list_head_s ll_list_head_t

Definition at line 206 of file quark.c.

typedef struct scratch_s Scratch
typedef struct quark_task_s Task
typedef struct task_priority_tree_head_s task_priority_tree_head_t

Definition at line 224 of file quark.c.

typedef struct worker_s Worker

Enumeration Type Documentation

enum bool
Enumerator
FALSE 
TRUE 

Definition at line 94 of file quark.c.

94 { FALSE, TRUE } bool;
Definition: quark.c:94
Definition: quark.c:94
bool
Definition: quark.c:94
enum task_num
Enumerator
DGETRF 
DTSTRF 
DGESSM 
DSSSM 

Definition at line 93 of file quark.c.

Definition: quark.c:93
Definition: quark.c:93
Definition: quark.c:93
Definition: quark.c:93
task_num
Definition: quark.c:93
Enumerator
NOTREADY 
QUEUED 
RUNNING 
DONE 
CANCELLED 

Definition at line 92 of file quark.c.

Definition: quark.c:92
Definition: quark.c:92
Definition: quark.c:92
task_status
Definition: quark.c:92
Definition: quark.c:92

Function Documentation

static unsigned int address_hash_function ( void *  address)
inlinestatic

Hash function to map addresses, cut into "long" size chunks, then XOR. The result will be matched to hash table size using mod in the hash table implementation

Definition at line 456 of file quark.c.

References fnv_hash_function().

457 {
458  int len = sizeof(void *);
459  unsigned int hashval = fnv_hash_function( &address, len );
460  return hashval;
461 }
static unsigned int fnv_hash_function(void *key, int len)
Definition: quark.c:440

Here is the call graph for this function:

Here is the caller graph for this function:

static int address_key_compare ( void *  addr1,
void *  addr2 
)
inlinestatic

Adress compare function for hash table

Definition at line 466 of file quark.c.

467 {
468  return (addr1 == addr2);
469 }

Here is the caller graph for this function:

static void address_set_node_accumulator_find_prepend ( Quark quark,
Address_Set_Node address_set_node 
)
static

Called by a worker each time a task is removed from an address set node. Sweeps through a sequence of ACCUMULATOR tasks from the beginning and prepends one at the beginning if only one (chained) dependency remaining. This does not actually lauch the prepended task, it depends on another function to do that. Assumes address_set_mutex is locked.

Definition at line 1450 of file quark.c.

References dependency_s::accumulator, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, FALSE, icl_list_delete(), icl_list_first(), icl_list_next(), icl_list_prepend(), quark_task_s::num_dependencies_remaining, dependency_s::task, and address_set_node_s::waiting_deps.

1451 {
1452  icl_list_t *dep_node = NULL;
1453  icl_list_t *first_dep_node = NULL;
1454  icl_list_t *first_ready_dep_node = NULL;
1455  icl_list_t *last_ready_dep_node = NULL;
1456  icl_list_t *last_dep_node = NULL;
1457  icl_list_t *swap_node = NULL;
1458  int acc_dep_count = 0;
1459 
1460  /* FOR each ACCUMULATOR task waiting at the beginning of address_set_node */
1461  for (dep_node = icl_list_first(address_set_node->waiting_deps);
1462  dep_node != NULL;
1463  dep_node = icl_list_next( address_set_node->waiting_deps, dep_node )) {
1464  Dependency *dependency = (Dependency *)dep_node->data;
1465  /* IF not an ACCUMULATOR dependency - break */
1466  if (dependency->accumulator == FALSE) break;
1467  Task *task = dependency->task;
1468  /* Scan through list keeping first, first_ready, last_ready, last */
1469  if (first_dep_node==NULL) first_dep_node = dep_node;
1470  if ( task->num_dependencies_remaining==1 ) {
1471  if (first_ready_dep_node==NULL) first_ready_dep_node = dep_node;
1472  last_ready_dep_node = dep_node;
1473  }
1474  last_dep_node = dep_node; /* TODO */
1475  acc_dep_count++;
1476  }
1477 
1478  /* Choose and move chosen ready node to the front of the list */
1479  /* Heuristic: Flip-flop between first-ready and last-ready.
1480  * Tested (always first, always last, flip-flop first/last) but
1481  * there was always a bad scenario. If perfect loop orders are
1482  * provided (e.g. Choleky inversion test) then this will not make
1483  * performance worse. If bad loops are provided, this will
1484  * improve performance, though not to the point of perfect
1485  * loops. */
1486  if (acc_dep_count % 2 == 0 ) {
1487  if ( last_ready_dep_node!=NULL ) swap_node = last_ready_dep_node;
1488  } else {
1489  if ( first_ready_dep_node != NULL ) swap_node = first_ready_dep_node;
1490  }
1491  if ( swap_node != NULL ) {
1492  Dependency *dependency = (Dependency *)swap_node->data;
1493  /* Move to front of the address_set_node waiting_deps list (if not already there) */
1494  if ( swap_node!=icl_list_first(address_set_node->waiting_deps) ) {
1495  icl_list_t *tmp_swap_node = icl_list_prepend( address_set_node->waiting_deps, dependency );
1496  dependency->address_set_waiting_deps_node_ptr = tmp_swap_node;
1497  icl_list_delete( address_set_node->waiting_deps, swap_node, NULL );
1498  }
1499  /* Lock the dependency in place by setting ACC to false now */
1500  dependency->accumulator = FALSE;
1501  }
1502 }
icl_list_t * waiting_deps
Definition: quark.c:187
volatile int num_dependencies_remaining
Definition: quark.c:146
icl_list_t * icl_list_prepend(icl_list_t *head, void *data)
Definition: icl_list.c:289
void * data
Definition: icl_list.h:18
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
Definition: quark.c:94
struct quark_task_s * task
Definition: quark.c:163
icl_list_t * address_set_waiting_deps_node_ptr
Definition: quark.c:171
int icl_list_delete(icl_list_t *head, icl_list_t *pos, void(*free_function)(void *))
Definition: icl_list.c:84
volatile bool accumulator
Definition: quark.c:168
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static void address_set_node_delete ( Quark quark,
Address_Set_Node address_set_node 
)
static

Clean and free address set node structures.

Definition at line 1230 of file quark.c.

References address_set_node_s::address, quark_s::address_set, address_set_node_s::delete_data_at_address_when_node_is_deleted, quark_s::dot_dag_enable, icl_hash_delete(), icl_list_destroy(), TRUE, and address_set_node_s::waiting_deps.

1231 {
1232  /* Free data if it was allocted as a WAR data copy */
1233  if ( address_set_node->delete_data_at_address_when_node_is_deleted == TRUE ) {
1234  free( address_set_node->address );
1235  }
1236  /* Do not free this structure if we are generating DAGs. The
1237  * structure contains information about the last task to write the
1238  * data used to make DAG edges */
1239  if ( quark->dot_dag_enable )
1240  return;
1241 
1242  /* Remove any data structures in the waiting_deps list */
1243  if ( address_set_node->waiting_deps != NULL )
1244  icl_list_destroy( address_set_node->waiting_deps, NULL );
1245  /* Delete and free the hash table entry if this was NOT a WAR create entry */
1246  icl_hash_delete( quark->address_set, address_set_node->address, NULL, NULL );
1247  /* Remove the data structure */
1248  free( address_set_node );
1249 }
icl_list_t * waiting_deps
Definition: quark.c:187
int icl_list_destroy(icl_list_t *head, void(*free_function)(void *))
Definition: icl_list.c:143
icl_hash_t * address_set
Definition: quark.c:108
int dot_dag_enable
Definition: quark.c:116
volatile bool delete_data_at_address_when_node_is_deleted
Definition: quark.c:191
Definition: quark.c:94
void * address
Definition: quark.c:184
int icl_hash_delete(icl_hash_t *ht, void *key, void(*free_key)(void *), void(*free_data)(void *))
Definition: icl_hash.c:224

Here is the call graph for this function:

Here is the caller graph for this function:

static void address_set_node_initial_gatherv_check_and_launch ( Quark quark,
Address_Set_Node address_set_node,
Dependency completed_dep,
int  worker_rank 
)
static

Called by a worker each time a task is removed from an address set node. Sweeps through a sequence of GATHERV dependencies from the beginning, and enables them all. Assumes address_set_mutex is locked.

Definition at line 1413 of file quark.c.

References icl_list_s::data, dependency_s::direction, dot_dag_level_update, dot_dag_print_edge, FALSE, dependency_s::gatherv, GATHERVDEPCOLOR, icl_list_first(), icl_list_next(), INOUT, quark_task_s::num_dependencies_remaining, OUTPUT, quark_check_and_queue_ready_task(), dependency_s::ready, dependency_s::task, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, and address_set_node_s::waiting_deps.

1414 {
1415  icl_list_t *next_dep_node;
1416  Task *completed_task = completed_dep->task;
1417  for ( next_dep_node=icl_list_first(address_set_node->waiting_deps);
1418  next_dep_node!=NULL && next_dep_node->data != NULL;
1419  next_dep_node=icl_list_next(address_set_node->waiting_deps, next_dep_node) ) {
1420  Dependency *next_dep = (Dependency *)next_dep_node->data;
1421  /* Break when we run out of GATHERV output dependencies */
1422  if ( next_dep->gatherv==FALSE ) break;
1423  if ( next_dep->direction!=OUTPUT && next_dep->direction!=INOUT ) break;
1424  Task *next_task = next_dep->task;
1425  /* Update next_dep ready status */
1426  if ( next_dep->ready == FALSE ) {
1427  /* Record the locality information with the task data structure */
1428  //if ( next_dep->locality ) next_task->locality_preserving_dep = worker_rank;
1429  /* Mark the next dependency as ready since we have GATHERV flag */
1430  next_dep->ready = TRUE;
1431  dot_dag_print_edge( completed_task->taskid, next_task->taskid, GATHERVDEPCOLOR );
1432  dot_dag_level_update( completed_task->tasklevel, next_task->tasklevel, quark );
1433  next_task->num_dependencies_remaining--;
1434  /* If the dep status became true check related task, and put onto ready queues */
1435  quark_check_and_queue_ready_task( quark, next_task );
1436  }
1437 
1438  }
1439 }
icl_list_t * waiting_deps
Definition: quark.c:187
volatile int num_dependencies_remaining
Definition: quark.c:146
#define dot_dag_print_edge(parentid, childid, color)
Definition: quark.c:303
#define dot_dag_level_update(parent_level, child_level, quark)
Definition: quark.c:298
void * data
Definition: icl_list.h:18
bool gatherv
Definition: quark.c:169
volatile bool ready
Definition: quark.c:174
unsigned long long tasklevel
Definition: quark.c:152
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
#define GATHERVDEPCOLOR
Definition: quark.c:295
Definition: quark.c:94
struct quark_task_s * task
Definition: quark.c:163
quark_direction_t direction
Definition: quark.c:166
Definition: quark.c:94
unsigned long long taskid
Definition: quark.c:151
Definition: quark.h:52
static void quark_check_and_queue_ready_task(Quark *quark, Task *task)
Definition: quark.c:1258
Definition: quark.h:52
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static void address_set_node_initial_input_check_and_launch ( Quark quark,
Address_Set_Node address_set_node,
Dependency completed_dep,
int  worker_rank 
)
static

Called by a worker each time a task is removed from an address set node. Sweeps through a sequence of initial INPUT dependencies on an address, and launches any that are ready to go. Assumes address_set_mutex is locked.

Definition at line 1512 of file quark.c.

References ANTIDEPCOLOR, icl_list_s::data, DEPCOLOR, dependency_s::direction, quark_s::dot_dag_enable, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_list_first(), icl_list_next(), INOUT, INPUT, OUTPUT, quark_check_and_queue_ready_task(), dependency_s::ready, dependency_s::task, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, and address_set_node_s::waiting_deps.

1513 {
1514  icl_list_t *next_dep_node;
1515  Task *completed_task = completed_dep->task;
1516  for ( next_dep_node=icl_list_first(address_set_node->waiting_deps);
1517  next_dep_node!=NULL && next_dep_node->data != NULL;
1518  next_dep_node=icl_list_next(address_set_node->waiting_deps, next_dep_node) ) {
1519  Dependency *next_dep = (Dependency *)next_dep_node->data;
1520  Task *next_task = next_dep->task;
1521  /* Break when we hit an output dependency */
1522  if ( (next_dep->direction==OUTPUT || next_dep->direction==INOUT) ) {
1523  if ( completed_dep->direction == INPUT ) {
1524  /* Print DAG connections for antidependencies */
1525  dot_dag_print_edge( completed_task->taskid, next_task->taskid, ANTIDEPCOLOR );
1526  dot_dag_level_update( completed_task->tasklevel, next_task->tasklevel, quark );
1527  }
1528  break;
1529  }
1530  /* Update next_dep ready status; this logic assumes the breaks at the bottom */
1531  if ( next_dep->direction==INPUT && next_dep->ready == FALSE ) {
1532  /* Record the locality information with the task data structure */
1533  //if ( next_dep->locality ) next_task->locality_thread_id = worker_rank;
1534  /* If next_dep is INPUT, mark the next dependency as ready */
1535  next_dep->ready = TRUE;
1536  /* Only OUTPUT->INPUT edges get here */
1537  dot_dag_print_edge( completed_task->taskid, next_task->taskid, DEPCOLOR );
1538  dot_dag_level_update( completed_task->tasklevel, next_task->tasklevel, quark );
1539  next_task->num_dependencies_remaining--;
1540  /* If the dep status became true check related task, and put onto ready queues */
1541  quark_check_and_queue_ready_task( quark, next_task );
1542  }
1543 
1544  /* if we are generating the DAG, keep looping till an output
1545  * dependency (in order to print all WAR edges) */
1546  if (! quark->dot_dag_enable ) {
1547  /* If current original dependency (dep) was INPUT, we only need to
1548  * activate next INPUT/OUTPUT/INOUT dep, others should already be
1549  * handled; if original dep was OUTPUT/INOUT, need to keep
1550  * going till next OUTPUT/INOUT */
1551  if ( completed_dep->direction == INPUT ) break;
1552  }
1553  }
1554 }
icl_list_t * waiting_deps
Definition: quark.c:187
#define dot_dag_print_edge(parentid, childid, color)
Definition: quark.c:303
#define dot_dag_level_update(parent_level, child_level, quark)
Definition: quark.c:298
void * data
Definition: icl_list.h:18
volatile bool ready
Definition: quark.c:174
unsigned long long tasklevel
Definition: quark.c:152
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
Definition: quark.c:94
struct quark_task_s * task
Definition: quark.c:163
int dot_dag_enable
Definition: quark.c:116
quark_direction_t direction
Definition: quark.c:166
#define INPUT
Definition: quark.h:53
Definition: quark.c:94
unsigned long long taskid
Definition: quark.c:151
Definition: quark.h:52
static void quark_check_and_queue_ready_task(Quark *quark, Task *task)
Definition: quark.c:1258
Definition: quark.h:52
#define ANTIDEPCOLOR
Definition: quark.c:294
#define DEPCOLOR
Definition: quark.c:293
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static void address_set_node_initial_output_check_and_launch ( Quark quark,
Address_Set_Node address_set_node,
Dependency completed_dep,
int  worker_rank 
)
static

Called by a worker each time a task is removed from an address set node. Checks any initial OUTPUT/INOUT dependencies on an address, and launches any tasks that are ready to go. Assumes address_set_mutex is locked.

Definition at line 1564 of file quark.c.

References icl_list_s::data, DEPCOLOR, dependency_s::direction, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_list_first(), INOUT, OUTPUT, quark_check_and_queue_ready_task(), dependency_s::ready, dependency_s::task, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, and address_set_node_s::waiting_deps.

1565 {
1566  icl_list_t *next_dep_node;
1567  next_dep_node = icl_list_first(address_set_node->waiting_deps);
1568  if ( next_dep_node!=NULL && next_dep_node->data!=NULL ) {
1569  Dependency *next_dep = (Dependency *)next_dep_node->data;
1570  Task *next_task = next_dep->task;
1571  if ( (next_dep->direction==OUTPUT || next_dep->direction==INOUT) ) {
1572  /* Process OUTPUT next_deps, if at beginning of address_set_list waiting_deps starts */
1573  if ( next_dep->ready == FALSE ) {
1574  /* Record the locality information with the task data structure */
1575  //if ( next_dep->locality ) next_task->locality_thread_id = worker_rank;
1576  /* If next_dep is output, mark the next dep as ready only if it is at the front */
1577  next_dep->ready = TRUE;
1578  Task *completed_task = completed_dep->task;
1579  if ( completed_dep->direction==OUTPUT || completed_dep->direction==INOUT )
1580  dot_dag_print_edge( completed_task->taskid, next_task->taskid, DEPCOLOR );
1581  /* else */ /* Handled in initial_input_check_and_launch */
1582  /* dot_dag_print_edge( completed_task->taskid, next_task->taskid, ANTIDEPCOLOR ); */
1583  dot_dag_level_update( completed_task->tasklevel, next_task->tasklevel, quark );
1584  next_task->num_dependencies_remaining--;
1585  quark_check_and_queue_ready_task( quark, next_task );
1586  }
1587  }
1588  }
1589 }
icl_list_t * waiting_deps
Definition: quark.c:187
#define dot_dag_print_edge(parentid, childid, color)
Definition: quark.c:303
#define dot_dag_level_update(parent_level, child_level, quark)
Definition: quark.c:298
void * data
Definition: icl_list.h:18
volatile bool ready
Definition: quark.c:174
unsigned long long tasklevel
Definition: quark.c:152
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
Definition: quark.c:94
struct quark_task_s * task
Definition: quark.c:163
quark_direction_t direction
Definition: quark.c:166
Definition: quark.c:94
unsigned long long taskid
Definition: quark.c:151
Definition: quark.h:52
static void quark_check_and_queue_ready_task(Quark *quark, Task *task)
Definition: quark.c:1258
Definition: quark.h:52
#define DEPCOLOR
Definition: quark.c:293

Here is the call graph for this function:

Here is the caller graph for this function:

static Address_Set_Node* address_set_node_new ( void *  address,
int  size 
)
static

Allocate and initialize address_set_node structure. These are inserted into the hash table.

Definition at line 1206 of file quark.c.

References address_set_node_s::address, address_set_node_s::delete_data_at_address_when_node_is_deleted, FALSE, icl_list_new(), address_set_node_s::last_reader_or_writer_taskid, address_set_node_s::last_reader_or_writer_tasklevel, address_set_node_s::last_thread, address_set_node_s::last_writer_taskid, address_set_node_s::last_writer_tasklevel, address_set_node_s::num_waiting_inout, address_set_node_s::num_waiting_input, address_set_node_s::num_waiting_output, address_set_node_s::size, and address_set_node_s::waiting_deps.

1207 {
1208  Address_Set_Node *address_set_node = (Address_Set_Node *)malloc(sizeof(Address_Set_Node));
1209  assert( address_set_node != NULL );
1210  address_set_node->address = address;
1211  address_set_node->size = size;
1212  address_set_node->last_thread = -1;
1213  address_set_node->waiting_deps = icl_list_new();
1214  assert( address_set_node->waiting_deps != NULL );
1215  address_set_node->num_waiting_input = 0;
1216  address_set_node->num_waiting_output = 0;
1217  address_set_node->num_waiting_inout = 0;
1219  address_set_node->last_writer_taskid = 0;
1220  address_set_node->last_writer_tasklevel = 0;
1221  address_set_node->last_reader_or_writer_taskid = 0;
1222  address_set_node->last_reader_or_writer_tasklevel = 0;
1223  return address_set_node;
1224 }
icl_list_t * waiting_deps
Definition: quark.c:187
icl_list_t * icl_list_new()
Definition: icl_list.c:22
unsigned long long last_reader_or_writer_taskid
Definition: quark.c:194
unsigned long long last_reader_or_writer_tasklevel
Definition: quark.c:195
volatile int num_waiting_output
Definition: quark.c:189
volatile int num_waiting_inout
Definition: quark.c:190
Definition: quark.c:94
volatile bool delete_data_at_address_when_node_is_deleted
Definition: quark.c:191
volatile int num_waiting_input
Definition: quark.c:188
unsigned long long last_writer_tasklevel
Definition: quark.c:193
void * address
Definition: quark.c:184
volatile int last_thread
Definition: quark.c:186
unsigned long long last_writer_taskid
Definition: quark.c:192

Here is the call graph for this function:

Here is the caller graph for this function:

static char* arg_dup ( char *  arg,
int  size 
)
inlinestatic

Duplicate the argument, allocating a memory buffer for it

Definition at line 509 of file quark.c.

510 {
511  char *argbuf = (char *) malloc(size);
512  assert( argbuf != NULL );
513  memcpy(argbuf, arg, size);
514  return argbuf;
515 }

Here is the caller graph for this function:

static int compare_task_priority_tree_nodes ( task_priority_tree_node_t n1,
task_priority_tree_node_t n2 
)
static

Definition at line 225 of file quark.c.

226 {
227  return n2->priority - n1->priority;
228 }
static Dependency* dependency_new ( void *  addr,
long long  size,
quark_direction_t  dir,
bool  loc,
Task task,
bool  accumulator,
bool  gatherv,
icl_list_t task_args_list_node_ptr 
)
inlinestatic

Allocate and initialize a dependency structure

Definition at line 521 of file quark.c.

References dependency_s::accumulator, dependency_s::address, dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, dependency_s::direction, FALSE, dependency_s::gatherv, INOUT, dependency_s::locality, quark_task_s::locality_preserving_dep, OUTPUT, dependency_s::ready, dependency_s::size, dependency_s::task, dependency_s::task_args_list_node_ptr, and dependency_s::task_dependency_list_node_ptr.

522 {
523  Dependency *dep = (Dependency *) malloc(sizeof(Dependency));
524  assert(dep != NULL);
525  dep->task = task;
526  dep->address = addr;
527  dep->size = size;
528  dep->direction = dir;
529  dep->locality = loc;
530  dep->accumulator = accumulator;
531  dep->gatherv = gatherv;
532  dep->address_set_node_ptr = NULL; /* convenience ptr, filled later */
533  dep->address_set_waiting_deps_node_ptr = NULL; /* convenience ptr, filled later */
534  dep->task_args_list_node_ptr = task_args_list_node_ptr; /* convenience ptr for WAR address updating */
535  dep->task_dependency_list_node_ptr = NULL; /* convenience ptr */
536  dep->ready = FALSE;
537  /* For the task, track the dependency to be use to do locality
538  * preservation; by default, use first output dependency. */
539  if ( dep->locality )
540  task->locality_preserving_dep = dep;
541  else if ( (task->locality_preserving_dep == NULL) && ( dep->direction==OUTPUT || dep->direction==INOUT) )
542  task->locality_preserving_dep = dep;
543  return dep;
544 }
int size
Definition: quark.c:165
bool gatherv
Definition: quark.c:169
struct address_set_node_s * address_set_node_ptr
Definition: quark.c:170
volatile bool ready
Definition: quark.c:174
Definition: quark.c:94
struct quark_task_s * task
Definition: quark.c:163
icl_list_t * task_dependency_list_node_ptr
Definition: quark.c:173
bool locality
Definition: quark.c:167
void * address
Definition: quark.c:164
quark_direction_t direction
Definition: quark.c:166
icl_list_t * address_set_waiting_deps_node_ptr
Definition: quark.c:171
Definition: quark.h:52
Definition: quark.h:52
volatile bool accumulator
Definition: quark.c:168
struct dependency_s * locality_preserving_dep
Definition: quark.c:150
icl_list_t * task_args_list_node_ptr
Definition: quark.c:172

Here is the caller graph for this function:

static unsigned int fnv_hash_function ( void *  key,
int  len 
)
inlinestatic

Well known hash function: Fowler/Noll/Vo - 32 bit version

Definition at line 440 of file quark.c.

441 {
442  unsigned char *p = key;
443  unsigned int h = 2166136261u;
444  int i;
445  for ( i = 0; i < len; i++ )
446  h = ( h * 16777619 ) ^ p[i];
447  return h;
448 }

Here is the caller graph for this function:

LIST_HEAD ( ll_list_head_s  ,
ll_list_node_s   
)
static void process_completed_tasks ( Quark quark)
static

Handle the queue of completed tasks.

Definition at line 2032 of file quark.c.

References quark_s::address_set_mutex, quark_s::completed_tasks, quark_s::completed_tasks_mutex, pthread_mutex_trylock_asn(), pthread_mutex_trylock_completed_tasks(), pthread_mutex_unlock_asn(), pthread_mutex_unlock_completed_tasks(), remove_completed_task_and_check_for_ready(), TAILQ_FIRST, TAILQ_REMOVE, completed_tasks_node_s::task, and completed_tasks_node_s::workerid.

2033 {
2034  completed_tasks_node_t *node = NULL;
2035  do {
2036  node = NULL;
2037  if ( pthread_mutex_trylock_asn( &quark->address_set_mutex ) == 0 ) {
2039  node = TAILQ_FIRST(quark->completed_tasks);
2040  if ( node!= NULL ) TAILQ_REMOVE( quark->completed_tasks, node, entries );
2042  }
2043  if ( node != NULL ) {
2045  free( node );
2046  }
2048  }
2049  } while ( node != NULL );
2050 }
static int pthread_mutex_trylock_asn(pthread_mutex_t *mtx)
Definition: quark.c:270
pthread_mutex_t address_set_mutex
Definition: quark.c:109
static int pthread_mutex_trylock_completed_tasks(pthread_mutex_t *mtx)
Definition: quark.c:281
static int pthread_mutex_unlock_asn(pthread_mutex_t *mtx)
Definition: quark.c:271
static int pthread_mutex_unlock_completed_tasks(pthread_mutex_t *mtx)
Definition: quark.c:282
#define TAILQ_REMOVE(head, elm, field)
Definition: bsd_queue.h:565
#define TAILQ_FIRST(head)
Definition: bsd_queue.h:481
static void remove_completed_task_and_check_for_ready(Quark *quark, Task *task, int worker_rank)
Definition: quark.c:2058
struct completed_tasks_head_s * completed_tasks
Definition: quark.c:121
pthread_mutex_t completed_tasks_mutex
Definition: quark.c:120

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_cond_wait_ready_list ( pthread_cond_t cond,
pthread_mutex_t mtx 
)
inlinestatic

Definition at line 284 of file quark.c.

References pthread_cond_wait().

284 { return pthread_cond_wait( cond, mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)

Here is the call graph for this function:

static int pthread_mutex_lock_asn ( pthread_mutex_t mtx)
inlinestatic

Mutex wrappers for tracing/timing purposes. Makes it easier to profile the costs of these pthreads routines.

Definition at line 269 of file quark.c.

References pthread_mutex_lock().

269 { return pthread_mutex_lock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_lock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_lock_completed_tasks ( pthread_mutex_t mtx)
inlinestatic

Definition at line 280 of file quark.c.

References pthread_mutex_lock().

280 { return pthread_mutex_lock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_lock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_lock_ready_list ( pthread_mutex_t mtx)
inlinestatic

Definition at line 273 of file quark.c.

References pthread_mutex_lock().

273 { return pthread_mutex_lock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_lock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_lock_wrap ( pthread_mutex_t mtx)
inlinestatic

Definition at line 277 of file quark.c.

References pthread_mutex_lock().

277 { return pthread_mutex_lock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_lock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_trylock_asn ( pthread_mutex_t mtx)
inlinestatic

Definition at line 270 of file quark.c.

References pthread_mutex_trylock().

270 { return pthread_mutex_trylock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_trylock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_trylock_completed_tasks ( pthread_mutex_t mtx)
inlinestatic

Definition at line 281 of file quark.c.

References pthread_mutex_trylock().

281 { return pthread_mutex_trylock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_trylock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_trylock_ready_list ( pthread_mutex_t mtx)
inlinestatic

Definition at line 274 of file quark.c.

References pthread_mutex_trylock().

274 { return pthread_mutex_trylock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_trylock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_unlock_asn ( pthread_mutex_t mtx)
inlinestatic

Definition at line 271 of file quark.c.

References pthread_mutex_unlock().

271 { return pthread_mutex_unlock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_unlock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_unlock_completed_tasks ( pthread_mutex_t mtx)
inlinestatic

Definition at line 282 of file quark.c.

References pthread_mutex_unlock().

282 { return pthread_mutex_unlock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_unlock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_unlock_ready_list ( pthread_mutex_t mtx)
inlinestatic

Definition at line 275 of file quark.c.

References pthread_mutex_unlock().

275 { return pthread_mutex_unlock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_unlock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static int pthread_mutex_unlock_wrap ( pthread_mutex_t mtx)
inlinestatic

Definition at line 278 of file quark.c.

References pthread_mutex_unlock().

278 { return pthread_mutex_unlock( mtx ); }
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_unlock(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

void quark_avoid_war_dependencies ( Quark quark,
Address_Set_Node asn_old,
Task parent_task 
)

Routine to avoid false (WAR write-after-read) dependencies by making copies of the data. Check if there are suffient INPUTS in the beginning of a address dependency followed by a OUTPUT or an INOUT (data<-RRRRW). If so, make a copy of the data, adjust the pointers of the read dependencies to point to the new copy (copy<-RRRR and data<-W) and send to workers if the tasks are ready. The copy can be automacally freed when all the reads are done. The write can proceed at once. The address_set_mutex is already locked when this is called.

Definition at line 1314 of file quark.c.

References dependency_s::address, address_set_node_s::address, quark_s::address_set, address_set_node_new(), dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, address_set_node_s::delete_data_at_address_when_node_is_deleted, DEPCOLOR, dependency_s::direction, DONE, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_hash_insert(), icl_list_append(), icl_list_delete(), icl_list_first(), icl_list_next(), INOUT, INPUT, quark_s::low_water_mark, NOTREADY, quark_task_s::num_dependencies_remaining, quark_s::num_queued_tasks, quark_s::num_tasks, quark_s::num_threads, address_set_node_s::num_waiting_input, OUTPUT, quark_check_and_queue_ready_task(), quark_getenv_int(), dependency_s::ready, address_set_node_s::size, quark_task_s::status, dependency_s::task, dependency_s::task_args_list_node_ptr, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, address_set_node_s::waiting_deps, and quark_s::war_dependencies_enable.

1315 {
1316  /* Figure out if there are enough input dependencies to make this worthwhile */
1317  int count_initial_input_deps = 0;
1318  bool output_dep_reached = FALSE;
1319  double avg_queued_tasks_per_thread = (double)quark->num_queued_tasks/(double)quark->num_threads;
1320  double avg_tasks_per_thread = (double)quark->num_tasks/(double)quark->num_threads;
1321  int min_input_deps;
1322  icl_list_t *dep_node_old;
1323 
1324  /* Quick return if this is not enabled */
1325  if ( !quark->war_dependencies_enable ) return;
1326 
1327  /* TODO This stuff is still under development.... */
1328  if ( avg_queued_tasks_per_thread < 0.4 ) min_input_deps = 1;
1329  else if ( avg_queued_tasks_per_thread < 0.75 ) min_input_deps = 6;
1330  else if ( avg_queued_tasks_per_thread < 0.90 ) min_input_deps = 7;
1331  else if ( avg_queued_tasks_per_thread < 1.20 ) min_input_deps = 10;
1332  else if ( avg_queued_tasks_per_thread > 1.80 ) min_input_deps = 2000;
1333  else if ( avg_tasks_per_thread < (double)quark->low_water_mark/(double)quark->num_threads/2 ) min_input_deps = 2000;
1334  else min_input_deps = (int)(7 + 27 * avg_queued_tasks_per_thread);
1335 
1336  /* Override computed value using environment variable */
1337  min_input_deps = quark_getenv_int( "QUARK_AVOID_WAR_WHEN_NUM_WAITING_READS", min_input_deps );
1338 
1339  /* Shortcut return if there are not enough input tasks */
1340  if ( asn_old->num_waiting_input < min_input_deps ) return;
1341 
1342  /* Scan thru initial deps, make sure they are inputs and that there
1343  * are enough of them to make data copying worthwhile */
1344  for (dep_node_old=icl_list_first(asn_old->waiting_deps);
1345  dep_node_old!=NULL;
1346  dep_node_old=icl_list_next(asn_old->waiting_deps, dep_node_old)) {
1347  Dependency *dep = (Dependency *)dep_node_old->data;
1348  Task *task = dep->task;
1349  if ( dep->direction==INPUT && task->status==NOTREADY ) {
1350  count_initial_input_deps++;
1351  } else if ( (dep->direction==OUTPUT || dep->direction==INOUT) && task->status!=DONE ) {
1352  output_dep_reached = TRUE;
1353  break;
1354  }
1355  }
1356 
1357  /* if ( count_initial_input_deps>=quark->min_input_deps_to_avoid_war_dependencies && output_dep_reached ) { */
1358  if ( count_initial_input_deps>=min_input_deps && output_dep_reached ) {
1359  icl_list_t *dep_node_asn_old;
1360  Address_Set_Node *asn_new;
1361  /* Allocate and copy data */
1362  void *datacopy = malloc( asn_old->size );
1363  assert(datacopy!=NULL);
1364  /* TODO track the allocated memory in datacopies */
1365  /* quark->mem_allocated_to_war_dependency_data += asn_old->size; */
1366  memcpy( datacopy, asn_old->address, asn_old->size );
1367  /* Create address set node, attach to hash, and set it to clean up when done */
1368  asn_new = address_set_node_new( datacopy, asn_old->size );
1370  icl_hash_insert( quark->address_set, asn_new->address, asn_new );
1371  /* Update task dependences to point to this new data */
1372  /* Grab input deps from the old list, copy to new list, delete, then repeat */
1373  for ( dep_node_asn_old=icl_list_first(asn_old->waiting_deps);
1374  dep_node_asn_old!=NULL; ) {
1375  icl_list_t *dep_node_asn_old_to_be_deleted = NULL;
1376  Dependency *dep = (Dependency *)dep_node_asn_old->data;
1377  Task *task = dep->task;
1378  if ( dep->direction==INPUT && task->status==NOTREADY ) {
1379  dep_node_asn_old_to_be_deleted = dep_node_asn_old;
1380  icl_list_t *dep_node_new = icl_list_append( asn_new->waiting_deps, dep );
1381  asn_new->num_waiting_input++;
1382  /* In the args list, set the arg pointer to the new datacopy address */
1383  *(void **)dep->task_args_list_node_ptr->data = datacopy;
1384  dep->address = asn_new->address;
1385  dep->address_set_node_ptr = asn_new;
1386  dep->address_set_waiting_deps_node_ptr = dep_node_new;
1387  if (dep->ready == FALSE) { /* dep->ready will always be FALSE */
1388  dep->ready = TRUE;
1389  dot_dag_print_edge( parent_task->taskid, task->taskid, DEPCOLOR );
1390  dot_dag_level_update( parent_task->tasklevel, task->tasklevel, quark );
1391  task->num_dependencies_remaining--;
1392  quark_check_and_queue_ready_task( quark, task );
1393  }
1394  } else if ( (dep->direction==OUTPUT || dep->direction==INOUT) && task->status!=DONE ) {
1395  /* Once we return from this routine, this dep dependency will be processed */
1396  break;
1397  }
1398  dep_node_asn_old = icl_list_next(asn_old->waiting_deps, dep_node_asn_old);
1399  if (dep_node_asn_old_to_be_deleted!=NULL) {
1400  icl_list_delete(asn_old->waiting_deps, dep_node_asn_old_to_be_deleted, NULL);
1401  }
1402  }
1403  }
1404 }
icl_list_t * waiting_deps
Definition: quark.c:187
#define dot_dag_print_edge(parentid, childid, color)
Definition: quark.c:303
#define dot_dag_level_update(parent_level, child_level, quark)
Definition: quark.c:298
void * data
Definition: icl_list.h:18
struct address_set_node_s * address_set_node_ptr
Definition: quark.c:170
volatile bool ready
Definition: quark.c:174
icl_list_t * icl_list_append(icl_list_t *head, void *data)
Definition: icl_list.c:304
icl_entry_t * icl_hash_insert(icl_hash_t *ht, void *key, void *data)
Definition: icl_hash.c:132
unsigned long long tasklevel
Definition: quark.c:152
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
Definition: quark.c:94
struct quark_task_s * task
Definition: quark.c:163
volatile long long num_tasks
Definition: quark.c:105
volatile int num_queued_tasks
Definition: quark.c:112
int low_water_mark
Definition: quark.c:97
icl_hash_t * address_set
Definition: quark.c:108
Definition: quark.c:92
static Address_Set_Node * address_set_node_new(void *address, int size)
Definition: quark.c:1206
volatile bool delete_data_at_address_when_node_is_deleted
Definition: quark.c:191
volatile int num_waiting_input
Definition: quark.c:188
void * address
Definition: quark.c:164
int num_threads
Definition: quark.c:99
quark_direction_t direction
Definition: quark.c:166
int war_dependencies_enable
Definition: quark.c:114
icl_list_t * address_set_waiting_deps_node_ptr
Definition: quark.c:171
#define INPUT
Definition: quark.h:53
Definition: quark.c:94
unsigned long long taskid
Definition: quark.c:151
int icl_list_delete(icl_list_t *head, icl_list_t *pos, void(*free_function)(void *))
Definition: icl_list.c:84
Definition: quark.h:52
static void quark_check_and_queue_ready_task(Quark *quark, Task *task)
Definition: quark.c:1258
void * address
Definition: quark.c:184
Definition: quark.h:52
int quark_getenv_int(char *name, int defval)
Definition: quarkos.c:300
Definition: quark.c:92
#define DEPCOLOR
Definition: quark.c:293
icl_list_t * task_args_list_node_ptr
Definition: quark.c:172
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static void quark_check_and_queue_ready_task ( Quark quark,
Task task 
)
static

Queue ready tasks on a worker node, either using locality information or a round robin scheme. The address_set_mutex should be set when calling this, since we touch the task data structure (task->status) and update the quark->num_queued_tasks.

Definition at line 1258 of file quark.c.

References dependency_s::address_set_node_ptr, DONE, address_set_node_s::last_thread, quark_task_s::locality_preserving_dep, quark_task_s::lock_to_thread, quark_task_s::num_dependencies_remaining, quark_s::num_queued_tasks, quark_s::num_queued_tasks_cond, quark_s::num_threads, quark_task_s::priority, task_priority_tree_node_s::priority, pthread_cond_broadcast(), pthread_mutex_lock_ready_list(), pthread_mutex_unlock_ready_list(), quark_revolve_robin(), QUEUED, RB_INSERT, worker_s::ready_list, worker_s::ready_list_mutex, worker_s::ready_list_size, RUNNING, quark_task_s::status, task_priority_tree_node_s::task, and quark_s::worker.

1259 {
1260  int worker_thread_id = -1;
1261  Worker *worker = NULL;
1262  int assigned_thread_count = 0;
1263 
1264  if ( task->num_dependencies_remaining > 0 || task->status == QUEUED || task->status == RUNNING || task->status == DONE) return;
1265  task->status = QUEUED;
1266  /* Assign task to thread. Locked tasks get sent to appropriate
1267  * thread. Locality tasks should have be correctly placed. Tasks
1268  * without either should have the original round robin thread
1269  * assignment */
1270  if ( task->lock_to_thread >= 0) {
1271  worker_thread_id = task->lock_to_thread % quark->num_threads;
1272  } else if ( task->locality_preserving_dep != NULL ) {
1273  int last_thread = task->locality_preserving_dep->address_set_node_ptr->last_thread;
1274  if ( last_thread >= 0 ) worker_thread_id = last_thread;
1275  }
1276  if ( worker_thread_id < 0 ) worker_thread_id = quark_revolve_robin(quark);
1277 
1278  /* Handle tasks that need multiple threads */
1279  while ( assigned_thread_count < task->task_thread_count) {
1280 
1281  worker = quark->worker[worker_thread_id];
1282  /* Create a new entry for the ready list */
1283  task_priority_tree_node_t *new_task_tree_node = malloc(sizeof(task_priority_tree_node_t));
1284  assert( new_task_tree_node != NULL );
1285  new_task_tree_node->priority = task->priority;
1286  new_task_tree_node->task = task;
1287  /* Insert new entry into the ready list */
1289  RB_INSERT( task_priority_tree_head_s, worker->ready_list, new_task_tree_node );
1290  worker->ready_list_size++;
1293  quark->num_queued_tasks++;
1294 
1295  assigned_thread_count++;
1296  /* TODO Abort when too many threads requested */
1297  if ( assigned_thread_count < task->task_thread_count )
1298  worker_thread_id = (worker_thread_id+1) % quark->num_threads;
1299  }
1300 }
volatile int num_dependencies_remaining
Definition: quark.c:146
#define RB_INSERT(name, x, y)
Definition: bsd_tree.h:729
pthread_mutex_t ready_list_mutex
Definition: quark.c:132
struct task_priority_tree_head_s * ready_list
Definition: quark.c:133
MAGMA_DLLPORT int MAGMA_CDECL pthread_cond_broadcast(pthread_cond_t *cond)
int lock_to_thread
Definition: quark.c:153
struct address_set_node_s * address_set_node_ptr
Definition: quark.c:170
volatile int ready_list_size
Definition: quark.c:134
Definition: quark.c:92
struct worker_s ** worker
Definition: quark.c:100
static int quark_revolve_robin(Quark *quark)
Definition: quark.c:495
volatile int num_queued_tasks
Definition: quark.c:112
int priority
Definition: quark.c:156
volatile task_status status
Definition: quark.c:144
static int pthread_mutex_lock_ready_list(pthread_mutex_t *mtx)
Definition: quark.c:273
int num_threads
Definition: quark.c:99
Definition: quark.c:92
static int pthread_mutex_unlock_ready_list(pthread_mutex_t *mtx)
Definition: quark.c:275
volatile int last_thread
Definition: quark.c:186
pthread_cond_t num_queued_tasks_cond
Definition: quark.c:113
Definition: quark.c:92
struct dependency_s * locality_preserving_dep
Definition: quark.c:150

Here is the call graph for this function:

Here is the caller graph for this function:

int* quark_get_affthreads ( )

Definition at line 245 of file quarkos.c.

References CONTEXT_THREADS_MAX, QUARK_CLEANENV, and QUARK_GETENV.

245  {
246  char *envstr = NULL;
247  int i;
248 
249  int *coresbind = (int *)malloc(CONTEXT_THREADS_MAX*sizeof(int));
250  /* Env variable does not exist, we search the system number of core */
251  QUARK_GETENV("QUARK_AFF_THREADS", envstr);
252  if ( envstr == NULL) {
253  for (i = 0; i < CONTEXT_THREADS_MAX; i++)
254  coresbind[i] = i % sys_corenbr;
255  }
256  else {
257  char *endptr;
258  int wrap = 0;
259  int nbr = 0;
260  long int val;
261 
262  /* We use the content of the QUARK_AFF_THREADS env. variable */
263  for (i = 0; i < CONTEXT_THREADS_MAX; i++) {
264  if (!wrap) {
265  val = strtol(envstr, &endptr, 10);
266  if (endptr != envstr) {
267  coresbind[i] = (int)val;
268  envstr = endptr;
269  }
270  else {
271  /* there must be at least one entry */
272  if (i < 1) {
273  //quark_error("quark_get_affthreads", "QUARK_AFF_THREADS should have at least one entry => everything will be bind on core 0");
274  fprintf(stderr, "quark_get_affthreads: QUARK_AFF_THREADS should have at least one entry => everything will be bind on core 0");
275  coresbind[i] = 0;
276  i++;
277  }
278 
279  /* there is no more values in the string */
280  /* the next threads are binded with a round robin policy over this array */
281  wrap = 1;
282  nbr = i;
283 
284  coresbind[i] = coresbind[0];
285  }
286  }
287  else {
288  coresbind[i] = coresbind[i % nbr];
289  }
290  }
291  }
292  QUARK_CLEANENV(envstr);
293  /* return QUARK_SUCCESS; */
294  return coresbind;
295 }
#define QUARK_CLEANENV(str)
Definition: quarkos.c:214
#define QUARK_GETENV(var, str)
Definition: quarkos.c:213
#define CONTEXT_THREADS_MAX
Definition: quarkos.c:60
static volatile int sys_corenbr
Definition: quarkos.c:68

Here is the caller graph for this function:

int quark_get_numthreads ( )

Check for an integer in an environment variable, returning the integer value or a provided default value

Definition at line 222 of file quarkos.c.

References QUARK_CLEANENV, QUARK_GETENV, and sys_corenbr.

223 {
224  char *envstr = NULL;
225  char *endptr;
226  long int thrdnbr = -1;
227  extern int errno;
228 
229  /* Env variable does not exist, we search the system number of core */
230  QUARK_GETENV("QUARK_NUM_THREADS", envstr);
231  if ( envstr == NULL ) {
232  thrdnbr = sys_corenbr;
233  } else {
234  /* Convert to long, checking for errors */
235  thrdnbr = strtol(envstr, &endptr, 10);
236  if ((errno == ERANGE) || ((thrdnbr==0) && (endptr==envstr))) {
237  QUARK_CLEANENV(envstr);
238  return -1;
239  }
240  }
241  QUARK_CLEANENV(envstr);
242  return (int)thrdnbr;
243 }
#define QUARK_CLEANENV(str)
Definition: quarkos.c:214
#define QUARK_GETENV(var, str)
Definition: quarkos.c:213
static volatile int sys_corenbr
Definition: quarkos.c:68

Here is the caller graph for this function:

int quark_getenv_int ( char *  name,
int  defval 
)

Definition at line 300 of file quarkos.c.

References QUARK_CLEANENV, and QUARK_GETENV.

301 {
302  char *envstr = NULL;
303  char *endptr;
304  long int longval = -1;
305  extern int errno;
306 
307  QUARK_GETENV(name, envstr);
308  if ( envstr == NULL ) {
309  longval = defval;
310  } else {
311  /* Convert to long, checking for errors */
312  longval = strtol(envstr, &endptr, 10);
313  if ((errno == ERANGE) || ((longval==0) && (endptr==envstr))) {
314  longval = defval;
315  }
316  }
317  QUARK_CLEANENV(envstr);
318  return (int)longval;
319 }
#define QUARK_CLEANENV(str)
Definition: quarkos.c:214
#define QUARK_GETENV(var, str)
Definition: quarkos.c:213

Here is the caller graph for this function:

static void quark_insert_task_dependencies ( Quark quark,
Task task 
)
static

Called by the master insert task dependencies into the hash table. Any tasks that are ready to run are queued. The address_set_mutex must be locked before calling this routine.

Definition at line 1597 of file quark.c.

References dependency_s::address, quark_s::address_set, address_set_node_new(), dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, DEPCOLOR, quark_task_s::dependency_list, dependency_s::direction, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_hash_find(), icl_hash_insert(), icl_list_append(), icl_list_delete(), icl_list_first(), icl_list_next(), icl_list_prev(), INOUT, INPUT, quark_task_s::num_dependencies_remaining, OUTPUT, quark_avoid_war_dependencies(), dependency_s::ready, dependency_s::size, dependency_s::task, dependency_s::task_dependency_list_node_ptr, quark_task_s::taskid, quark_task_s::tasklevel, and TRUE.

1598 {
1599  icl_list_t *task_dep_p = NULL; /* task dependency list pointer */
1600 
1601  /* For each task dependency list pointer */
1602  for (task_dep_p = icl_list_first(task->dependency_list);
1603  task_dep_p != NULL;
1604  task_dep_p = icl_list_next(task->dependency_list, task_dep_p)) {
1605  Dependency *dep = (Dependency *) task_dep_p->data;
1606  /* Lookup address in address_set hash */
1607  Address_Set_Node *address_set_node = (Address_Set_Node *)icl_hash_find( quark->address_set, dep->address );
1608  /* If not found, create a new address set node and add it to the hash */
1609  if ( address_set_node == NULL ) {
1610  address_set_node = address_set_node_new( dep->address, dep->size );
1611  icl_hash_insert( quark->address_set, address_set_node->address, address_set_node );
1612  }
1613  /* Convenience shortcut pointer so that we don't have to hash again */
1614  dep->address_set_node_ptr = address_set_node;
1615  /* Add the dependency to the list of waiting dependencies on this address set node */
1616  icl_list_t *curr_dep_node = icl_list_append( address_set_node->waiting_deps, dep );
1617  /* Convenience shortcut pointer so we don't have to scan the waiting dependencies */
1618  dep->address_set_waiting_deps_node_ptr = curr_dep_node;
1619  /* Track num of waiting input, output and inout to be used to check false dependency resolution */
1620  if (dep->direction == INPUT) address_set_node->num_waiting_input++;
1621  else if (dep->direction == OUTPUT) address_set_node->num_waiting_output++;
1622  else if (dep->direction == INOUT) address_set_node->num_waiting_inout++;
1623 
1624  /* Handle the case that the a single task make multiple dependencies on the same data address */
1625  /* e.g. func( A11:IN, A11:INOUT, A11:OUT, A11:IN, A22:OUT ) */
1626  icl_list_t *prev_dep_node = icl_list_prev( address_set_node->waiting_deps, curr_dep_node);
1627  if ( prev_dep_node != NULL ) {
1628  Dependency *prev_dep = (Dependency *)prev_dep_node->data;
1629  Task *prev_task = prev_dep->task;
1630  if ( prev_task->taskid == task->taskid ) {
1631  /* The curr dependency will updated using the ordering INPUT < OUTPUT < INOUT */
1632  /* When the scheduler checks the front of the dependency list, it will find the correct dep setting */
1633  dep->direction = (dep->direction > prev_dep->direction ? INOUT : prev_dep->direction );
1634  if ( prev_dep->ready == FALSE ) {
1635  prev_dep->ready = TRUE;
1637  }
1638  /* Remove the redundent dependency from waiting deps and from the task */
1639  icl_list_delete( address_set_node->waiting_deps, prev_dep_node, NULL );
1641  /* Update the prev_dep_node ptr since it has changed */
1642  prev_dep_node = icl_list_prev( address_set_node->waiting_deps, curr_dep_node);
1643  }
1644  }
1645 
1646  /* This will avoid WAR dependencies if possible: if enabled, and
1647  * the current dependency is a write, and there were only reads
1648  * earlier (input>1, output+inout=1) */
1649  if ( ((dep->direction==OUTPUT || dep->direction==INOUT)) &&
1650  ((address_set_node->num_waiting_output + address_set_node->num_waiting_inout) == 1) ) {
1651  quark_avoid_war_dependencies( quark, address_set_node, task );
1652  }
1653 
1654  /* The following code decides whether the dep is ready or not */
1655  if ( dep->direction==INOUT || dep->direction==OUTPUT ) {
1656  /* If output, and previous dep exists, then ready=false */
1657  if ( prev_dep_node != NULL ) {
1658  dep->ready = FALSE;
1659  } else {
1660  dep->ready = TRUE;
1661  dot_dag_print_edge( address_set_node->last_reader_or_writer_taskid, task->taskid, DEPCOLOR );
1662  dot_dag_level_update( address_set_node->last_reader_or_writer_tasklevel, task->tasklevel, quark );
1664  }
1665  } else if ( dep->direction == INPUT ) {
1666  if ( prev_dep_node != NULL ) {
1667  /* If input, and previous dep is a read that is ready, then ready=true */
1668  Dependency *prev_dep = (Dependency *)prev_dep_node->data;
1669  if ( prev_dep->direction==INPUT && prev_dep->ready==TRUE ) {
1670  dep->ready = TRUE;
1671  dot_dag_print_edge( address_set_node->last_writer_taskid, task->taskid, DEPCOLOR );
1672  dot_dag_level_update( address_set_node->last_writer_tasklevel, task->tasklevel, quark );
1674  } else {
1675  dep->ready = FALSE;
1676  }
1677  } else {
1678  /* Input, but no previous node (is first), so ready */
1679  dep->ready = TRUE;
1680  dot_dag_print_edge( address_set_node->last_writer_taskid, task->taskid, DEPCOLOR );
1681  dot_dag_level_update( address_set_node->last_writer_tasklevel, task->tasklevel, quark );
1683  }
1684  }
1685  }
1686 }
int size
Definition: quark.c:165
volatile int num_dependencies_remaining
Definition: quark.c:146
#define dot_dag_print_edge(parentid, childid, color)
Definition: quark.c:303
#define dot_dag_level_update(parent_level, child_level, quark)
Definition: quark.c:298
void * data
Definition: icl_list.h:18
struct address_set_node_s * address_set_node_ptr
Definition: quark.c:170
volatile bool ready
Definition: quark.c:174
icl_list_t * icl_list_append(icl_list_t *head, void *data)
Definition: icl_list.c:304
icl_entry_t * icl_hash_insert(icl_hash_t *ht, void *key, void *data)
Definition: icl_hash.c:132
unsigned long long tasklevel
Definition: quark.c:152
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
Definition: quark.c:94
icl_list_t * icl_list_prev(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:246
struct quark_task_s * task
Definition: quark.c:163
icl_list_t * task_dependency_list_node_ptr
Definition: quark.c:173
icl_hash_t * address_set
Definition: quark.c:108
static Address_Set_Node * address_set_node_new(void *address, int size)
Definition: quark.c:1206
void quark_avoid_war_dependencies(Quark *quark, Address_Set_Node *asn_old, Task *parent_task)
Definition: quark.c:1314
void * address
Definition: quark.c:164
quark_direction_t direction
Definition: quark.c:166
icl_list_t * address_set_waiting_deps_node_ptr
Definition: quark.c:171
#define INPUT
Definition: quark.h:53
Definition: quark.c:94
unsigned long long taskid
Definition: quark.c:151
int icl_list_delete(icl_list_t *head, icl_list_t *pos, void(*free_function)(void *))
Definition: icl_list.c:84
void * icl_hash_find(icl_hash_t *ht, void *key)
Definition: icl_hash.c:105
Definition: quark.h:52
Definition: quark.h:52
icl_list_t * dependency_list
Definition: quark.c:148
#define DEPCOLOR
Definition: quark.c:293
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static int quark_revolve_robin ( Quark quark)
inlinestatic

Rotate the next worker queue that will get a task assigned to it. The master (0) never gets round-robin tasks assigned to it.

Definition at line 495 of file quark.c.

References quark_s::list_robin, and quark_s::num_threads.

496 {
497  quark->list_robin++;
498  if (quark->list_robin == quark->num_threads)
499  quark->list_robin = 0;
500  if (quark->list_robin==0 && quark->num_threads>1)
501  quark->list_robin = 1;
502  return quark->list_robin;
503 }
volatile int list_robin
Definition: quark.c:102
int num_threads
Definition: quark.c:99

Here is the caller graph for this function:

Task* quark_set_task_flags_in_task_structure ( Quark quark,
Task task,
Quark_Task_Flags task_flags 
)

Use the task_flags data structure to set various items in the task (priority, lock_to_thread, color, labels, etc )

Definition at line 868 of file quark.c.

References quark_s::dot_dag_enable, quark_task_s::lock_to_thread, quark_task_s::priority, quark_task_s::sequence, quark_task_flags_s::task_color, quark_task_s::task_color, quark_task_flags_s::task_label, quark_task_s::task_label, quark_task_flags_s::task_lock_to_thread, quark_task_flags_s::task_priority, quark_task_flags_s::task_sequence, quark_task_flags_s::task_thread_count, and quark_task_s::task_thread_count.

869 {
870  if ( task_flags ) {
871  if ( task_flags->task_priority ) task->priority = task_flags->task_priority;
872  if ( task_flags->task_lock_to_thread >= 0 ) task->lock_to_thread = task_flags->task_lock_to_thread;
873  if ( task_flags->task_color && quark->dot_dag_enable ) task->task_color = strdup(task_flags->task_color);
874  if ( task_flags->task_label && quark->dot_dag_enable ) task->task_label = strdup(task_flags->task_label);
875  if ( task_flags->task_sequence ) task->sequence = task_flags->task_sequence;
876  if ( task_flags->task_thread_count > 1 ) task->task_thread_count = task_flags->task_thread_count;
877  }
878  return task;
879 }
void * task_sequence
Definition: quark.h:101
int task_thread_count
Definition: quark.c:159
int lock_to_thread
Definition: quark.c:153
char * task_label
Definition: quark.h:100
Quark_Sequence * sequence
Definition: quark.c:157
int priority
Definition: quark.c:156
char * task_label
Definition: quark.c:154
int dot_dag_enable
Definition: quark.c:116
int task_thread_count
Definition: quark.h:102
char * task_color
Definition: quark.h:99
char * task_color
Definition: quark.c:155
int task_lock_to_thread
Definition: quark.h:98
int task_priority
Definition: quark.h:97

Here is the caller graph for this function:

int quark_setaffinity ( int  rank)

This routine will set affinity for the calling thread that has rank 'rank'. Ranks start with 0.

If there are multiple instances of QUARK then affinity will be wrong: all ranks 0 will be pinned to core 0.

Also, affinity is not resotred when QUARK_Finalize() is called.

Definition at line 125 of file quarkos.c.

References QUARK_ERR_NOT_SUPPORTED, QUARK_ERR_UNEXPECTED, and QUARK_SUCCESS.

125  {
126 #ifndef QUARK_AFFINITY_DISABLE
127 #if (defined QUARK_OS_LINUX)
128  {
129  cpu_set_t set;
130  CPU_ZERO( &set );
131  CPU_SET( rank, &set );
132 
133 #if (defined HAVE_OLD_SCHED_SETAFFINITY)
134  if( sched_setaffinity( 0, &set ) < 0 )
135 #else /* HAVE_OLD_SCHED_SETAFFINITY */
136  if( sched_setaffinity( 0, sizeof(set), &set) < 0 )
137 #endif /* HAVE_OLD_SCHED_SETAFFINITY */
138  {
139  return QUARK_ERR_UNEXPECTED;
140  }
141 
142  return QUARK_SUCCESS;
143  }
144 #elif (defined QUARK_OS_MACOS)
145  {
146  thread_affinity_policy_data_t ap;
147  int ret;
148 
149  ap.affinity_tag = 1; /* non-null affinity tag */
150  ret = thread_policy_set( mach_thread_self(),
151  THREAD_AFFINITY_POLICY,
152  (integer_t*) &ap,
153  THREAD_AFFINITY_POLICY_COUNT
154  );
155  if(ret != 0)
156  return QUARK_ERR_UNEXPECTED;
157 
158  return QUARK_SUCCESS;
159  }
160 #elif (defined QUARK_OS_WINDOWS)
161  {
162  DWORD mask = 1 << rank;
163 
164  if( SetThreadAffinityMask(GetCurrentThread(), mask) == 0)
165  return QUARK_ERR_UNEXPECTED;
166 
167  return QUARK_SUCCESS;
168  }
169 #elif (defined QUARK_OS_AIX)
170  {
171  tid_t self_ktid = thread_self ();
172  bindprocessor(BINDTHREAD, self_ktid, rank);
173  return QUARK_SUCCESS;
174  }
175 #else
177 #endif
178 #endif /* QUARK_AFFINITY_DISABLE */
180 }
#define QUARK_ERR_UNEXPECTED
Definition: quarkos.c:57
#define QUARK_SUCCESS
Definition: quarkos.c:55
#define QUARK_ERR_NOT_SUPPORTED
Definition: quark.h:45

Here is the caller graph for this function:

static Task * quark_task_new ( )
static

Local function prototypes, declared static so they are not available outside the scope of this file.

Initialize the task data structure

Definition at line 314 of file quark.c.

References quark_task_s::args_list, quark_task_s::dependency_list, quark_task_s::function, icl_list_new(), quark_task_s::locality_preserving_dep, quark_task_s::lock_to_thread, NOTREADY, quark_task_s::num_dependencies, quark_task_s::num_dependencies_remaining, quark_task_s::priority, pthread_mutex_init(), quark_task_s::ptr_to_task_in_sequence, quark_task_default_color, quark_task_default_label, QUARK_TASK_MIN_PRIORITY, quark_task_s::scratch_list, quark_task_s::sequence, quark_task_s::status, quark_task_s::task_color, quark_task_s::task_label, quark_task_s::task_mutex, quark_task_s::task_thread_count, quark_task_s::taskid, quark_task_s::tasklevel, and ULLONG_MAX.

315 {
316  static unsigned long long taskid = 1;
317  Task *task = (Task *)malloc(sizeof(Task));
318  assert(task != NULL);
319  task->function = NULL;
320  task->num_dependencies = 0;
321  task->num_dependencies_remaining = 0;
322  task->args_list = icl_list_new();
323  assert(task->args_list != NULL);
324  task->dependency_list = icl_list_new();
325  assert(task->dependency_list != NULL);
326  task->locality_preserving_dep = NULL;
327  task->status = NOTREADY;
328  task->scratch_list = icl_list_new();
329  assert( task->scratch_list != NULL);
330  assert( taskid < ULLONG_MAX );
331  task->taskid = taskid++;
332  task->tasklevel = 0;
333  pthread_mutex_init(&task->task_mutex, NULL);
334  task->ptr_to_task_in_sequence = NULL;
335  task->sequence = NULL;
339  task->lock_to_thread = -1;
340  task->task_thread_count = 1;
341  return task;
342 }
volatile int num_dependencies_remaining
Definition: quark.c:146
int task_thread_count
Definition: quark.c:159
pthread_mutex_t task_mutex
Definition: quark.c:142
icl_list_t * icl_list_new()
Definition: icl_list.c:22
#define QUARK_TASK_MIN_PRIORITY
Definition: quark.h:86
int lock_to_thread
Definition: quark.c:153
void(* function)(Quark *)
Definition: quark.c:143
unsigned long long tasklevel
Definition: quark.c:152
Quark_Sequence * sequence
Definition: quark.c:157
struct ll_list_node_s * ptr_to_task_in_sequence
Definition: quark.c:158
int priority
Definition: quark.c:156
char * task_label
Definition: quark.c:154
volatile task_status status
Definition: quark.c:144
Definition: quark.c:92
icl_list_t * args_list
Definition: quark.c:147
static char * quark_task_default_label
Definition: quark.c:291
char * task_color
Definition: quark.c:155
icl_list_t * scratch_list
Definition: quark.c:149
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
unsigned long long taskid
Definition: quark.c:151
volatile int num_dependencies
Definition: quark.c:145
#define ULLONG_MAX
Definition: quark.c:85
icl_list_t * dependency_list
Definition: quark.c:148
static char * quark_task_default_color
Definition: quark.c:292
struct dependency_s * locality_preserving_dep
Definition: quark.c:150

Here is the call graph for this function:

Here is the caller graph for this function:

int QUARK_Thread_Rank ( Quark quark)

Return the rank of a thread

Definition at line 377 of file quark.c.

References quark_s::num_threads, pthread_equal(), pthread_self(), worker_s::thread_id, and quark_s::worker.

378 {
379  pthread_t self_id = pthread_self();
380  int i;
381  for (i=0; i<quark->num_threads; i++)
382  if (pthread_equal(quark->worker[i]->thread_id, self_id))
383  return i;
384  return -1;
385 }
struct worker_s ** worker
Definition: quark.c:100
int num_threads
Definition: quark.c:99
pthread_t thread_id
Definition: quark.c:131
MAGMA_DLLPORT pthread_t MAGMA_CDECL pthread_self(void)
MAGMA_DLLPORT int MAGMA_CDECL pthread_equal(pthread_t thread1, pthread_t thread2)

Here is the call graph for this function:

Here is the caller graph for this function:

void quark_topology_finalize ( )

Definition at line 114 of file quarkos.c.

114 {}

Here is the caller graph for this function:

void quark_topology_init ( )

Definition at line 78 of file quarkos.c.

References pthread_mutex_lock(), and pthread_mutex_unlock().

78  {
80  if ( !topo_initialized ) {
81 #if (defined QUARK_OS_LINUX) || (defined QUARK_OS_AIX)
82 
83  sys_corenbr = sysconf(_SC_NPROCESSORS_ONLN);
84 
85 #elif (defined QUARK_OS_MACOS)
86 
87  int mib[4];
88  int cpu;
89  size_t len = sizeof(cpu);
90 
91  /* set the mib for hw.ncpu */
92  mib[0] = CTL_HW;
93  mib[1] = HW_AVAILCPU;
94 
95  /* get the number of CPUs from the system */
96  sysctl(mib, 2, &cpu, &len, NULL, 0);
97  if( cpu < 1 ) {
98  mib[1] = HW_NCPU;
99  sysctl( mib, 2, &cpu, &len, NULL, 0 );
100  }
101  if( cpu < 1 ) {
102  cpu = 1;
103  }
104  sys_corenbr = cpu;
105 #elif (defined QUARK_OS_WINDOWS)
106  SYSTEM_INFO sysinfo;
107  GetSystemInfo(&sysinfo);
108  sys_corenbr = sysinfo.dwNumberOfProcessors;
109 #endif
110  }
112 }
static volatile int topo_initialized
Definition: quarkos.c:69
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_unlock(pthread_mutex_t *mutex)
static pthread_mutex_t mutextopo
Definition: quarkos.c:67
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_lock(pthread_mutex_t *mutex)
static volatile int sys_corenbr
Definition: quarkos.c:68

Here is the call graph for this function:

Here is the caller graph for this function:

int quark_yield ( )

A thread can unlock the CPU if it has nothing to do to let another thread of less priority running for example for I/O.

Definition at line 187 of file quarkos.c.

References QUARK_ERR_NOT_SUPPORTED.

187  {
188 #if (defined QUARK_OS_LINUX) || (defined QUARK_OS_MACOS) || (defined QUARK_OS_AIX)
189  return sched_yield();
190 #elif QUARK_OS_WINDOWS
191  return SleepEx(0,0);
192 #else
194 #endif
195 }
#define QUARK_ERR_NOT_SUPPORTED
Definition: quark.h:45
RB_GENERATE ( task_priority_tree_head_s  ,
task_priority_tree_node_s  ,
n_entry  ,
compare_task_priority_tree_nodes   
)
RB_HEAD ( task_priority_tree_head_s  ,
task_priority_tree_node_s   
)
static void remove_completed_task_and_check_for_ready ( Quark quark,
Task task,
int  worker_rank 
)
static

Handle a single completed task, finding its children and putting the children that are ready to go (all dependencies satisfied) into worker ready queues.

Definition at line 2058 of file quark.c.

References address_set_node_accumulator_find_prepend(), address_set_node_delete(), address_set_node_initial_gatherv_check_and_launch(), address_set_node_initial_input_check_and_launch(), address_set_node_initial_output_check_and_launch(), dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, quark_task_s::dependency_list, dependency_s::direction, quark_s::dot_dag_enable, quark_s::dot_dag_mutex, icl_list_delete(), icl_list_first(), icl_list_next(), INOUT, INPUT, address_set_node_s::last_thread, quark_s::num_queued_tasks, OUTPUT, pthread_mutex_lock_wrap(), pthread_mutex_unlock_wrap(), quark_avoid_war_dependencies(), quark_task_s::task_color, task_delete(), quark_task_s::task_label, quark_task_s::taskid, quark_task_s::tasklevel, quark_s::tasklevel_width, and quark_s::war_dependencies_enable.

2059 {
2060  if ( quark->dot_dag_enable ) {
2062  if (task->tasklevel < 1) task->tasklevel=1;
2063  fprintf(dot_dag_file, "t%lld [fillcolor=\"%s\",label=\"%s\",style=filled];\n", task->taskid, task->task_color, task->task_label);
2064  /* Track the width of each task level */
2065  quark->tasklevel_width[task->tasklevel]++;
2066  /* fprintf(dot_dag_file, "// critical-path depth %ld \n", task->tasklevel ); */
2067  fprintf(dot_dag_file, "{rank=same;%lld;t%lld};\n", task->tasklevel, task->taskid );
2069  }
2070 
2071  /* For each dependency in the task that was completed */
2072  icl_list_t *dep_node;
2073  for (dep_node = icl_list_first(task->dependency_list);
2074  dep_node != NULL && dep_node->data!=NULL;
2075  dep_node = icl_list_next(task->dependency_list, dep_node)) {
2076  Dependency *dep = (Dependency *)dep_node->data;
2077  Address_Set_Node *address_set_node = dep->address_set_node_ptr;
2078 
2079  /* Mark the address/data as having been written by worker_rank */
2080  if ( dep->direction==OUTPUT || dep->direction==INOUT )
2081  address_set_node->last_thread = worker_rank;
2082  if ( quark->dot_dag_enable ) {
2083  if ( dep->direction==OUTPUT || dep->direction==INOUT ) {
2084  /* Track last writer and level, needed when this structure becomes empty */
2085  address_set_node->last_writer_taskid = task->taskid;
2086  address_set_node->last_writer_tasklevel = task->tasklevel;
2087  }
2088  address_set_node->last_reader_or_writer_taskid = task->taskid;
2089  address_set_node->last_reader_or_writer_tasklevel = task->tasklevel;
2090  }
2091  /* Check the address set node to avoid WAR dependencies; if
2092  * just completed a write, and at least one more write
2093  * (sum>=2) is pending */
2094  if ( (quark->war_dependencies_enable) &&
2095  (dep->direction==OUTPUT || dep->direction==INOUT) &&
2096  ((address_set_node->num_waiting_output + address_set_node->num_waiting_inout) >= 2) ) {
2097  quark_avoid_war_dependencies( quark, address_set_node, task );
2098  }
2099  /* Remove competed dependency from address_set_node waiting_deps list */
2100  icl_list_delete( address_set_node->waiting_deps, dep->address_set_waiting_deps_node_ptr, NULL );
2101  /* Check initial INPUT next_deps attached to address_set_node */
2102  address_set_node_initial_input_check_and_launch( quark, address_set_node, dep, worker_rank );
2103  /* Handle any initial GATHERV dependencies */
2104  address_set_node_initial_gatherv_check_and_launch(quark, address_set_node, dep, worker_rank);
2105  /* Prepend any initial accumulater dependency that is ready to go */
2106  address_set_node_accumulator_find_prepend( quark, address_set_node );
2107  /* Check initial OUTPUT/INOUT deps waiting on address_set_node */
2108  address_set_node_initial_output_check_and_launch( quark, address_set_node, dep, worker_rank );
2109  /* Keep track of the waiting dependency counts for this address */
2110  if (dep->direction == INPUT) address_set_node->num_waiting_input--;
2111  else if (dep->direction == OUTPUT) address_set_node->num_waiting_output--;
2112  else if (dep->direction == INOUT) address_set_node->num_waiting_inout--;
2113 
2114  /* If this address_set_node has no more waiting_deps, remove it */
2115  if ( icl_list_first(address_set_node->waiting_deps) == NULL )
2116  address_set_node_delete( quark, address_set_node );
2117  }
2118 
2119  task_delete(quark, task);
2120  quark->num_queued_tasks--;
2121 }
int tasklevel_width[tasklevel_width_max_level]
Definition: quark.c:118
FILE * dot_dag_file
Definition: quark.c:297
void * data
Definition: icl_list.h:18
static void address_set_node_delete(Quark *quark, Address_Set_Node *address_set_node)
Definition: quark.c:1230
struct address_set_node_s * address_set_node_ptr
Definition: quark.c:170
unsigned long long tasklevel
Definition: quark.c:152
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
static int pthread_mutex_unlock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:278
static void address_set_node_initial_input_check_and_launch(Quark *quark, Address_Set_Node *address_set_node, Dependency *completed_dep, int worker_rank)
Definition: quark.c:1512
volatile int num_queued_tasks
Definition: quark.c:112
char * task_label
Definition: quark.c:154
int dot_dag_enable
Definition: quark.c:116
void quark_avoid_war_dependencies(Quark *quark, Address_Set_Node *asn_old, Task *parent_task)
Definition: quark.c:1314
static void address_set_node_initial_output_check_and_launch(Quark *quark, Address_Set_Node *address_set_node, Dependency *completed_dep, int worker_rank)
Definition: quark.c:1564
quark_direction_t direction
Definition: quark.c:166
int war_dependencies_enable
Definition: quark.c:114
char * task_color
Definition: quark.c:155
static int pthread_mutex_lock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:277
icl_list_t * address_set_waiting_deps_node_ptr
Definition: quark.c:171
#define INPUT
Definition: quark.h:53
unsigned long long taskid
Definition: quark.c:151
int icl_list_delete(icl_list_t *head, icl_list_t *pos, void(*free_function)(void *))
Definition: icl_list.c:84
Definition: quark.h:52
static void task_delete(Quark *quark, Task *task)
Definition: quark.c:348
pthread_mutex_t dot_dag_mutex
Definition: quark.c:119
Definition: quark.h:52
volatile int last_thread
Definition: quark.c:186
static void address_set_node_initial_gatherv_check_and_launch(Quark *quark, Address_Set_Node *address_set_node, Dependency *completed_dep, int worker_rank)
Definition: quark.c:1413
static void address_set_node_accumulator_find_prepend(Quark *quark, Address_Set_Node *address_set_node)
Definition: quark.c:1450
icl_list_t * dependency_list
Definition: quark.c:148
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static void scratch_allocate ( Task task)
static

Allocate any needed scratch space;

Definition at line 605 of file quark.c.

References icl_list_s::data, icl_list_first(), icl_list_next(), scratch_s::ptr, quark_task_s::scratch_list, scratch_s::size, and scratch_s::task_args_list_node_ptr.

606 {
607  icl_list_t *scr_node;
608  for (scr_node = icl_list_first( task->scratch_list );
609  scr_node != NULL && scr_node->data != NULL;
610  scr_node = icl_list_next(task->scratch_list, scr_node)) {
611  Scratch *scratch = (Scratch *)scr_node->data;
612  if ( scratch->ptr == NULL ) {
613  /* Since ptr is null, space is to be allocted and attached */
614  assert( scratch->size > 0 );
615  void *scratchspace = malloc( scratch->size );
616  assert( scratchspace != NULL );
617  *(void **)scratch->task_args_list_node_ptr->data = scratchspace;
618  }
619  }
620 }
void * data
Definition: icl_list.h:18
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
void * ptr
Definition: quark.c:178
int size
Definition: quark.c:179
icl_list_t * scratch_list
Definition: quark.c:149
icl_list_t * task_args_list_node_ptr
Definition: quark.c:180
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static void scratch_deallocate ( Task task)
static

Deallocate any scratch space.

Definition at line 626 of file quark.c.

References icl_list_s::data, icl_list_first(), icl_list_next(), scratch_s::ptr, quark_task_s::scratch_list, and scratch_s::task_args_list_node_ptr.

627 {
628  icl_list_t *scr_node;
629  for (scr_node = icl_list_first( task->scratch_list );
630  scr_node != NULL && scr_node->data!=NULL;
631  scr_node = icl_list_next(task->scratch_list, scr_node)) {
632  Scratch *scratch = (Scratch *)scr_node->data;
633  if ( scratch->ptr == NULL ) {
634  /* If scratch had to be allocated, free it */
635  free(*(void **)scratch->task_args_list_node_ptr->data);
636  }
637  }
638 }
void * data
Definition: icl_list.h:18
icl_list_t * icl_list_first(icl_list_t *head)
Definition: icl_list.c:192
void * ptr
Definition: quark.c:178
icl_list_t * scratch_list
Definition: quark.c:149
icl_list_t * task_args_list_node_ptr
Definition: quark.c:180
icl_list_t * icl_list_next(icl_list_t *head, icl_list_t *pos)
Definition: icl_list.c:228

Here is the call graph for this function:

Here is the caller graph for this function:

static Scratch * scratch_new ( void *  arg_ptr,
int  arg_size,
icl_list_t task_args_list_node_ptr 
)
static

The task requires scratch workspace, which will be allocated if needed. This records the scratch requirements.

Definition at line 591 of file quark.c.

References scratch_s::ptr, scratch_s::size, and scratch_s::task_args_list_node_ptr.

592 {
593  Scratch *scratch = (Scratch *)malloc(sizeof(Scratch));
594  assert(scratch != NULL);
595  scratch->ptr = arg_ptr;
596  scratch->size = arg_size;
597  scratch->task_args_list_node_ptr = task_args_list_node_ptr;
598  return(scratch);
599 }
void * ptr
Definition: quark.c:178
int size
Definition: quark.c:179
icl_list_t * task_args_list_node_ptr
Definition: quark.c:180

Here is the caller graph for this function:

TAILQ_HEAD ( completed_tasks_head_s  ,
completed_tasks_node_s   
)
static void task_delete ( Quark quark,
Task task 
)
static

Free the task data structure

Definition at line 348 of file quark.c.

References quark_task_s::args_list, quark_task_s::dependency_list, icl_hash_delete(), icl_list_destroy(), LIST_REMOVE, quark_s::num_tasks, pthread_mutex_destroy(), pthread_mutex_lock_wrap(), pthread_mutex_unlock_wrap(), quark_task_s::ptr_to_task_in_sequence, quark_task_s::scratch_list, quark_task_s::sequence, Quark_sequence_s::sequence_mutex, quark_task_s::task_color, quark_task_s::task_label, quark_task_s::task_mutex, quark_s::task_set, quark_s::task_set_mutex, and quark_task_s::taskid.

349 {
351  icl_hash_delete( quark->task_set, &task->taskid, NULL, NULL );
354  icl_list_destroy(task->args_list, free);
355  icl_list_destroy(task->dependency_list, free);
356  icl_list_destroy(task->scratch_list, free);
357  if ( task->ptr_to_task_in_sequence != NULL ) {
359  LIST_REMOVE( task->ptr_to_task_in_sequence, entries );
361  free( task->ptr_to_task_in_sequence );
362  }
363  if (task->task_color!=NULL && task->task_color!=quark_task_default_color) free(task->task_color);
364  if (task->task_label!=NULL && task->task_label!=quark_task_default_label) free(task->task_label);
367  free( task );
368  // TODO pthread_mutex_lock_asn( &quark->address_set_mutex );
369  quark->num_tasks--;
370  // TODO pthread_mutex_unlock_asn( &quark->address_set_mutex );
371 }
pthread_mutex_t task_mutex
Definition: quark.c:142
pthread_mutex_t task_set_mutex
Definition: quark.c:107
int icl_list_destroy(icl_list_t *head, void(*free_function)(void *))
Definition: icl_list.c:143
static int pthread_mutex_unlock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:278
Quark_Sequence * sequence
Definition: quark.c:157
struct ll_list_node_s * ptr_to_task_in_sequence
Definition: quark.c:158
volatile long long num_tasks
Definition: quark.c:105
char * task_label
Definition: quark.c:154
icl_list_t * args_list
Definition: quark.c:147
static char * quark_task_default_label
Definition: quark.c:291
char * task_color
Definition: quark.c:155
icl_hash_t * task_set
Definition: quark.c:106
#define LIST_REMOVE(elm, field)
Definition: bsd_queue.h:403
static int pthread_mutex_lock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:277
icl_list_t * scratch_list
Definition: quark.c:149
unsigned long long taskid
Definition: quark.c:151
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_destroy(pthread_mutex_t *mutex)
int icl_hash_delete(icl_hash_t *ht, void *key, void(*free_key)(void *), void(*free_data)(void *))
Definition: icl_hash.c:224
pthread_mutex_t sequence_mutex
Definition: quark.c:126
icl_list_t * dependency_list
Definition: quark.c:148
static char * quark_task_default_color
Definition: quark.c:292

Here is the call graph for this function:

Here is the caller graph for this function:

static unsigned int ullong_hash_function ( void *  key)
inlinestatic

Hash function for unsigned long longs (used for taskid)

Definition at line 475 of file quark.c.

References fnv_hash_function().

476 {
477  int len = sizeof(unsigned long long);
478  unsigned int hashval = fnv_hash_function( key, len );
479  return hashval;
480 }
static unsigned int fnv_hash_function(void *key, int len)
Definition: quark.c:440

Here is the call graph for this function:

Here is the caller graph for this function:

static int ullong_key_compare ( void *  key1,
void *  key2 
)
inlinestatic

Compare unsigned long longs for hash keys (used for taskid)

Definition at line 485 of file quark.c.

486 {
487  return ( *(unsigned long long*)key1 == *(unsigned long long*)key2 );
488 }

Here is the caller graph for this function:

static void work_main_loop ( Worker worker)
static

Called by the workers (and master) to continue executing tasks until some exit condition is reached.

Definition at line 1732 of file quark.c.

References quark_s::all_tasks_queued, CANCELLED, worker_s::current_task_ptr, DONE, worker_s::executing_task, FALSE, worker_s::finalize, quark_task_s::function, quark_task_s::lock_to_thread, quark_s::num_queued_tasks, quark_s::num_threads, process_completed_tasks(), pthread_mutex_lock_wrap(), pthread_mutex_trylock_ready_list(), pthread_mutex_unlock_ready_list(), pthread_mutex_unlock_wrap(), worker_s::quark_ptr, QUARK_Thread_Rank(), quark_s::queue_before_computing, RB_MAX, RB_MIN, RB_REMOVE, worker_s::ready_list, worker_s::ready_list_mutex, worker_s::ready_list_size, RUNNING, scratch_allocate(), scratch_deallocate(), quark_s::start, quark_task_s::status, task_priority_tree_node_s::task, quark_task_s::task_mutex, TRUE, quark_s::worker, and worker_remove_completed_task_enqueue_for_later_processing().

1733 {
1734  Quark *quark = worker->quark_ptr;
1735  Worker *worker_victim = NULL;
1736  task_priority_tree_node_t *task_priority_tree_node = NULL;
1737  Task *task = NULL;
1738  int ready_list_victim = -1;
1739 
1740  /* Busy wait while not ready */
1741  do {} while ( !quark->start );
1742  int worker_rank = QUARK_Thread_Rank(quark);
1743 
1744  /* Queue all tasks before running; this line for debugging use */
1745  /* while ( !quark->all_tasks_queued ) { if (worker_rank==0) return; else {} } */
1746  if ( quark->queue_before_computing )
1747  while ( !quark->all_tasks_queued ) { if (worker_rank==0) return; else {} }
1748  /* Master never does work; this line for debugging use */
1749  /* if (worker_rank == 0) return; */
1750 
1751  while ( !worker->finalize ) {
1752  /* Repeatedly try to find a task, first trying my own ready list,
1753  * then trying to steal from someone else */
1754  task = NULL;
1755  ready_list_victim = worker_rank;
1756  /* Loop while looking for tasks */
1757  while ( task==NULL && !worker->finalize ) {
1758 
1759  /* Process all completed tasks before doing work */
1760  if ( worker_rank==0 || worker_rank%10==1 ) process_completed_tasks(quark);
1761 
1762  worker_victim = quark->worker[ready_list_victim];
1763  task_priority_tree_node = NULL;
1764  assert ( worker_victim->ready_list_size >= 0 );
1765  if ( worker_victim->ready_list_size != 0 ) {
1766  /* Only lock if there is likely to be an item in the ready list */
1767  if ( pthread_mutex_trylock_ready_list( &worker_victim->ready_list_mutex ) == 0) {
1768  /* if (pthread_mutex_lock_ready_list(&worker_victim->ready_list_mutex)==0) { */
1769  /* Check front of my own queue, back of everyone else's queue */
1770  if ( worker_rank == ready_list_victim )
1771  task_priority_tree_node = RB_MIN( task_priority_tree_head_s, worker_victim->ready_list );
1772  else if ( worker_rank!=ready_list_victim && worker_victim->executing_task==TRUE )
1773  task_priority_tree_node = RB_MAX( task_priority_tree_head_s, worker_victim->ready_list );
1774  else
1775  task_priority_tree_node = NULL;
1776  /* Access task, checking to make sure it is not pinned to a thread */
1777  if ( task_priority_tree_node != NULL ) {
1778  task = task_priority_tree_node->task;
1779  /* If task should be locked to a thread, and this is not that thread, set task to NULL and continue */
1780  if ( task->lock_to_thread>=0 && task->lock_to_thread!=worker_rank) {
1781  task = NULL;
1782  } else {
1783  /* If task found, remove it from the ready list */
1784  RB_REMOVE( task_priority_tree_head_s, worker_victim->ready_list, task_priority_tree_node );
1785  free( task_priority_tree_node );
1786  worker_victim->ready_list_size--;
1787  }
1788  }
1790  }
1791  }
1792  /* If no task found */
1793  if (task == NULL) {
1794  /* Choose the next victim queue */
1795  ready_list_victim = (ready_list_victim + 1) % quark->num_threads;
1796  /* Break for master when a scan of all queues is finished and no tasks were found */
1797  if ( worker_rank==0 && ready_list_victim==0 ) break;
1798  /* If there are no tasks, wait for a task to be introduced, then check own queue first */
1799  if ( quark->num_queued_tasks==0 && !worker->finalize && worker_rank!=0 ) {
1800  do { assert( quark->num_queued_tasks >= 0); } while ( quark->num_queued_tasks==0 && !worker->finalize ) ;
1801  ready_list_victim = worker_rank;
1802  }
1803  }
1804  }
1805  /* EXECUTE THE TASK IF FOUND */
1806  if ( task!=NULL ) {
1807  //if ( quark->num_tasks != 1 ) { printf("quark->num_tasks %d %d %d\n", quark->num_tasks, quark->low_water_mark, quark->high_water_mark ); abort(); }
1809  if ( task->function == NULL ) {
1810  /* This can occur if the task is cancelled */
1811  task->status = CANCELLED;
1813  } else {
1814  /* Call the task */
1815  worker->executing_task = TRUE;
1816  task->status = RUNNING;
1818  scratch_allocate( task );
1819  worker->current_task_ptr = task;
1820  task->function( quark );
1821  scratch_deallocate( task );
1822  task->status = DONE;
1823  worker->executing_task = FALSE;
1824  }
1825  /* Remove the task from the address hash */
1826  /* Original solution */
1827  //pthread_mutex_lock_asn(&quark->address_set_mutex);
1828  //worker_remove_completed_task_and_check_for_ready(quark, task, worker_rank);
1829  //pthread_mutex_unlock_asn(&quark->address_set_mutex);
1830  /* New version */
1832  }
1833  /* Break if master */
1834  if ( worker_rank==0 && ready_list_victim==0 ) break;
1835  }
1836  /* Worker has exited loop; ready for next time this worker is activated */
1837  worker->finalize = FALSE;
1838 }
Definition: quark.c:96
volatile bool executing_task
Definition: quark.c:138
pthread_mutex_t ready_list_mutex
Definition: quark.c:132
pthread_mutex_t task_mutex
Definition: quark.c:142
#define RB_REMOVE(name, x, y)
Definition: bsd_tree.h:730
struct task_priority_tree_head_s * ready_list
Definition: quark.c:133
#define RB_MAX(name, x)
Definition: bsd_tree.h:736
int lock_to_thread
Definition: quark.c:153
int QUARK_Thread_Rank(Quark *quark)
Definition: quark.c:377
void(* function)(Quark *)
Definition: quark.c:143
volatile int ready_list_size
Definition: quark.c:134
static void scratch_deallocate(Task *task)
Definition: quark.c:626
Definition: quark.c:94
static int pthread_mutex_unlock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:278
struct worker_s ** worker
Definition: quark.c:100
#define RB_MIN(name, x)
Definition: bsd_tree.h:735
volatile int num_queued_tasks
Definition: quark.c:112
static void process_completed_tasks(Quark *quark)
Definition: quark.c:2032
volatile bool start
Definition: quark.c:103
volatile task_status status
Definition: quark.c:144
int queue_before_computing
Definition: quark.c:117
Quark * quark_ptr
Definition: quark.c:136
int num_threads
Definition: quark.c:99
volatile bool finalize
Definition: quark.c:137
static int pthread_mutex_lock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:277
static void worker_remove_completed_task_enqueue_for_later_processing(Quark *quark, Task *task, int worker_rank)
Definition: quark.c:2012
Definition: quark.c:92
Definition: quark.c:94
static void scratch_allocate(Task *task)
Definition: quark.c:605
static int pthread_mutex_unlock_ready_list(pthread_mutex_t *mtx)
Definition: quark.c:275
Quark_Task * current_task_ptr
Definition: quark.c:135
Definition: quark.c:92
static int pthread_mutex_trylock_ready_list(pthread_mutex_t *mtx)
Definition: quark.c:274
volatile bool all_tasks_queued
Definition: quark.c:104

Here is the call graph for this function:

Here is the caller graph for this function:

static void work_set_affinity_and_call_main_loop ( Worker worker)
static

Called when spawning the worker thread to set affinity to specific core and then call the main work loop. This function is used internally, when the scheduler spawns and manages the threads. If an external driver is using the scheduler (e.g. PLASMA) then it does the thread management and any affinity must be set in the external driver.

Definition at line 1718 of file quark.c.

References quark_s::coresbind, worker_s::quark_ptr, quark_setaffinity(), QUARK_Thread_Rank(), work_main_loop(), and quark_s::worker.

1719 {
1720  Quark *quark = worker->quark_ptr;
1721  int thread_rank = QUARK_Thread_Rank(quark);
1722  quark_setaffinity( quark->coresbind[thread_rank] ) ;
1723  work_main_loop( quark->worker[thread_rank] );
1724  return;
1725 }
Definition: quark.c:96
int QUARK_Thread_Rank(Quark *quark)
Definition: quark.c:377
int quark_setaffinity(int rank)
Definition: quarkos.c:125
struct worker_s ** worker
Definition: quark.c:100
static void work_main_loop(Worker *worker)
Definition: quark.c:1732
Quark * quark_ptr
Definition: quark.c:136
int * coresbind
Definition: quark.c:101

Here is the call graph for this function:

Here is the caller graph for this function:

static void worker_delete ( Worker worker)
static

Cleanup and free worker data structures

Definition at line 572 of file quark.c.

References pthread_mutex_destroy(), RB_MIN, RB_NEXT, RB_REMOVE, worker_s::ready_list, and worker_s::ready_list_mutex.

573 {
574  task_priority_tree_node_t *node, *nxt;
575  /* Destroy the workers priority queue, if there is still anything there */
576  for ( node = RB_MIN( task_priority_tree_head_s, worker->ready_list ); node != NULL; node = nxt) {
577  nxt = RB_NEXT( task_priority_tree_head_s, worker->ready_list, node );
578  RB_REMOVE( task_priority_tree_head_s, worker->ready_list, node );
579  free(node);
580  }
581  free( worker->ready_list );
583  free(worker);
584 }
#define RB_NEXT(name, x, y)
Definition: bsd_tree.h:733
pthread_mutex_t ready_list_mutex
Definition: quark.c:132
#define RB_REMOVE(name, x, y)
Definition: bsd_tree.h:730
struct task_priority_tree_head_s * ready_list
Definition: quark.c:133
#define RB_MIN(name, x)
Definition: bsd_tree.h:735
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_destroy(pthread_mutex_t *mutex)

Here is the call graph for this function:

Here is the caller graph for this function:

static Worker * worker_new ( Quark quark,
int  rank 
)
static

Allocate and initialize a worker structure

Definition at line 550 of file quark.c.

References worker_s::current_task_ptr, worker_s::executing_task, FALSE, worker_s::finalize, pthread_mutex_init(), pthread_self(), worker_s::quark_ptr, RB_INIT, worker_s::ready_list, worker_s::ready_list_mutex, worker_s::ready_list_size, and worker_s::thread_id.

551 {
552  Worker *worker = (Worker *) malloc(sizeof(Worker));
553  assert(worker != NULL);
554  worker->thread_id = pthread_self();
555  worker->ready_list = malloc(sizeof(task_priority_tree_head_t));
556  assert(worker->ready_list != NULL);
557  RB_INIT( worker->ready_list );
558  worker->ready_list_size = 0;
559  pthread_mutex_init(&worker->ready_list_mutex, NULL);
560  /* convenience pointer to the real args for the task */
561  worker->current_task_ptr = NULL;
562  worker->quark_ptr = quark;
563  worker->finalize = FALSE;
564  worker->executing_task = FALSE;
565  return worker;
566 }
volatile bool executing_task
Definition: quark.c:138
pthread_mutex_t ready_list_mutex
Definition: quark.c:132
struct task_priority_tree_head_s * ready_list
Definition: quark.c:133
volatile int ready_list_size
Definition: quark.c:134
Definition: quark.c:94
Quark * quark_ptr
Definition: quark.c:136
volatile bool finalize
Definition: quark.c:137
MAGMA_DLLPORT int MAGMA_CDECL pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
pthread_t thread_id
Definition: quark.c:131
MAGMA_DLLPORT pthread_t MAGMA_CDECL pthread_self(void)
struct task_priority_tree_head_s task_priority_tree_head_t
Definition: quark.c:224
Quark_Task * current_task_ptr
Definition: quark.c:135
#define RB_INIT(root)
Definition: bsd_tree.h:303

Here is the call graph for this function:

Here is the caller graph for this function:

static void worker_remove_completed_task_enqueue_for_later_processing ( Quark quark,
Task task,
int  worker_rank 
)
static

When a task is completed, queue it for further handling by another process.

Definition at line 2012 of file quark.c.

References quark_s::completed_tasks, quark_s::completed_tasks_mutex, pthread_mutex_lock_completed_tasks(), pthread_mutex_lock_wrap(), pthread_mutex_unlock_completed_tasks(), pthread_mutex_unlock_wrap(), TAILQ_INSERT_TAIL, completed_tasks_node_s::task, quark_task_s::task_mutex, quark_task_s::task_thread_count, and completed_tasks_node_s::workerid.

2013 {
2014  int threads_remaining_for_this_task = -1;
2016  threads_remaining_for_this_task = --task->task_thread_count;
2018  if ( threads_remaining_for_this_task == 0 ) {
2019  completed_tasks_node_t *node = malloc(sizeof(completed_tasks_node_t));
2020  node->task = task;
2021  node->workerid = worker_rank;
2023  TAILQ_INSERT_TAIL( quark->completed_tasks, node, entries );
2025  }
2026 }
int task_thread_count
Definition: quark.c:159
static int pthread_mutex_lock_completed_tasks(pthread_mutex_t *mtx)
Definition: quark.c:280
pthread_mutex_t task_mutex
Definition: quark.c:142
static int pthread_mutex_unlock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:278
static int pthread_mutex_unlock_completed_tasks(pthread_mutex_t *mtx)
Definition: quark.c:282
static int pthread_mutex_lock_wrap(pthread_mutex_t *mtx)
Definition: quark.c:277
struct completed_tasks_head_s * completed_tasks
Definition: quark.c:121
#define TAILQ_INSERT_TAIL(head, elm, field)
Definition: bsd_queue.h:547
pthread_mutex_t completed_tasks_mutex
Definition: quark.c:120

Here is the call graph for this function:

Here is the caller graph for this function:

Variable Documentation

FILE* dot_dag_file

Definition at line 297 of file quark.c.

char* quark_task_default_color = "white"
static

Definition at line 292 of file quark.c.

char* quark_task_default_label = " "
static

Definition at line 291 of file quark.c.