|
MAGMA
1.2.0
Matrix Algebra on GPU and Multicore Architectures
|
#include <stdio.h>#include <stdlib.h>#include <assert.h>#include <stdarg.h>#include <string.h>#include <limits.h>#include <errno.h>#include <pthread.h>#include "icl_list.h"#include "icl_hash.h"#include "bsd_queue.h"#include "bsd_tree.h"#include "quark.h"#include "quark_unpack_args.h"Go to the source code of this file.
Classes | |
| struct | quark_s |
| struct | Quark_sequence_s |
| struct | worker_s |
| struct | quark_task_s |
| struct | dependency_s |
| struct | scratch_s |
| struct | address_set_node_s |
| struct | ll_list_node_s |
| struct | completed_tasks_node_s |
| struct | task_priority_tree_node_s |
Macros | |
| #define | inline __inline |
| #define | fopen(ppfile, name, mode) *ppfile = fopen(name, mode) |
| #define | ULLONG_MAX 18446744073709551615ULL |
| #define | DIRECTION_MASK 0x07 |
| #define | tasklevel_width_max_level 5000 |
| #define | DEPCOLOR "black" |
| #define | ANTIDEPCOLOR "red" |
| #define | GATHERVDEPCOLOR "green" |
| #define | DOT_DAG_FILENAME "dot_dag_file.dot" |
| #define | dot_dag_level_update(parent_level, child_level, quark) |
| #define | dot_dag_print_edge(parentid, childid, color) |
Typedefs | |
| typedef struct worker_s | Worker |
| typedef struct quark_task_s | Task |
| typedef struct dependency_s | Dependency |
| typedef struct scratch_s | Scratch |
| typedef struct address_set_node_s | Address_Set_Node |
| typedef struct ll_list_node_s | ll_list_node_t |
| typedef struct ll_list_head_s | ll_list_head_t |
| typedef struct completed_tasks_node_s | completed_tasks_node_t |
| typedef struct completed_tasks_head_s | completed_tasks_head_t |
| typedef struct task_priority_tree_node_s | task_priority_tree_node_t |
| typedef struct task_priority_tree_head_s | task_priority_tree_head_t |
Enumerations | |
| enum | task_status { NOTREADY, QUEUED, RUNNING, DONE, CANCELLED } |
| enum | task_num { DGETRF, DTSTRF, DGESSM, DSSSM } |
| enum | bool { FALSE, TRUE } |
Variables | |
| static char * | quark_task_default_label = " " |
| static char * | quark_task_default_color = "white" |
| FILE * | dot_dag_file |
QUARK (QUeuing And Runtime for Kernels) provides a runtime environment for the dynamic execution of precedence-constrained tasks.
QUARK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.
Definition in file quark.c.
| #define dot_dag_level_update | ( | parent_level, | |
| child_level, | |||
| quark | |||
| ) |
| #define dot_dag_print_edge | ( | parentid, | |
| childid, | |||
| color | |||
| ) |
| #define fopen | ( | ppfile, | |
| name, | |||
| mode | |||
| ) | *ppfile = fopen(name, mode) |
| typedef struct address_set_node_s Address_Set_Node |
| typedef struct completed_tasks_head_s completed_tasks_head_t |
| typedef struct completed_tasks_node_s completed_tasks_node_t |
| typedef struct dependency_s Dependency |
| typedef struct ll_list_head_s ll_list_head_t |
| typedef struct ll_list_node_s ll_list_node_t |
| typedef struct quark_task_s Task |
| typedef struct task_priority_tree_head_s task_priority_tree_head_t |
| typedef struct task_priority_tree_node_s task_priority_tree_node_t |
| enum task_num |
| enum task_status |
|
inlinestatic |
Hash function to map addresses, cut into "long" size chunks, then XOR. The result will be matched to hash table size using mod in the hash table implementation.
Definition at line 456 of file quark.c.
References fnv_hash_function().
|
inlinestatic |
|
static |
Called by a worker each time a task is removed from an address set node. Sweeps through a sequence of ACCUMULATOR tasks from the beginning and prepends one at the beginning if only one (chained) dependency is remaining. This does not actually launch the prepended task; it depends on another function to do that. Assumes address_set_mutex is locked.
Definition at line 1450 of file quark.c.
References dependency_s::accumulator, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, FALSE, icl_list_delete(), icl_list_first(), icl_list_next(), icl_list_prepend(), quark_task_s::num_dependencies_remaining, dependency_s::task, and address_set_node_s::waiting_deps.
|
static |
Clean and free address set node structures.
Definition at line 1230 of file quark.c.
References address_set_node_s::address, quark_s::address_set, address_set_node_s::delete_data_at_address_when_node_is_deleted, quark_s::dot_dag_enable, icl_hash_delete(), icl_list_destroy(), TRUE, and address_set_node_s::waiting_deps.
|
static |
Called by a worker each time a task is removed from an address set node. Sweeps through a sequence of GATHERV dependencies from the beginning, and enables them all. Assumes address_set_mutex is locked.
Definition at line 1413 of file quark.c.
References icl_list_s::data, dependency_s::direction, dot_dag_level_update, dot_dag_print_edge, FALSE, dependency_s::gatherv, GATHERVDEPCOLOR, icl_list_first(), icl_list_next(), INOUT, quark_task_s::num_dependencies_remaining, OUTPUT, quark_check_and_queue_ready_task(), dependency_s::ready, dependency_s::task, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, and address_set_node_s::waiting_deps.
|
static |
Called by a worker each time a task is removed from an address set node. Sweeps through a sequence of initial INPUT dependencies on an address, and launches any that are ready to go. Assumes address_set_mutex is locked.
Definition at line 1512 of file quark.c.
References ANTIDEPCOLOR, icl_list_s::data, DEPCOLOR, dependency_s::direction, quark_s::dot_dag_enable, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_list_first(), icl_list_next(), INOUT, INPUT, OUTPUT, quark_check_and_queue_ready_task(), dependency_s::ready, dependency_s::task, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, and address_set_node_s::waiting_deps.
|
static |
Called by a worker each time a task is removed from an address set node. Checks any initial OUTPUT/INOUT dependencies on an address, and launches any tasks that are ready to go. Assumes address_set_mutex is locked.
Definition at line 1564 of file quark.c.
References icl_list_s::data, DEPCOLOR, dependency_s::direction, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_list_first(), INOUT, OUTPUT, quark_check_and_queue_ready_task(), dependency_s::ready, dependency_s::task, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, and address_set_node_s::waiting_deps.
|
static |
Allocate and initialize address_set_node structure. These are inserted into the hash table.
Definition at line 1206 of file quark.c.
References address_set_node_s::address, address_set_node_s::delete_data_at_address_when_node_is_deleted, FALSE, icl_list_new(), address_set_node_s::last_reader_or_writer_taskid, address_set_node_s::last_reader_or_writer_tasklevel, address_set_node_s::last_thread, address_set_node_s::last_writer_taskid, address_set_node_s::last_writer_tasklevel, address_set_node_s::num_waiting_inout, address_set_node_s::num_waiting_input, address_set_node_s::num_waiting_output, address_set_node_s::size, and address_set_node_s::waiting_deps.
|
inlinestatic |
|
static |
|
inlinestatic |
Allocate and initialize a dependency structure
Definition at line 521 of file quark.c.
References dependency_s::accumulator, dependency_s::address, dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, dependency_s::direction, FALSE, dependency_s::gatherv, INOUT, dependency_s::locality, quark_task_s::locality_preserving_dep, OUTPUT, dependency_s::ready, dependency_s::size, dependency_s::task, dependency_s::task_args_list_node_ptr, and dependency_s::task_dependency_list_node_ptr.
|
inlinestatic |
| LIST_HEAD | ( | ll_list_head_s | , |
| ll_list_node_s | |||
| ) |
|
static |
Handle the queue of completed tasks.
Definition at line 2032 of file quark.c.
References quark_s::address_set_mutex, quark_s::completed_tasks, quark_s::completed_tasks_mutex, pthread_mutex_trylock_asn(), pthread_mutex_trylock_completed_tasks(), pthread_mutex_unlock_asn(), pthread_mutex_unlock_completed_tasks(), remove_completed_task_and_check_for_ready(), TAILQ_FIRST, TAILQ_REMOVE, completed_tasks_node_s::task, and completed_tasks_node_s::workerid.
|
inlinestatic |
Definition at line 284 of file quark.c.
References pthread_cond_wait().
|
inlinestatic |
Mutex wrappers for tracing/timing purposes. Makes it easier to profile the costs of these pthreads routines.
Definition at line 269 of file quark.c.
References pthread_mutex_lock().
|
inlinestatic |
Definition at line 280 of file quark.c.
References pthread_mutex_lock().
|
inlinestatic |
Definition at line 273 of file quark.c.
References pthread_mutex_lock().
|
inlinestatic |
Definition at line 277 of file quark.c.
References pthread_mutex_lock().
|
inlinestatic |
Definition at line 270 of file quark.c.
References pthread_mutex_trylock().
|
inlinestatic |
Definition at line 281 of file quark.c.
References pthread_mutex_trylock().
|
inlinestatic |
Definition at line 274 of file quark.c.
References pthread_mutex_trylock().
|
inlinestatic |
Definition at line 271 of file quark.c.
References pthread_mutex_unlock().
|
inlinestatic |
Definition at line 282 of file quark.c.
References pthread_mutex_unlock().
|
inlinestatic |
Definition at line 275 of file quark.c.
References pthread_mutex_unlock().
|
inlinestatic |
Definition at line 278 of file quark.c.
References pthread_mutex_unlock().
| void quark_avoid_war_dependencies | ( | Quark * | quark, |
| Address_Set_Node * | asn_old, | ||
| Task * | parent_task | ||
| ) |
Routine to avoid false (WAR write-after-read) dependencies by making copies of the data. Check if there are sufficient INPUTS in the beginning of an address dependency followed by an OUTPUT or an INOUT (data<-RRRRW). If so, make a copy of the data, adjust the pointers of the read dependencies to point to the new copy (copy<-RRRR and data<-W) and send to workers if the tasks are ready. The copy can be automatically freed when all the reads are done. The write can proceed at once. The address_set_mutex is already locked when this is called.
Definition at line 1314 of file quark.c.
References dependency_s::address, address_set_node_s::address, quark_s::address_set, address_set_node_new(), dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, address_set_node_s::delete_data_at_address_when_node_is_deleted, DEPCOLOR, dependency_s::direction, DONE, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_hash_insert(), icl_list_append(), icl_list_delete(), icl_list_first(), icl_list_next(), INOUT, INPUT, quark_s::low_water_mark, NOTREADY, quark_task_s::num_dependencies_remaining, quark_s::num_queued_tasks, quark_s::num_tasks, quark_s::num_threads, address_set_node_s::num_waiting_input, OUTPUT, quark_check_and_queue_ready_task(), quark_getenv_int(), dependency_s::ready, address_set_node_s::size, quark_task_s::status, dependency_s::task, dependency_s::task_args_list_node_ptr, quark_task_s::taskid, quark_task_s::tasklevel, TRUE, address_set_node_s::waiting_deps, and quark_s::war_dependencies_enable.
Queue ready tasks on a worker node, either using locality information or a round robin scheme. The address_set_mutex should be set when calling this, since we touch the task data structure (task->status) and update the quark->num_queued_tasks.
Definition at line 1258 of file quark.c.
References dependency_s::address_set_node_ptr, DONE, address_set_node_s::last_thread, quark_task_s::locality_preserving_dep, quark_task_s::lock_to_thread, quark_task_s::num_dependencies_remaining, quark_s::num_queued_tasks, quark_s::num_queued_tasks_cond, quark_s::num_threads, quark_task_s::priority, task_priority_tree_node_s::priority, pthread_cond_broadcast(), pthread_mutex_lock_ready_list(), pthread_mutex_unlock_ready_list(), quark_revolve_robin(), QUEUED, RB_INSERT, worker_s::ready_list, worker_s::ready_list_mutex, worker_s::ready_list_size, RUNNING, quark_task_s::status, task_priority_tree_node_s::task, and quark_s::worker.
| int* quark_get_affthreads | ( | ) |
Definition at line 245 of file quarkos.c.
References CONTEXT_THREADS_MAX, QUARK_CLEANENV, and QUARK_GETENV.
| int quark_get_numthreads | ( | ) |
Definition at line 222 of file quarkos.c.
References QUARK_CLEANENV, QUARK_GETENV, and sys_corenbr.
| int quark_getenv_int | ( | char * | name, |
| int | defval | ||
| ) |
Definition at line 300 of file quarkos.c.
References QUARK_CLEANENV, and QUARK_GETENV.
Called by the master to insert task dependencies into the hash table. Any tasks that are ready to run are queued. The address_set_mutex must be locked before calling this routine.
Definition at line 1597 of file quark.c.
References dependency_s::address, quark_s::address_set, address_set_node_new(), dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, DEPCOLOR, quark_task_s::dependency_list, dependency_s::direction, dot_dag_level_update, dot_dag_print_edge, FALSE, icl_hash_find(), icl_hash_insert(), icl_list_append(), icl_list_delete(), icl_list_first(), icl_list_next(), icl_list_prev(), INOUT, INPUT, quark_task_s::num_dependencies_remaining, OUTPUT, quark_avoid_war_dependencies(), dependency_s::ready, dependency_s::size, dependency_s::task, dependency_s::task_dependency_list_node_ptr, quark_task_s::taskid, quark_task_s::tasklevel, and TRUE.
|
inlinestatic |
Rotate the next worker queue that will get a task assigned to it. The master (0) never gets round-robin tasks assigned to it.
Definition at line 495 of file quark.c.
References quark_s::list_robin, and quark_s::num_threads.
| Task* quark_set_task_flags_in_task_structure | ( | Quark * | quark, |
| Task * | task, | ||
| Quark_Task_Flags * | task_flags | ||
| ) |
Use the task_flags data structure to set various items in the task (priority, lock_to_thread, color, labels, etc )
Definition at line 868 of file quark.c.
References quark_s::dot_dag_enable, quark_task_s::lock_to_thread, quark_task_s::priority, quark_task_s::sequence, quark_task_flags_s::task_color, quark_task_s::task_color, quark_task_flags_s::task_label, quark_task_s::task_label, quark_task_flags_s::task_lock_to_thread, quark_task_flags_s::task_priority, quark_task_flags_s::task_sequence, quark_task_flags_s::task_thread_count, and quark_task_s::task_thread_count.
| int quark_setaffinity | ( | int | rank | ) |
This routine will set affinity for the calling thread that has rank 'rank'. Ranks start with 0.
If there are multiple instances of QUARK then affinity will be wrong: all ranks 0 will be pinned to core 0.
Also, affinity is not restored when QUARK_Finalize() is called.
Definition at line 125 of file quarkos.c.
References QUARK_ERR_NOT_SUPPORTED, QUARK_ERR_UNEXPECTED, and QUARK_SUCCESS.
|
static |
Local function prototypes, declared static so they are not available outside the scope of this file.
Initialize the task data structure
Definition at line 314 of file quark.c.
References quark_task_s::args_list, quark_task_s::dependency_list, quark_task_s::function, icl_list_new(), quark_task_s::locality_preserving_dep, quark_task_s::lock_to_thread, NOTREADY, quark_task_s::num_dependencies, quark_task_s::num_dependencies_remaining, quark_task_s::priority, pthread_mutex_init(), quark_task_s::ptr_to_task_in_sequence, quark_task_default_color, quark_task_default_label, QUARK_TASK_MIN_PRIORITY, quark_task_s::scratch_list, quark_task_s::sequence, quark_task_s::status, quark_task_s::task_color, quark_task_s::task_label, quark_task_s::task_mutex, quark_task_s::task_thread_count, quark_task_s::taskid, quark_task_s::tasklevel, and ULLONG_MAX.
| int QUARK_Thread_Rank | ( | Quark * | quark | ) |
Return the rank of a thread
Definition at line 377 of file quark.c.
References quark_s::num_threads, pthread_equal(), pthread_self(), worker_s::thread_id, and quark_s::worker.
| void quark_topology_finalize | ( | ) |
| void quark_topology_init | ( | ) |
Definition at line 78 of file quarkos.c.
References pthread_mutex_lock(), and pthread_mutex_unlock().
| int quark_yield | ( | ) |
Definition at line 187 of file quarkos.c.
References QUARK_ERR_NOT_SUPPORTED.
| RB_GENERATE | ( | task_priority_tree_head_s | , |
| task_priority_tree_node_s | , | ||
| n_entry | , | ||
| compare_task_priority_tree_nodes | |||
| ) |
| RB_HEAD | ( | task_priority_tree_head_s | , |
| task_priority_tree_node_s | |||
| ) |
|
static |
Handle a single completed task, finding its children and putting the children that are ready to go (all dependencies satisfied) into worker ready queues.
Definition at line 2058 of file quark.c.
References address_set_node_accumulator_find_prepend(), address_set_node_delete(), address_set_node_initial_gatherv_check_and_launch(), address_set_node_initial_input_check_and_launch(), address_set_node_initial_output_check_and_launch(), dependency_s::address_set_node_ptr, dependency_s::address_set_waiting_deps_node_ptr, icl_list_s::data, quark_task_s::dependency_list, dependency_s::direction, quark_s::dot_dag_enable, quark_s::dot_dag_mutex, icl_list_delete(), icl_list_first(), icl_list_next(), INOUT, INPUT, address_set_node_s::last_thread, quark_s::num_queued_tasks, OUTPUT, pthread_mutex_lock_wrap(), pthread_mutex_unlock_wrap(), quark_avoid_war_dependencies(), quark_task_s::task_color, task_delete(), quark_task_s::task_label, quark_task_s::taskid, quark_task_s::tasklevel, quark_s::tasklevel_width, and quark_s::war_dependencies_enable.
|
static |
Allocate any needed scratch space.
Definition at line 605 of file quark.c.
References icl_list_s::data, icl_list_first(), icl_list_next(), scratch_s::ptr, quark_task_s::scratch_list, scratch_s::size, and scratch_s::task_args_list_node_ptr.
|
static |
Deallocate any scratch space.
Definition at line 626 of file quark.c.
References icl_list_s::data, icl_list_first(), icl_list_next(), scratch_s::ptr, quark_task_s::scratch_list, and scratch_s::task_args_list_node_ptr.
|
static |
The task requires scratch workspace, which will be allocated if needed. This records the scratch requirements.
Definition at line 591 of file quark.c.
References scratch_s::ptr, scratch_s::size, and scratch_s::task_args_list_node_ptr.
| TAILQ_HEAD | ( | completed_tasks_head_s | , |
| completed_tasks_node_s | |||
| ) |
Free the task data structure
Definition at line 348 of file quark.c.
References quark_task_s::args_list, quark_task_s::dependency_list, icl_hash_delete(), icl_list_destroy(), LIST_REMOVE, quark_s::num_tasks, pthread_mutex_destroy(), pthread_mutex_lock_wrap(), pthread_mutex_unlock_wrap(), quark_task_s::ptr_to_task_in_sequence, quark_task_s::scratch_list, quark_task_s::sequence, Quark_sequence_s::sequence_mutex, quark_task_s::task_color, quark_task_s::task_label, quark_task_s::task_mutex, quark_s::task_set, quark_s::task_set_mutex, and quark_task_s::taskid.
|
inlinestatic |
Hash function for unsigned long longs (used for taskid)
Definition at line 475 of file quark.c.
References fnv_hash_function().
|
inlinestatic |
|
static |
Called by the workers (and master) to continue executing tasks until some exit condition is reached.
Definition at line 1732 of file quark.c.
References quark_s::all_tasks_queued, CANCELLED, worker_s::current_task_ptr, DONE, worker_s::executing_task, FALSE, worker_s::finalize, quark_task_s::function, quark_task_s::lock_to_thread, quark_s::num_queued_tasks, quark_s::num_threads, process_completed_tasks(), pthread_mutex_lock_wrap(), pthread_mutex_trylock_ready_list(), pthread_mutex_unlock_ready_list(), pthread_mutex_unlock_wrap(), worker_s::quark_ptr, QUARK_Thread_Rank(), quark_s::queue_before_computing, RB_MAX, RB_MIN, RB_REMOVE, worker_s::ready_list, worker_s::ready_list_mutex, worker_s::ready_list_size, RUNNING, scratch_allocate(), scratch_deallocate(), quark_s::start, quark_task_s::status, task_priority_tree_node_s::task, quark_task_s::task_mutex, TRUE, quark_s::worker, and worker_remove_completed_task_enqueue_for_later_processing().
|
static |
Called when spawning the worker thread to set affinity to specific core and then call the main work loop. This function is used internally, when the scheduler spawns and manages the threads. If an external driver is using the scheduler (e.g. PLASMA) then it does the thread management and any affinity must be set in the external driver.
Definition at line 1718 of file quark.c.
References quark_s::coresbind, worker_s::quark_ptr, quark_setaffinity(), QUARK_Thread_Rank(), work_main_loop(), and quark_s::worker.
|
static |
Cleanup and free worker data structures
Definition at line 572 of file quark.c.
References pthread_mutex_destroy(), RB_MIN, RB_NEXT, RB_REMOVE, worker_s::ready_list, and worker_s::ready_list_mutex.
Allocate and initialize a worker structure
Definition at line 550 of file quark.c.
References worker_s::current_task_ptr, worker_s::executing_task, FALSE, worker_s::finalize, pthread_mutex_init(), pthread_self(), worker_s::quark_ptr, RB_INIT, worker_s::ready_list, worker_s::ready_list_mutex, worker_s::ready_list_size, and worker_s::thread_id.
|
static |
When a task is completed, queue it for further handling by another process.
Definition at line 2012 of file quark.c.
References quark_s::completed_tasks, quark_s::completed_tasks_mutex, pthread_mutex_lock_completed_tasks(), pthread_mutex_lock_wrap(), pthread_mutex_unlock_completed_tasks(), pthread_mutex_unlock_wrap(), TAILQ_INSERT_TAIL, completed_tasks_node_s::task, quark_task_s::task_mutex, quark_task_s::task_thread_count, and completed_tasks_node_s::workerid.