Skip to content

Commit

Permalink
Merge pull request #648 from abouteiller/new/best-device-and-version-…
Browse files Browse the repository at this point in the history
…counting

Compute CPU and GPU versions without lying during kernel epilog (enable TTG/PTG versioning to coexist)
  • Loading branch information
abouteiller authored Apr 11, 2024
2 parents 1ababbe + 3935f31 commit e95ec85
Show file tree
Hide file tree
Showing 13 changed files with 638 additions and 611 deletions.
2 changes: 1 addition & 1 deletion parsec/data_dist/matrix/map_operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ add_task_to_list(parsec_execution_stream_t *es,
parsec_task_t* new_context = (parsec_task_t*)parsec_thread_mempool_allocate( es->context_mempool );

PARSEC_COPY_EXECUTION_CONTEXT(new_context, newcontext);
new_context->status = PARSEC_TASK_STATUS_NONE;
pready_list[vpid_dst] = (parsec_task_t*)parsec_list_item_ring_push_sorted( (parsec_list_item_t*)(pready_list[vpid_dst]),
(parsec_list_item_t*)new_context,
parsec_execution_context_priority_comparator );
Expand Down Expand Up @@ -430,6 +429,7 @@ static void parsec_map_operator_startup_fn(parsec_context_t *context,
parsec_execution_stream_t* es;

*startup_list = NULL;
PARSEC_OBJ_CONSTRUCT(&fake_context, parsec_task_t);
fake_context.task_class = &parsec_map_operator;
fake_context.taskpool = tp;
fake_context.priority = 0;
Expand Down
20 changes: 3 additions & 17 deletions parsec/interfaces/dtd/insert_function.c
Original file line number Diff line number Diff line change
Expand Up @@ -1828,12 +1828,6 @@ complete_hook_of_dtd(parsec_execution_stream_t *es,

if( PARSEC_INOUT == (op_type_on_current_flow & PARSEC_GET_OP_TYPE) ||
PARSEC_OUTPUT == (op_type_on_current_flow & PARSEC_GET_OP_TYPE)) {
parsec_data_copy_t *data_out = this_task->data[current_dep].data_out;
/* assert(NULL != data_out); // DEBUG */
if( NULL != data_out ) {
data_out->version++;
}

parsec_data_copy_t *data_in = this_task->data[current_dep].data_in;
if( PARSEC_PULLIN & op_type_on_current_flow ) {
assert(NULL != data_in);
Expand Down Expand Up @@ -2281,14 +2275,7 @@ parsec_dtd_template_release( const parsec_task_class_t *tc )
static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t *es, parsec_task_t *this_task)
{
(void) es;
int dev_index;
int64_t load;

dev_index = parsec_get_best_device(this_task, &load);
assert(dev_index >= 0 && dev_index < parsec_mca_device_enabled());
if (!parsec_mca_device_is_gpu(dev_index)) {
return PARSEC_HOOK_RETURN_NEXT; /* Fall back */
}
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) || defined(PARSEC_HAVE_DEV_LEVEL_ZERO_SUPPORT)
parsec_dtd_task_t *dtd_task = (parsec_dtd_task_t *)this_task;
parsec_dtd_task_class_t *dtd_tc = (parsec_dtd_task_class_t*)this_task->task_class;
Expand All @@ -2298,7 +2285,6 @@ static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t
gpu_task->ec = (parsec_task_t *) this_task;
gpu_task->submit = dtd_tc->gpu_func_ptr;
gpu_task->task_type = 0;
gpu_task->load = load;
gpu_task->last_data_check_epoch = -1; /* force at least one validation for the task */
gpu_task->pushout = 0;
for(int i = 0; i < dtd_tc->super.nb_flows; i++) {
Expand All @@ -2309,7 +2295,7 @@ static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t
gpu_task->flow_nb_elts[i] = this_task->data[i].data_in->original->nb_elts;
}

parsec_device_module_t *device = parsec_mca_device_get(dev_index);
parsec_device_module_t *device = this_task->selected_device;
assert(NULL != device);
/* We already know the device is a GPU device from the test above */
gpu_task->stage_in = parsec_default_gpu_stage_in;
Expand All @@ -2331,6 +2317,7 @@ static parsec_hook_return_t parsec_dtd_cpu_task_submit(parsec_execution_stream_t
parsec_dtd_flow_info_t *flow = FLOW_OF(dtd_task, i);
if( PARSEC_INOUT == (flow->op_type & PARSEC_GET_OP_TYPE) ||
PARSEC_OUTPUT == (flow->op_type & PARSEC_GET_OP_TYPE)) {
this_task->data[i].data_in->version++;
int8_t data_owner_dev = this_task->data[i].data_in->original->owner_device;
assert(data_owner_dev < parsec_mca_device_enabled());
if( data_owner_dev >= 0 && parsec_mca_device_is_gpu(data_owner_dev) ) {
Expand All @@ -2339,7 +2326,6 @@ static parsec_hook_return_t parsec_dtd_cpu_task_submit(parsec_execution_stream_t
access = PARSEC_FLOW_ACCESS_RW;
else
access = PARSEC_FLOW_ACCESS_WRITE;
this_task->data[i].data_in->version++;
parsec_data_transfer_ownership_to_copy(this_task->data[i].data_in->original, 0, access);
// We need to remove ourselves as reader on this data, as transfer_ownership always counts an additional reader
this_task->data[i].data_in->readers--;
Expand Down Expand Up @@ -2645,6 +2631,7 @@ parsec_dtd_create_and_initialize_task(parsec_dtd_taskpool_t *dtd_tp,

assert(this_task->super.super.super.obj_reference_count == 1);

PARSEC_OBJ_CONSTRUCT(&this_task->super, parsec_task_t);
this_task->orig_task = NULL;
this_task->super.taskpool = (parsec_taskpool_t *)dtd_tp;
this_task->ht_item.key = (parsec_key_t)(uintptr_t)(dtd_tp->task_id++);
Expand All @@ -2662,7 +2649,6 @@ parsec_dtd_create_and_initialize_task(parsec_dtd_taskpool_t *dtd_tp,
this_task->rank = rank;
this_task->super.priority = 0;
this_task->super.chore_mask = PARSEC_DEV_ANY_TYPE;
this_task->super.status = PARSEC_TASK_STATUS_NONE;

int j;
parsec_dtd_flow_info_t *flow;
Expand Down
Loading

0 comments on commit e95ec85

Please sign in to comment.