Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions parsec/interfaces/dtd/insert_function.c
Original file line number Diff line number Diff line change
Expand Up @@ -2371,6 +2371,7 @@ int parsec_dtd_task_class_add_chore(parsec_taskpool_t *tp,
}

incarnations[i].type = device_type;
incarnations[i].flags = PARSEC_CHORE_FLAG_NONE;
if(PARSEC_DEV_CUDA == device_type) {
incarnations[i].hook = parsec_dtd_gpu_task_submit;
dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)function;
Expand Down
4 changes: 3 additions & 1 deletion parsec/interfaces/ptg/ptg-compiler/jdf2c.c
Original file line number Diff line number Diff line change
Expand Up @@ -3954,6 +3954,7 @@ jdf_generate_function_incarnation_list( const jdf_t *jdf,
}
string_arena_add_string(sa, "#if defined(PARSEC_HAVE_DEV_%s_SUPPORT)\n", dev_upper);
string_arena_add_string(sa, " { .type = PARSEC_DEV_%s,\n", dev_upper);
string_arena_add_string(sa, " .flags = PARSEC_CHORE_FLAG_NONE,\n");
if( NULL == dyld_property ) {
Comment thread
devreal marked this conversation as resolved.
string_arena_add_string(sa, " .dyld = NULL,\n");
} else {
Expand Down Expand Up @@ -3986,7 +3987,7 @@ jdf_generate_function_incarnation_list( const jdf_t *jdf,
} while (NULL != body);
string_arena_add_string(sa,
" { .type = PARSEC_DEV_NONE,\n"
" .evaluate = NULL,\n"
" .flags = PARSEC_CHORE_FLAG_NONE,\n"
" .hook = (parsec_hook_t*)NULL }, /* End marker */\n"
"};\n\n");
}
Expand Down Expand Up @@ -4497,6 +4498,7 @@ static void jdf_generate_startup_hook( const jdf_t *jdf )
" idx++;\n"
" }\n"
" chores[idx].type = PARSEC_DEV_NONE;\n"
" chores[idx].flags = PARSEC_CHORE_FLAG_NONE;\n"
" chores[idx].evaluate = NULL;\n"
" chores[idx].hook = NULL;\n"
" /* Create the initialization tasks for each taskclass */\n"
Expand Down
32 changes: 31 additions & 1 deletion parsec/mca/device/device_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2026 Stony Brook University. All rights reserved.
*/

#include "parsec/parsec_config.h"
Expand Down Expand Up @@ -2498,6 +2499,16 @@ parsec_device_kernel_cleanout( parsec_device_gpu_module_t *gpu_device,
return 0;
}

/**
 * Tells whether the completion of the task carried by @p gpu_task has to be
 * handed over to the worker threads.
 *
 * The decision is read from the PARSEC_CHORE_FLAG_SHIFT_COMPLETION bit in the
 * flags of the incarnation (chore) currently selected for the task.
 *
 * @param gpu_device not consulted by this check.
 * @param gpu_task   GPU task whose embedded task (the ec field) is inspected.
 * @return true when the flag is set on the selected chore, false otherwise.
 */
static bool shift_completed_task(parsec_device_gpu_module_t* gpu_device, parsec_gpu_task_t* gpu_task)
{
    const parsec_task_t *task = gpu_task->ec;
    /* Incarnation picked for this task; its flags carry the completion policy. */
    const __parsec_chore_t *selected = task->task_class->incarnations + task->selected_chore;

    return 0 != (selected->flags & PARSEC_CHORE_FLAG_SHIFT_COMPLETION);
}

/**
* This version is based on 4 streams: one for transfers from the memory to
* the GPU, 2 for kernel executions and one for transfers from the GPU into
Expand Down Expand Up @@ -2703,8 +2714,27 @@ parsec_device_kernel_scheduler( parsec_device_module_t *module,
goto remove_gpu_task;
}
parsec_device_kernel_epilog( gpu_device, gpu_task );
__parsec_complete_execution( es, gpu_task->ec );

Comment thread
devreal marked this conversation as resolved.
#if defined(PARSEC_DEBUG_PARANOID)
/**
* Batched submissions should have been split again for completion but
* in case this ever changes we will catch that here.
*/
assert(parsec_gpu_task_is_singleton(gpu_task));
#endif
if (shift_completed_task(gpu_device, gpu_task)) {
// ship the task to other threads to complete its execution
gpu_task->ec->status = PARSEC_TASK_STATUS_COMPLETE;
PARSEC_LIST_ITEM_SINGLETON(gpu_task->ec);
__parsec_schedule(es, gpu_task->ec, 1);
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%d:%s]: task %p of gpu_task %p scheduled for completion",
gpu_device->super.device_index, gpu_device->super.name,
gpu_task->ec, gpu_task);
} else {
__parsec_complete_execution( es, gpu_task->ec );
}
gpu_device->super.executed_tasks++;

remove_gpu_task:
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%d:%s]: gpu_task %p freed",
Comment thread
bosilca marked this conversation as resolved.
gpu_device->super.device_index, gpu_device->super.name,
Expand Down
5 changes: 4 additions & 1 deletion parsec/parsec_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,12 @@ int parsec_update_deps_with_counter_count_task(parsec_taskpool_t *tp,
const parsec_task_t* PARSEC_RESTRICT origin,
const parsec_flow_t* PARSEC_RESTRICT origin_flow,
const parsec_flow_t* PARSEC_RESTRICT dest_flow);


#define PARSEC_CHORE_FLAG_NONE 0x00
#define PARSEC_CHORE_FLAG_SHIFT_COMPLETION 0x01
typedef struct __parsec_internal_incarnation_s {
int32_t type;
int32_t flags;
parsec_evaluate_function_t *evaluate;
parsec_hook_t *hook;
char *dyld;
Expand Down