Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion parsec/arena.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ int parsec_arena_allocate_device_private(parsec_data_copy_t *copy,
assert(0 == (((ptrdiff_t)chunk->data) % arena->alignment));
assert((arena->elem_size + (ptrdiff_t)chunk->data) <= (size + (ptrdiff_t)chunk));

data->nb_elts = count * arena->elem_size;
data->span = count * arena->elem_size;

copy->flags = PARSEC_DATA_FLAG_ARENA |
PARSEC_DATA_FLAG_PARSEC_OWNED |
Expand Down
6 changes: 3 additions & 3 deletions parsec/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ static void parsec_data_construct(parsec_data_t* obj )
obj->owner_device = -1;
obj->preferred_device = -1;
obj->key = 0;
obj->nb_elts = 0;
obj->span = 0;
for( uint32_t i = 0; i < parsec_nb_devices;
obj->device_copies[i] = NULL, i++ );
obj->dc = NULL;
Expand Down Expand Up @@ -511,7 +511,7 @@ parsec_data_create( parsec_data_t **holder,
data->owner_device = 0;
data->key = key;
data->dc = desc;
data->nb_elts = size;
data->span = size;
parsec_data_copy_attach(data, data_copy, 0);

if( !parsec_atomic_cas_ptr(holder, NULL, data) ) {
Expand Down Expand Up @@ -548,7 +548,7 @@ parsec_data_create_with_type( parsec_data_collection_t *desc,
clone->owner_device = 0;
clone->key = key;
clone->dc = desc;
clone->nb_elts = size;
clone->span = size;
parsec_data_copy_attach(clone, data_copy, 0);

return clone;
Expand Down
2 changes: 1 addition & 1 deletion parsec/data_dist/matrix/broadcast.jdf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ static parsec_data_t* data_of(parsec_data_collection_t *desc, ...)
data->owner_device = 0;
data->key = k;
data->dc = (parsec_data_collection_t*)desc;
data->nb_elts = 1;
data->span = 1;
parsec_data_copy_t* data_copy = (parsec_data_copy_t*)PARSEC_OBJ_NEW(parsec_data_copy_t);
parsec_data_copy_attach(data, data_copy, 0);
data_copy->device_private = NULL;
Expand Down
2 changes: 1 addition & 1 deletion parsec/data_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct parsec_data_s {
* which device this data should be modified RW when there
* are multiple choices. -1 means no preference. */
struct parsec_data_collection_s* dc;
size_t nb_elts; /* size in bytes of the memory layout */
size_t span; /* size in bytes of the memory layout */
Comment thread
abouteiller marked this conversation as resolved.
struct parsec_data_copy_s *device_copies[]; /* this array is allocated according to the number of devices
* (parsec_nb_devices). It points to the most recent
* version of the data.
Expand Down
12 changes: 5 additions & 7 deletions parsec/interfaces/dtd/insert_function.c
Original file line number Diff line number Diff line change
Expand Up @@ -2398,20 +2398,18 @@ static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) || defined(PARSEC_HAVE_DEV_LEVEL_ZERO_SUPPORT)
parsec_dtd_task_t *dtd_task = (parsec_dtd_task_t *)this_task;
parsec_dtd_task_class_t *dtd_tc = (parsec_dtd_task_class_t*)this_task->task_class;
parsec_gpu_task_t *gpu_task = (parsec_gpu_task_t *) calloc(1, sizeof(parsec_gpu_task_t));
PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t);
gpu_task->release_device_task = free; /* by default free the device task */
parsec_gpu_task_t *gpu_task = (parsec_gpu_task_t*)PARSEC_OBJ_NEW(parsec_gpu_dsl_task_t);
gpu_task->ec = (parsec_task_t *) this_task;
gpu_task->submit = dtd_tc->gpu_func_ptr;
gpu_task->task_type = 0;
gpu_task->last_data_check_epoch = -1; /* force at least one validation for the task */
gpu_task->task_type = PARSEC_GPU_TASK_TYPE_KERNEL;
gpu_task->pushout = 0;
gpu_task->nb_flows = dtd_tc->super.nb_flows; /* inherit the flows from the task class */
for(int i = 0; i < dtd_tc->super.nb_flows; i++) {
parsec_dtd_flow_info_t *flow = FLOW_OF(dtd_task, i);
if(flow->op_type & PARSEC_PUSHOUT)
gpu_task->pushout |= 1<<i;
gpu_task->flow[i] = dtd_tc->super.in[i];
gpu_task->flow_nb_elts[i] = this_task->data[i].data_in->original->nb_elts;
gpu_task->flow_info[i].flow = dtd_tc->super.in[i];
gpu_task->flow_info[i].flow_span = this_task->data[i].data_in->original->span;
}

parsec_device_module_t *device = this_task->selected_device;
Expand Down
51 changes: 21 additions & 30 deletions parsec/interfaces/ptg/ptg-compiler/jdf2c.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2009-2024 The University of Tennessee and The University
* Copyright (c) 2009-2025 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved.
Expand Down Expand Up @@ -6828,66 +6828,56 @@ static void jdf_generate_code_hook_gpu(const jdf_t *jdf,
" assert(NULL != dev);\n"
" assert(PARSEC_DEV_IS_GPU(dev->type));\n"
"\n"
" gpu_task = (parsec_gpu_task_t*)calloc(1, sizeof(parsec_gpu_task_t));\n"
" PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t);\n"
" gpu_task->release_device_task = free; /* by default free the device task */\n"
" gpu_task = (parsec_gpu_task_t*)PARSEC_OBJ_NEW(parsec_gpu_dsl_task_t);"
" gpu_task->ec = (parsec_task_t*)this_task;\n"
" gpu_task->submit = &%s_kernel_submit_%s_%s;\n"
" gpu_task->task_type = 0;\n"
" gpu_task->last_data_check_epoch = -1; /* force at least one validation for the task */\n",
" gpu_task->task_type = PARSEC_GPU_TASK_TYPE_KERNEL;\n",
dev_lower, jdf_basename, f->fname);

/* Set up stage in/out callbacks */
jdf_find_property(body->properties, "stage_in", &stage_in_property);
jdf_find_property(body->properties, "stage_out", &stage_out_property);

if(stage_in_property == NULL) {
coutput(" gpu_task->stage_in = parsec_default_gpu_stage_in;\n");
}else{
coutput(" gpu_task->stage_in = %s;\n", dump_expr((void**)stage_in_property->expr, &info));
}
coutput(" gpu_task->stage_in = %s;\n", (NULL == stage_in_property) ? "parsec_default_gpu_stage_in"
: dump_expr((void **)stage_in_property->expr, &info));

if(stage_out_property == NULL) {
coutput(" gpu_task->stage_out = parsec_default_gpu_stage_out;\n");
}else{
coutput(" gpu_task->stage_out = %s;\n", dump_expr((void**)stage_out_property->expr, &info));
}
jdf_find_property(body->properties, "stage_out", &stage_out_property);
coutput(" gpu_task->stage_out = %s;\n", (NULL == stage_out_property) ? "parsec_default_gpu_stage_out"
: dump_expr((void **)stage_out_property->expr, &info));

/* Dump the dataflow */
coutput(" gpu_task->pushout = 0;\n");
for(fl = f->dataflow, di = 0; fl != NULL; fl = fl->next, di++) {
coutput(" gpu_task->flow[%d] = &%s;\n",
coutput(" gpu_task->flow_info[%d].flow = &%s;\n",
di, JDF_OBJECT_ONAME( fl ));

sprintf(sa->ptr, "%s.dc", fl->varname);
jdf_find_property(body->properties, sa->ptr, &desc_property);
if(desc_property == NULL){
coutput(" gpu_task->flow_dc[%d] = NULL;\n", di);
if(desc_property == NULL) {
coutput(" gpu_task->flow_info[%d].flow_dc = NULL;\n", di);
}else{
coutput(" gpu_task->flow_dc[%d] = (parsec_data_collection_t *)%s;\n", di,
coutput(" gpu_task->flow_info[%d].flow_dc = (parsec_data_collection_t *)%s;\n", di,
dump_expr((void**)desc_property->expr, &info));
}

sprintf(sa->ptr, "%s.size", fl->varname);
jdf_find_property(body->properties, sa->ptr, &size_property);

if(fl->flow_flags & JDF_FLOW_TYPE_CTL) {
if(size_property != NULL){
if(size_property != NULL) {
fprintf(stderr, "Error: specifying GPU buffer size for CTL flow %s at line %d\n",
fl->varname, JDF_OBJECT_LINENO(fl));
exit(-1);
}
coutput(" gpu_task->flow_nb_elts[%d] = 0;\n", di);
}else{
coutput(" gpu_task->flow_info[%d].flow_span = 0;\n", di);
} else {
coutput(" // A shortcut to check if the flow exists\n");
coutput(" if (gpu_task->ec->data[%d].data_in != NULL) {\n", di);
if(size_property == NULL){
coutput(" gpu_task->flow_nb_elts[%d] = gpu_task->ec->data[%d].data_in->original->nb_elts;\n", di, di);
}else{
coutput(" gpu_task->flow_nb_elts[%d] = %s;\n",
di, dump_expr((void**)size_property->expr, &info));
coutput(" gpu_task->flow_info[%d].flow_span = gpu_task->ec->data[%d].data_in->original->span;\n", di, di);
} else {
coutput(" gpu_task->flow_info[%d].flow_span = %s;\n",
di, dump_expr((void **)size_property->expr, &info));
if( (stage_in_property == NULL) || ( stage_out_property == NULL )){
coutput(" assert(gpu_task->ec->data[%d].data_in->original->nb_elts <= %s);\n",
coutput(" assert(gpu_task->ec->data[%d].data_in->original->span <= %s);\n",
di, dump_expr((void**)size_property->expr, &info));
}

Expand Down Expand Up @@ -6955,6 +6945,7 @@ static void jdf_generate_code_hook_gpu(const jdf_t *jdf,
}
}
string_arena_free(info.sa);
coutput(" gpu_task->nb_flows = %d; /* inherit the flows from the task_class */\n", di);

coutput("\n"
" return dev->kernel_scheduler(dev, es, gpu_task);\n"
Expand Down
Loading
Loading