diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f5a03f51..e96c7c43c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,8 +168,10 @@ mark_as_advanced(PARSEC_SCHED_DEPS_MASK) mark_as_advanced(PARSEC_DIST_THREAD PARSEC_DIST_PRIORITIES) option(PARSEC_DIST_WITH_MPI "Build PaRSEC for distributed memory with MPI backend (conflicts all other backends)" ON) -if(PARSEC_DIST_WITH_MPI AND 0) - message(FATAL_ERROR "PARSEC_DIST_WITH_MPI and PARSEC_DIST_WITH_OTHER are mutually exclusive, please select only one") +option(PARSEC_DIST_WITH_UCX + "Build PaRSEC for distributed memory with UCX backend bootstrapped by PMIx" OFF) +if(PARSEC_DIST_WITH_MPI AND PARSEC_DIST_WITH_UCX) + message(FATAL_ERROR "The UCX backend currently requires PARSEC_DIST_WITH_MPI=OFF because the MPI build still exposes MPI_Datatype in the public datatype ABI") endif() option(PARSEC_MPI_IS_GPU_AWARE "Build PaRSEC assuming the MPI library is GPU-aware, aka. can move data directly to and from GPU memory.\ @@ -647,6 +649,17 @@ if( BUILD_PARSEC ) endif (NOT MPI_C_FOUND) list(APPEND EXTRA_LIBS ${MPI_C_LIBRARIES}) endif (PARSEC_DIST_WITH_MPI) + if (PARSEC_DIST_WITH_UCX) + find_package(PkgConfig REQUIRED) + pkg_check_modules(PARSEC_UCX REQUIRED ucx) + pkg_check_modules(PARSEC_PMIX REQUIRED pmix) + set(PARSEC_HAVE_UCX TRUE) + set(PARSEC_HAVE_PMIX TRUE) + include_directories(BEFORE ${PARSEC_UCX_INCLUDE_DIRS} ${PARSEC_PMIX_INCLUDE_DIRS}) + list(APPEND EXTRA_INCLUDES ${PARSEC_UCX_INCLUDE_DIRS} ${PARSEC_PMIX_INCLUDE_DIRS}) + # Append the absolute library paths reported by pkg-config (<prefix>_LINK_LIBRARIES, CMake >= 3.12) so that no directory-scoped link_directories() is needed to resolve bare -l names. + list(APPEND EXTRA_LIBS ${PARSEC_UCX_LINK_LIBRARIES} ${PARSEC_PMIX_LINK_LIBRARIES}) + endif (PARSEC_DIST_WITH_UCX) # # Check to see if # support for MPI 2.0 is available diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index df3dfe05c..2b0fb2abe 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -120,6 +120,7 @@ set(SOURCES private_mempool.c remote_dep.c parsec_comm_engine.c + datatype/datatype_module.c 
scheduling.c compound.c vpmap.c @@ -130,16 +131,13 @@ set(SOURCES if( PARSEC_PROF_TRACE ) list(APPEND SOURCES dictionary.c) endif( PARSEC_PROF_TRACE ) -if( PARSEC_HAVE_MPI ) +if( PARSEC_HAVE_MPI OR PARSEC_HAVE_UCX ) list(APPEND SOURCES - parsec_mpi_funnelled.c - remote_dep_mpi.c) -endif( PARSEC_HAVE_MPI ) -if( NOT MPI_C_FOUND ) + remote_dep_comm.c) +endif( PARSEC_HAVE_MPI OR PARSEC_HAVE_UCX ) +if( NOT PARSEC_HAVE_MPI ) list(APPEND SOURCES datatype/datatype.c) -else( NOT MPI_C_FOUND ) - list(APPEND SOURCES datatype/datatype_mpi.c) -endif( NOT MPI_C_FOUND ) +endif( NOT PARSEC_HAVE_MPI ) list(APPEND SOURCES parsec_hwloc.c) if( PARSEC_PROF_GRAPHER ) @@ -310,6 +308,7 @@ if( BUILD_PARSEC ) ${CMAKE_CURRENT_SOURCE_DIR}/include/parsec/parsec_config_bottom.h ${CMAKE_CURRENT_SOURCE_DIR}/include/parsec/data_distribution.h ${CMAKE_CURRENT_SOURCE_DIR}/datatype.h + ${CMAKE_CURRENT_SOURCE_DIR}/datatype_module.h ${CMAKE_CURRENT_SOURCE_DIR}/profiling.h ${CMAKE_CURRENT_SOURCE_DIR}/dictionary.h ${CMAKE_CURRENT_SOURCE_DIR}/data.h diff --git a/parsec/data.c b/parsec/data.c index 306a6c507..29770d593 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -635,7 +635,7 @@ static void parsec_arena_datatype_construct(parsec_object_t *obj) { adt->ht_item.next_item = NULL; /* keep Coverity happy */ adt->ht_item.hash64 = 0; /* keep Coverity happy */ adt->ht_item.key = 0; /* keep Coverity happy */ - adt->opaque_dtt = NULL; + adt->opaque_dtt = PARSEC_DATATYPE_NULL; } static void parsec_arena_datatype_destruct(parsec_object_t *obj) { diff --git a/parsec/datatype.h b/parsec/datatype.h index 3efdc31a6..dedd233d9 100644 --- a/parsec/datatype.h +++ b/parsec/datatype.h @@ -2,6 +2,7 @@ * Copyright (c) 2015-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
 */ #ifndef PARSEC_DATATYPE_H_HAS_BEEN_INCLUDED @@ -66,9 +67,12 @@ typedef intptr_t parsec_datatype_t; BEGIN_C_DECLS /** - * Map the datatype creation to the well designed and well known MPI datatype - * API. The datatype support remains extremely basic, providing API only for - * basic datatypes and functions to mix them together. + * Datatype portability API used by communication and data-movement engines. + * + * The public parsec_type_* functions are stable entry points. Their + * implementation is selected by the active communication backend, so MPI builds + * can keep MPI datatypes while other transports can provide another + * representation. */ int parsec_type_size(parsec_datatype_t type, int *size); @@ -120,7 +124,8 @@ int parsec_type_match(parsec_datatype_t dtt1, /** * Routine to check if a datatype is contiguous. * @param[in] parsec_datatype_t datatype - * @return PARSEC_SUCCESS if it was created with MPI_Type_contiguous, PARSEC_ERROR otherwise. + * @return PARSEC_SUCCESS if the selected backend recognizes it as contiguous, + * PARSEC_ERROR otherwise. */ int parsec_type_contiguous(parsec_datatype_t dtt); END_C_DECLS diff --git a/parsec/datatype/datatype.c b/parsec/datatype/datatype.c index 022d30307..117ed8403 100644 --- a/parsec/datatype/datatype.c +++ b/parsec/datatype/datatype.c @@ -2,151 +2,358 @@ * Copyright (c) 2015-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" -#include "parsec/datatype.h" +#include "parsec/datatype_module.h" +#include <stdlib.h> /** - * Map the datatype creation to the well designed and well known MPI datatype - * manipulation. However, right now we only provide the most basic types and - * functions to mix them together. - * - * However, this file contains only the support functions needed when MPI is not - * available. 
+ * Minimal datatype backend used when no communication component provides a + * richer datatype implementation. It recognizes PaRSEC's predefined scalar + * datatypes and records the size and extent of simple derived datatypes. This + * is enough for communication backends that only need contiguous byte ranges, + * while still failing through the public API if a caller asks for information + * about an unknown datatype. */ -int parsec_type_size( parsec_datatype_t type, - int *size ) + +typedef struct parsec_datatype_basic_desc_s { + uint64_t magic; + int size; + ptrdiff_t lb; + ptrdiff_t extent; + int contiguous; +} parsec_datatype_basic_desc_t; + +#define PARSEC_DATATYPE_BASIC_MAGIC 0x7061727365636474ULL + +static int +parsec_datatype_basic_is_predefined(parsec_datatype_t type) +{ + return (type >= parsec_datatype_int_t) && + (type <= parsec_datatype_double_complex_t); +} + +static parsec_datatype_basic_desc_t * +parsec_datatype_basic_get_desc(parsec_datatype_t type) { - *size = 0; + parsec_datatype_basic_desc_t *desc; + uintptr_t handle = (uintptr_t)type; + + if( parsec_datatype_basic_is_predefined(type) || + (PARSEC_DATATYPE_NULL == type) ) { + return NULL; + } + if( handle < 4096 || 0 != (handle % sizeof(void *)) ) { + return NULL; + } + desc = (parsec_datatype_basic_desc_t *)(intptr_t)type; + if( PARSEC_DATATYPE_BASIC_MAGIC != desc->magic ) { + return NULL; + } + return desc; +} + +static int +parsec_datatype_basic_get_info(parsec_datatype_t type, + int *size, + ptrdiff_t *lb, + ptrdiff_t *extent, + int *contiguous) +{ + parsec_datatype_basic_desc_t *desc; + int predefined_size; + switch( type ) { case parsec_datatype_int_t: - *size = sizeof( int ); break; + predefined_size = sizeof( int ); + break; case parsec_datatype_int8_t: - *size = sizeof( int8_t ); break; + predefined_size = sizeof( int8_t ); + break; case parsec_datatype_int16_t: - *size = sizeof( int16_t ); break; + predefined_size = sizeof( int16_t ); + break; case parsec_datatype_int32_t: - *size = 
sizeof( int32_t ); break; + predefined_size = sizeof( int32_t ); + break; case parsec_datatype_int64_t: - *size = sizeof( int64_t ); break; + predefined_size = sizeof( int64_t ); + break; case parsec_datatype_uint8_t: - *size = sizeof( uint8_t ); break; + predefined_size = sizeof( uint8_t ); + break; case parsec_datatype_uint16_t: - *size = sizeof( uint16_t ); break; + predefined_size = sizeof( uint16_t ); + break; case parsec_datatype_uint32_t: - *size = sizeof( uint32_t ); break; + predefined_size = sizeof( uint32_t ); + break; case parsec_datatype_uint64_t: - *size = sizeof( uint64_t ); break; + predefined_size = sizeof( uint64_t ); + break; case parsec_datatype_float_t: - *size = sizeof( float ); break; + predefined_size = sizeof( float ); + break; case parsec_datatype_double_t: - *size = sizeof( double ); break; + predefined_size = sizeof( double ); + break; case parsec_datatype_long_double_t: - *size = sizeof( long double ); break; + predefined_size = sizeof( long double ); + break; case parsec_datatype_complex_t: - *size = 2 * sizeof( float ); break; + predefined_size = 2 * sizeof( float ); + break; case parsec_datatype_double_complex_t: - *size = 2 * sizeof( double ); break; + predefined_size = 2 * sizeof( double ); + break; default: - return PARSEC_ERR_NOT_SUPPORTED; + desc = parsec_datatype_basic_get_desc(type); + if( NULL == desc ) { + return PARSEC_ERR_NOT_SUPPORTED; + } + if( NULL != size ) *size = desc->size; + if( NULL != lb ) *lb = desc->lb; + if( NULL != extent ) *extent = desc->extent; + if( NULL != contiguous ) *contiguous = desc->contiguous; + return PARSEC_SUCCESS; } + + if( NULL != size ) *size = predefined_size; + if( NULL != lb ) *lb = 0; + if( NULL != extent ) *extent = predefined_size; + if( NULL != contiguous ) *contiguous = 1; return PARSEC_SUCCESS; } -int parsec_type_extent(parsec_datatype_t type, ptrdiff_t* lb, ptrdiff_t* extent) { - int size, rc; - rc = parsec_type_size(type, &size); - *extent = size; - return rc; -} +static int 
+parsec_datatype_basic_desc_create(int size, + ptrdiff_t lb, + ptrdiff_t extent, + int contiguous, + parsec_datatype_t *newtype) +{ + parsec_datatype_basic_desc_t *desc; -int parsec_type_free(parsec_datatype_t* type) { - *type = PARSEC_DATATYPE_NULL; + desc = (parsec_datatype_basic_desc_t *)calloc(1, sizeof(*desc)); + if( NULL == desc ) { + return PARSEC_ERR_OUT_OF_RESOURCE; + } + desc->magic = PARSEC_DATATYPE_BASIC_MAGIC; + desc->size = size; + desc->lb = lb; + desc->extent = extent; + desc->contiguous = contiguous; + *newtype = (parsec_datatype_t)(intptr_t)desc; return PARSEC_SUCCESS; } -int parsec_type_create_contiguous( int count, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +parsec_datatype_basic_size(parsec_datatype_t type, int *size) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)oldtype; - return PARSEC_SUCCESS; + return parsec_datatype_basic_get_info(type, size, NULL, NULL, NULL); } -int parsec_type_create_vector( int count, - int blocklength, - int stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +parsec_datatype_basic_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)blocklength; (void)stride; (void)oldtype; - return PARSEC_SUCCESS; + return parsec_datatype_basic_get_info(type, NULL, lb, extent, NULL); } -int parsec_type_create_hvector( int count, - int blocklength, - ptrdiff_t stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +parsec_datatype_basic_free(parsec_datatype_t *type) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)blocklength; (void)stride; (void)oldtype; + parsec_datatype_basic_desc_t *desc; + + desc = parsec_datatype_basic_get_desc(*type); + if( NULL != desc ) { + desc->magic = 0; + free(desc); + } + *type = PARSEC_DATATYPE_NULL; return PARSEC_SUCCESS; } -int parsec_type_create_indexed(int count, - const int array_of_blocklengths[], - const int 
array_of_displacements[], - parsec_datatype_t oldtype, - parsec_datatype_t *newtype) +static int +parsec_datatype_basic_create_contiguous(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)array_of_blocklengths; (void)array_of_displacements; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous; + ptrdiff_t oldlb, oldextent; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + return parsec_datatype_basic_desc_create(count * oldsize, oldlb, + count * oldextent, + contiguous, newtype); } -int parsec_type_create_indexed_block(int count, +static int +parsec_datatype_basic_create_vector(int count, int blocklength, - const int array_of_displacements[], + int stride, parsec_datatype_t oldtype, parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)blocklength; (void)array_of_displacements; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous; + ptrdiff_t oldlb, oldextent, extent; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + extent = ((ptrdiff_t)(count - 1) * stride + blocklength) * oldextent; + return parsec_datatype_basic_desc_create(count * blocklength * oldsize, + oldlb, extent, + contiguous && (extent == (count * blocklength * oldsize)), + newtype); } -int parsec_type_create_struct(int count, - const int array_of_blocklengths[], - const ptrdiff_t array_of_displacements[], - const parsec_datatype_t array_of_types[], - parsec_datatype_t *newtype) +static int +parsec_datatype_basic_create_hvector(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)array_of_blocklengths; (void)array_of_displacements; (void)array_of_types; - return 
PARSEC_SUCCESS; + int oldsize, rc, contiguous; + ptrdiff_t oldlb, oldextent, extent; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + extent = ((ptrdiff_t)(count - 1) * stride) + blocklength * oldextent; + return parsec_datatype_basic_desc_create(count * blocklength * oldsize, + oldlb, extent, + contiguous && (extent == (count * blocklength * oldsize)), + newtype); } -int parsec_type_create_resized(parsec_datatype_t oldtype, - ptrdiff_t lb, - ptrdiff_t extent, - parsec_datatype_t *newtype) +static int +parsec_datatype_basic_create_indexed(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)lb; (void)extent; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous, size = 0; + ptrdiff_t oldlb, oldextent, min_disp = 0, max_disp = 0; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + for(int i = 0; i < count; i++) { + ptrdiff_t begin = (ptrdiff_t)array_of_displacements[i] * oldextent; + ptrdiff_t end = begin + array_of_blocklengths[i] * oldextent; + if( (0 == i) || (begin < min_disp) ) min_disp = begin; + if( (0 == i) || (end > max_disp) ) max_disp = end; + size += array_of_blocklengths[i] * oldsize; + } + return parsec_datatype_basic_desc_create(size, oldlb + min_disp, + max_disp - min_disp, 0, newtype); } -int parsec_type_match(parsec_datatype_t dtt1, - parsec_datatype_t dtt2){ - (void)dtt1; (void)dtt2; - return PARSEC_SUCCESS; +static int +parsec_datatype_basic_create_indexed_block(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + int oldsize, rc, contiguous, size; + ptrdiff_t oldlb, oldextent, min_disp = 0, max_disp = 0; + + rc = 
parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + for(int i = 0; i < count; i++) { + ptrdiff_t begin = (ptrdiff_t)array_of_displacements[i] * oldextent; + ptrdiff_t end = begin + blocklength * oldextent; + if( (0 == i) || (begin < min_disp) ) min_disp = begin; + if( (0 == i) || (end > max_disp) ) max_disp = end; + } + size = count * blocklength * oldsize; + return parsec_datatype_basic_desc_create(size, oldlb + min_disp, + max_disp - min_disp, 0, newtype); +} + +static int +parsec_datatype_basic_create_struct(int count, + const int array_of_blocklengths[], + const ptrdiff_t array_of_displacements[], + const parsec_datatype_t array_of_types[], + parsec_datatype_t *newtype) +{ + int rc, oldsize, size = 0; + ptrdiff_t oldlb, oldextent, min_disp = 0, max_disp = 0; + + for(int i = 0; i < count; i++) { + rc = parsec_datatype_basic_get_info(array_of_types[i], &oldsize, + &oldlb, &oldextent, NULL); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + ptrdiff_t begin = array_of_displacements[i] + oldlb; + ptrdiff_t end = begin + array_of_blocklengths[i] * oldextent; + if( (0 == i) || (begin < min_disp) ) min_disp = begin; + if( (0 == i) || (end > max_disp) ) max_disp = end; + size += array_of_blocklengths[i] * oldsize; + } + return parsec_datatype_basic_desc_create(size, min_disp, + max_disp - min_disp, 0, newtype); } -int parsec_type_contiguous(parsec_datatype_t dtt) +static int +parsec_datatype_basic_create_resized(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype) { - (void)dtt; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, NULL, NULL, + &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + return parsec_datatype_basic_desc_create(oldsize, lb, extent, + contiguous && (extent == oldsize), + newtype); } + +static int +parsec_datatype_basic_contiguous(parsec_datatype_t dtt) 
+{ + int contiguous, rc; + + rc = parsec_datatype_basic_get_info(dtt, NULL, NULL, NULL, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + return contiguous ? PARSEC_SUCCESS : PARSEC_ERROR; +} + +const parsec_datatype_module_t parsec_datatype_basic_module = { + .size = parsec_datatype_basic_size, + .extent = parsec_datatype_basic_extent, + .free = parsec_datatype_basic_free, + .create_contiguous = parsec_datatype_basic_create_contiguous, + .create_vector = parsec_datatype_basic_create_vector, + .create_hvector = parsec_datatype_basic_create_hvector, + .create_indexed = parsec_datatype_basic_create_indexed, + .create_indexed_block = parsec_datatype_basic_create_indexed_block, + .create_struct = parsec_datatype_basic_create_struct, + .create_resized = parsec_datatype_basic_create_resized, + .contiguous = parsec_datatype_basic_contiguous, +}; diff --git a/parsec/datatype/datatype_module.c b/parsec/datatype/datatype_module.c new file mode 100644 index 000000000..99ad6fddf --- /dev/null +++ b/parsec/datatype/datatype_module.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/datatype_module.h" +#include "parsec/utils/debug.h" + +#if !defined(PARSEC_HAVE_MPI) +extern const parsec_datatype_module_t parsec_datatype_basic_module; +static const parsec_datatype_module_t *parsec_datatype_selected_module = &parsec_datatype_basic_module; +#else +/* + * MPI-enabled builds start without a datatype backend on purpose: the selected + * communication component owns the datatype representation and installs the + * matching module during parsec_comm_engine_init(). 
+ */ +static const parsec_datatype_module_t *parsec_datatype_selected_module = NULL; +#endif + +static int +parsec_datatype_module_ready(void) +{ + if( NULL != parsec_datatype_selected_module ) { + return 1; + } + + parsec_warning("No datatype backend has been installed"); + return 0; +} + +int +parsec_datatype_module_install(const parsec_datatype_module_t *module) +{ + if( NULL == module ) { + return PARSEC_ERR_BAD_PARAM; + } + if( (NULL == module->size) || + (NULL == module->extent) || + (NULL == module->free) || + (NULL == module->create_contiguous) || + (NULL == module->create_vector) || + (NULL == module->create_hvector) || + (NULL == module->create_indexed) || + (NULL == module->create_indexed_block) || + (NULL == module->create_struct) || + (NULL == module->create_resized) || + (NULL == module->contiguous) ) { + return PARSEC_ERR_BAD_PARAM; + } + + parsec_datatype_selected_module = module; + return PARSEC_SUCCESS; +} + +int +parsec_type_size(parsec_datatype_t type, int *size) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->size(type, size); +} + +int +parsec_type_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->extent(type, lb, extent); +} + +int +parsec_type_free(parsec_datatype_t *type) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->free(type); +} + +int +parsec_type_create_contiguous(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_contiguous(count, oldtype, newtype); +} + +int +parsec_type_create_vector(int count, + int blocklength, + int stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( 
!parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_vector(count, blocklength, stride, + oldtype, newtype); +} + +int +parsec_type_create_hvector(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_hvector(count, blocklength, stride, + oldtype, newtype); +} + +int +parsec_type_create_indexed(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_indexed(count, array_of_blocklengths, + array_of_displacements, + oldtype, newtype); +} + +int +parsec_type_create_indexed_block(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_indexed_block(count, blocklength, + array_of_displacements, + oldtype, newtype); +} + +int +parsec_type_create_struct(int count, + const int array_of_blocklengths[], + const ptrdiff_t array_of_displacements[], + const parsec_datatype_t array_of_types[], + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_struct(count, array_of_blocklengths, + array_of_displacements, + array_of_types, newtype); +} + +int +parsec_type_create_resized(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_resized(oldtype, lb, extent, 
newtype); +} + +int +parsec_type_match(parsec_datatype_t dtt1, parsec_datatype_t dtt2) +{ +#if defined(PARSEC_HAVE_MPI) + return (dtt1 == dtt2 ? PARSEC_SUCCESS : PARSEC_ERROR); +#else + (void)dtt1; (void)dtt2; + return PARSEC_SUCCESS; +#endif +} + +int +parsec_type_contiguous(parsec_datatype_t dtt) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->contiguous(dtt); +} diff --git a/parsec/datatype_module.h b/parsec/datatype_module.h new file mode 100644 index 000000000..c0b068c62 --- /dev/null +++ b/parsec/datatype_module.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * Backend datatype module interface. + * + * PaRSEC keeps the public parsec_type_* API stable, but the implementation of + * those routines is provided by the communication backend selected for the + * process. This lets an MPI backend keep using MPI datatypes while another + * backend can provide a different representation. + * + * Datatype matching is intentionally not part of this module. The current + * parsec_type_match() API is a lightweight compatibility helper and does not + * require backend-specific layout comparison. 
+ */ +#ifndef PARSEC_DATATYPE_MODULE_H_HAS_BEEN_INCLUDED +#define PARSEC_DATATYPE_MODULE_H_HAS_BEEN_INCLUDED + +#include "parsec/parsec_config.h" +#include "parsec/datatype.h" + +BEGIN_C_DECLS + +typedef int (*parsec_datatype_module_size_fn_t)(parsec_datatype_t type, + int *size); +typedef int (*parsec_datatype_module_extent_fn_t)(parsec_datatype_t type, + ptrdiff_t *lb, + ptrdiff_t *extent); +typedef int (*parsec_datatype_module_free_fn_t)(parsec_datatype_t *type); +typedef int (*parsec_datatype_module_create_contiguous_fn_t)(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_vector_fn_t)(int count, + int blocklength, + int stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_hvector_fn_t)(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_indexed_fn_t)(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_indexed_block_fn_t)(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_struct_fn_t)(int count, + const int array_of_blocklengths[], + const ptrdiff_t array_of_displacements[], + const parsec_datatype_t array_of_types[], + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_resized_fn_t)(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_contiguous_fn_t)(parsec_datatype_t type); + +typedef struct parsec_datatype_module_s { + parsec_datatype_module_size_fn_t size; + parsec_datatype_module_extent_fn_t extent; + parsec_datatype_module_free_fn_t free; + 
parsec_datatype_module_create_contiguous_fn_t create_contiguous; + parsec_datatype_module_create_vector_fn_t create_vector; + parsec_datatype_module_create_hvector_fn_t create_hvector; + parsec_datatype_module_create_indexed_fn_t create_indexed; + parsec_datatype_module_create_indexed_block_fn_t create_indexed_block; + parsec_datatype_module_create_struct_fn_t create_struct; + parsec_datatype_module_create_resized_fn_t create_resized; + parsec_datatype_module_contiguous_fn_t contiguous; +} parsec_datatype_module_t; + +/** + * Install the datatype backend used by the public parsec_type_* API. + * + * The selected communication component calls this during initialization. The + * installed module must remain valid for the rest of the process lifetime, + * because datatype objects can be freed during runtime teardown after the + * communication engine itself has been finalized. + */ +int parsec_datatype_module_install(const parsec_datatype_module_t *module); + +END_C_DECLS + +#endif /* PARSEC_DATATYPE_MODULE_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/include/parsec/parsec_config_bottom.h b/parsec/include/parsec/parsec_config_bottom.h index 88a9d1045..04286b96c 100644 --- a/parsec/include/parsec/parsec_config_bottom.h +++ b/parsec/include/parsec/parsec_config_bottom.h @@ -132,7 +132,7 @@ #define __STDC_FORMAT_MACROS #include <inttypes.h> -#if defined(PARSEC_HAVE_MPI) +#if defined(PARSEC_HAVE_MPI) || defined(PARSEC_HAVE_UCX) # define DISTRIBUTED #else # undef DISTRIBUTED @@ -201,4 +201,3 @@ typedef int32_t parsec_dependency_t; #endif #endif /* PARSEC_CONFIG_BOTTOM_H_HAS_BEEN_INCLUDED */ - diff --git a/parsec/include/parsec/parsec_options.h.in b/parsec/include/parsec/parsec_options.h.in index d0936ca4c..4d375afe9 100644 --- a/parsec/include/parsec/parsec_options.h.in +++ b/parsec/include/parsec/parsec_options.h.in @@ -141,6 +141,8 @@ #cmakedefine PARSEC_HAVE_MPI_20 #cmakedefine PARSEC_HAVE_MPI_30 #cmakedefine PARSEC_HAVE_MPI_OVERTAKE +#cmakedefine PARSEC_HAVE_UCX +#cmakedefine 
PARSEC_HAVE_PMIX #cmakedefine PARSEC_HAVE_AYUDAME #cmakedefine PARSEC_HAVE_DEV_CPU_SUPPORT diff --git a/parsec/mca/comm/CMakeLists.txt b/parsec/mca/comm/CMakeLists.txt new file mode 100644 index 000000000..c25717cdd --- /dev/null +++ b/parsec/mca/comm/CMakeLists.txt @@ -0,0 +1,9 @@ +# Sources that belong to the comm framework itself. Backend-specific files are +# discovered from each module directory through its ValidateModule.CMake file. +set(MCA_${COMPONENT}_SOURCES mca/comm/comm.c) + +# Install the framework header so in-tree and future out-of-tree comm components +# can build against the same component/module contract. +set_property(TARGET parsec + APPEND PROPERTY + PUBLIC_HEADER_H mca/comm/comm.h) diff --git a/parsec/mca/comm/comm.c b/parsec/mca/comm/comm.c new file mode 100644 index 000000000..3bd2657a0 --- /dev/null +++ b/parsec/mca/comm/comm.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/datatype_module.h" +#include "parsec/mca/comm/comm.h" +#include "parsec/mca/mca_repository.h" +#include "parsec/utils/debug.h" +#include <assert.h> + +static parsec_comm_base_component_t *parsec_comm_selected_component = NULL; + +parsec_comm_engine_t * +parsec_comm_engine_component_init(parsec_context_t *context) +{ + mca_base_component_t **components; + mca_base_module_t *selected_module = NULL; + mca_base_component_t *selected_component = NULL; + parsec_comm_module_t *comm_module; + parsec_comm_engine_t *ce; + + assert(NULL == parsec_comm_selected_component); + + /* + * Query all compiled and user-enabled comm components, close every component + * that was not selected, and keep the selected component open until + * parsec_comm_engine_component_fini(). 
+ */ + components = mca_components_open_bytype("comm"); + mca_components_query(components, &selected_module, &selected_component); + mca_components_close(components); + + if( NULL == selected_module ) { + parsec_warning("No communication engine component could be selected"); + return NULL; + } + + comm_module = (parsec_comm_module_t *)selected_module; + parsec_comm_selected_component = (parsec_comm_base_component_t *)selected_component; + + parsec_debug_verbose(4, parsec_debug_output, "Installing communication engine %s", + parsec_comm_selected_component->base_version.mca_component_name); + + if( NULL == comm_module->module.init ) { + parsec_warning("Communication engine %s did not provide an init function", + parsec_comm_selected_component->base_version.mca_component_name); + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + return NULL; + } + if( NULL == comm_module->datatype ) { + parsec_warning("Communication engine %s did not provide datatype support", + parsec_comm_selected_component->base_version.mca_component_name); + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + return NULL; + } + + ce = comm_module->module.init(context); + if( NULL == ce ) { + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + return NULL; + } + + /* + * Datatype handling follows the selected transport. MPI-backed runs keep + * using MPI datatypes; future non-MPI communication engines can install + * their own representation without changing the public parsec_type_* API. 
+ */ + if( PARSEC_SUCCESS != parsec_datatype_module_install(comm_module->datatype) ) { + parsec_warning("Communication engine %s did not provide valid datatype support", + parsec_comm_selected_component->base_version.mca_component_name); + if( NULL != ce->fini ) { + ce->fini(ce); + } + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + return NULL; + } + return ce; +} + +int +parsec_comm_engine_component_fini(void) +{ + if( NULL != parsec_comm_selected_component ) { + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + } + return PARSEC_SUCCESS; +} diff --git a/parsec/mca/comm/comm.h b/parsec/mca/comm/comm.h new file mode 100644 index 000000000..54bfe981d --- /dev/null +++ b/parsec/mca/comm/comm.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * Communication engine MCA framework. + * + * The comm framework selects exactly one transport backend for a PaRSEC + * context. The selected backend fills the existing parsec_comm_engine_t + * interface; the rest of the runtime continues to use parsec_ce and the + * function table from parsec_comm_engine.h. + * + * This framework deliberately exposes only selection and teardown entry points. + * Callers should not reach back into the selected MCA module after init; all + * transport operations go through the returned parsec_comm_engine_t. + */ +#ifndef PARSEC_COMM_H_HAS_BEEN_INCLUDED +#define PARSEC_COMM_H_HAS_BEEN_INCLUDED + +#include "parsec/parsec_config.h" +#include "parsec/parsec_comm_engine.h" +#include "parsec/mca/mca.h" + +BEGIN_C_DECLS + +struct parsec_datatype_module_s; + +/** + * Common component header for communication engine components. + * + * Component-specific state should live in the component source file or in the + * parsec_comm_engine_t implementation, not in this base type. 
+ */ +struct parsec_comm_base_component_2_0_0 { + mca_base_component_2_0_0_t base_version; + mca_base_component_data_2_0_0_t base_data; +}; + +typedef struct parsec_comm_base_component_2_0_0 parsec_comm_base_component_2_0_0_t; +typedef struct parsec_comm_base_component_2_0_0 parsec_comm_base_component_t; + +/** + * Initialize a communication engine backend. + * + * @param[inout] context PaRSEC context that owns the selected communication + * engine instance. + * + * @return A fully initialized parsec_comm_engine_t on success, or NULL if this + * module cannot initialize for the provided context. + */ +typedef parsec_comm_engine_t *(*parsec_comm_base_module_init_fn_t)(parsec_context_t *context); + +/** + * Communication module contract. + * + * The module builds the concrete parsec_comm_engine_t used by the runtime and + * publishes the datatype operations that match this transport. Backend + * communication operations themselves are the function pointers stored in the + * returned parsec_comm_engine_t. + */ +struct parsec_comm_base_module_1_0_0_t { + parsec_comm_base_module_init_fn_t init; +}; + +typedef struct parsec_comm_base_module_1_0_0_t parsec_comm_base_module_1_0_0_t; +typedef struct parsec_comm_base_module_1_0_0_t parsec_comm_base_module_t; + +typedef struct parsec_comm_module_s { + /* + * Keep the component pointer in the module, following the existing MCA + * framework convention in PaRSEC. The generic MCA query API returns an + * opaque mca_base_module_t pointer, and the comm framework casts it back to + * this complete module type after selection. + */ + const parsec_comm_base_component_t *component; + parsec_comm_base_module_t module; + /* + * Datatype operations used by the public parsec_type_* API while this + * transport backend is selected. The module storage must outlive runtime + * finalization because datatype objects can be released late in teardown. 
+ */ + const struct parsec_datatype_module_s *datatype; +} parsec_comm_module_t; + +/** + * MCA version tuple for the comm framework. + */ +#define PARSEC_COMM_BASE_VERSION_2_0_0 \ + MCA_BASE_VERSION_2_0_0, \ + "comm", 2, 0, 0 + +/** + * Select and initialize the active communication engine component. + * + * This is internal to the runtime wrapper in parsec_comm_engine.c. It opens all + * available comm components, keeps only the selected component open, and calls + * the selected module's init method. + */ +parsec_comm_engine_t *parsec_comm_engine_component_init(parsec_context_t *context); + +/** + * Close the component selected by parsec_comm_engine_component_init(). + * + * The parsec_comm_engine_t itself must have already been finalized through its + * fini function before this call; this function only releases the MCA component + * lifetime. + */ +int parsec_comm_engine_component_fini(void); + +END_C_DECLS + +#endif /* PARSEC_COMM_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/mca/comm/mpi/ValidateModule.CMake b/parsec/mca/comm/mpi/ValidateModule.CMake new file mode 100644 index 000000000..5180b8d38 --- /dev/null +++ b/parsec/mca/comm/mpi/ValidateModule.CMake @@ -0,0 +1,10 @@ +# The MPI backend is the current implementation of the distributed +# communication engine. Build it only when PaRSEC itself was configured with +# MPI support; otherwise the runtime keeps using the existing local-only path. +if(PARSEC_HAVE_MPI) + set(MCA_${COMPONENT}_${MODULE} ON) + file(GLOB MCA_${COMPONENT}_${MODULE}_SOURCES ${MCA_BASE_DIR}/${COMPONENT}/${MODULE}/[^\\.]*.c) + set(MCA_${COMPONENT}_${MODULE}_CONSTRUCTOR "${COMPONENT}_${MODULE}_static_component") +else() + set(MCA_${COMPONENT}_${MODULE} OFF) +endif() diff --git a/parsec/mca/comm/mpi/comm_mpi.h b/parsec/mca/comm/mpi/comm_mpi.h new file mode 100644 index 000000000..9bd6239a2 --- /dev/null +++ b/parsec/mca/comm/mpi/comm_mpi.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+ */ +/** + * @file + * + * MPI communication engine MCA component declaration. + * + * This component keeps the existing funnelled MPI transport behind the new + * comm framework. The transport implementation remains in + * comm_mpi_funnelled.c; this header only exposes the component symbol required + * by the MCA repository. + */ +#ifndef PARSEC_COMM_MPI_H_HAS_BEEN_INCLUDED +#define PARSEC_COMM_MPI_H_HAS_BEEN_INCLUDED + +#include "parsec/mca/comm/comm.h" +#include "parsec/datatype_module.h" + +BEGIN_C_DECLS + +/** + * MCA component descriptor for the MPI communication engine. + */ +PARSEC_DECLSPEC extern const parsec_comm_base_component_t parsec_comm_mpi_component; + +/** + * MPI datatype backend installed together with the MPI communication engine. + */ +PARSEC_DECLSPEC extern const parsec_datatype_module_t parsec_comm_mpi_datatype_module; + +/** + * Constructor used by the static MCA component table. + */ +PARSEC_DECLSPEC mca_base_component_t *comm_mpi_static_component(void); + +END_C_DECLS + +#endif /* PARSEC_COMM_MPI_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/mca/comm/mpi/comm_mpi_component.c b/parsec/mca/comm/mpi/comm_mpi_component.c new file mode 100644 index 000000000..ee0b2c124 --- /dev/null +++ b/parsec/mca/comm/mpi/comm_mpi_component.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/mca/comm/mpi/comm_mpi.h" +#include "comm_mpi_funnelled.h" + +static int comm_mpi_component_query(mca_base_module_t **module, int *priority); + +/* + * The MPI component currently wraps the pre-existing funnelled MPI engine. + * The module init function returns the global parsec_ce populated with MPI + * callbacks; no extra selected-component accessor is needed after init. 
+ */ +static parsec_comm_module_t parsec_comm_mpi_module = { + .component = &parsec_comm_mpi_component, + .module = { + .init = mpi_funnelled_init, + }, + .datatype = &parsec_comm_mpi_datatype_module, +}; + +const parsec_comm_base_component_t parsec_comm_mpi_component = { + { + PARSEC_COMM_BASE_VERSION_2_0_0, + + "mpi", + "", + PARSEC_VERSION_MAJOR, + PARSEC_VERSION_MINOR, + + NULL, + NULL, + comm_mpi_component_query, + NULL, + "", + }, + { + MCA_BASE_METADATA_PARAM_NONE, + "", + } +}; + +mca_base_component_t * +comm_mpi_static_component(void) +{ + return (mca_base_component_t *)&parsec_comm_mpi_component; +} + +static int +comm_mpi_component_query(mca_base_module_t **module, int *priority) +{ + /* + * MPI is the only comm component in this first componentization step, so it + * keeps a high fixed priority and remains the default backend. + */ + *priority = 100; + *module = (mca_base_module_t *)&parsec_comm_mpi_module; + return MCA_SUCCESS; +} diff --git a/parsec/datatype/datatype_mpi.c b/parsec/mca/comm/mpi/comm_mpi_datatype.c similarity index 54% rename from parsec/datatype/datatype_mpi.c rename to parsec/mca/comm/mpi/comm_mpi_datatype.c index c28476d01..111a32590 100644 --- a/parsec/datatype/datatype_mpi.c +++ b/parsec/mca/comm/mpi/comm_mpi_datatype.c @@ -2,24 +2,25 @@ * Copyright (c) 2015-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" -#include "parsec/datatype.h" +#include "parsec/datatype_module.h" #if !defined(PARSEC_HAVE_MPI) #error __FILE__ should only be used when MPI support is enabled. #endif /* !defined(PARSEC_HAVE_MPI) */ -int -parsec_type_size( parsec_datatype_t type, int *size ) +static int +comm_mpi_datatype_size(parsec_datatype_t type, int *size) { int rc = MPI_Type_size( type, size ); return (MPI_SUCCESS == rc ? 
PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_extent( parsec_datatype_t type, ptrdiff_t* lb, ptrdiff_t* extent) +static int +comm_mpi_datatype_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) { int rc; MPI_Aint mpi_extent, mpi_lb; @@ -33,17 +34,17 @@ parsec_type_extent( parsec_datatype_t type, ptrdiff_t* lb, ptrdiff_t* extent) return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_free( parsec_datatype_t* type ) +static int +comm_mpi_datatype_free(parsec_datatype_t *type) { int rc = MPI_Type_free(type); return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_contiguous( int count, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +comm_mpi_datatype_create_contiguous(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_contiguous( count, oldtype, newtype ); if( MPI_SUCCESS != rc ) return PARSEC_ERROR; @@ -51,12 +52,12 @@ parsec_type_create_contiguous( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_vector( int count, - int blocklength, - int stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +comm_mpi_datatype_create_vector(int count, + int blocklength, + int stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_vector( count, blocklength, stride, oldtype, newtype ); @@ -65,12 +66,12 @@ parsec_type_create_vector( int count, return (MPI_SUCCESS == rc ? 
PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_hvector( int count, - int blocklength, - ptrdiff_t stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +comm_mpi_datatype_create_hvector(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_create_hvector( count, blocklength, stride, oldtype, newtype ); @@ -79,12 +80,12 @@ parsec_type_create_hvector( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_indexed( int count, - const int array_of_blocklengths[], - const int array_of_displacements[], - parsec_datatype_t oldtype, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_indexed(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_indexed( count, array_of_blocklengths, @@ -95,12 +96,12 @@ parsec_type_create_indexed( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_indexed_block( int count, - int blocklength, - const int array_of_displacements[], - parsec_datatype_t oldtype, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_indexed_block(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_create_indexed_block( count, blocklength, array_of_displacements, @@ -110,12 +111,12 @@ parsec_type_create_indexed_block( int count, return (MPI_SUCCESS == rc ? 
PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_struct( int count, - const int *array_of_blocklengths, - const ptrdiff_t *array_of_displacements, - const parsec_datatype_t *array_of_types, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_struct(int count, + const int *array_of_blocklengths, + const ptrdiff_t *array_of_displacements, + const parsec_datatype_t *array_of_types, + parsec_datatype_t *newtype) { int rc = MPI_Type_create_struct( count, array_of_blocklengths, @@ -126,11 +127,11 @@ parsec_type_create_struct( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_resized( parsec_datatype_t oldtype, - ptrdiff_t lb, - ptrdiff_t extent, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_resized(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype) { int rc; #if defined(PARSEC_HAVE_MPI_20) @@ -146,14 +147,8 @@ parsec_type_create_resized( parsec_datatype_t oldtype, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int parsec_type_match(parsec_datatype_t dtt1, - parsec_datatype_t dtt2) -{ - (void)dtt1; (void)dtt2; - return ( dtt1 == dtt2 ? 
PARSEC_SUCCESS : PARSEC_ERROR); -} - -int parsec_type_contiguous(parsec_datatype_t dtt) +static int +comm_mpi_datatype_contiguous(parsec_datatype_t dtt) { int rc; int num_integers, num_addresses, num_datatypes, combiner; @@ -165,3 +160,17 @@ int parsec_type_contiguous(parsec_datatype_t dtt) } return PARSEC_ERROR; } + +const parsec_datatype_module_t parsec_comm_mpi_datatype_module = { + .size = comm_mpi_datatype_size, + .extent = comm_mpi_datatype_extent, + .free = comm_mpi_datatype_free, + .create_contiguous = comm_mpi_datatype_create_contiguous, + .create_vector = comm_mpi_datatype_create_vector, + .create_hvector = comm_mpi_datatype_create_hvector, + .create_indexed = comm_mpi_datatype_create_indexed, + .create_indexed_block = comm_mpi_datatype_create_indexed_block, + .create_struct = comm_mpi_datatype_create_struct, + .create_resized = comm_mpi_datatype_create_resized, + .contiguous = comm_mpi_datatype_contiguous, +}; diff --git a/parsec/parsec_mpi_funnelled.c b/parsec/mca/comm/mpi/comm_mpi_funnelled.c similarity index 90% rename from parsec/parsec_mpi_funnelled.c rename to parsec/mca/comm/mpi/comm_mpi_funnelled.c index 5e3c670ef..a4f87df58 100644 --- a/parsec/parsec_mpi_funnelled.c +++ b/parsec/mca/comm/mpi/comm_mpi_funnelled.c @@ -4,12 +4,22 @@ * reserved. * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ +/** + * @file + * + * Funnelled MPI implementation of the communication engine callbacks. + * + * This file is intentionally still transport-specific after the first comm MCA + * componentization step. The generic remote-dependency protocol continues to + * call through parsec_comm_engine_t, while this backend provides the MPI active + * message, memory-handle, pack/unpack, and progress operations. 
+ */ #include #include #include #include -#include "parsec/parsec_mpi_funnelled.h" +#include "comm_mpi_funnelled.h" #include "parsec/remote_dep.h" #include "parsec/class/parsec_hash_table.h" #include "parsec/class/dequeue.h" @@ -62,6 +72,19 @@ PARSEC_OBJ_CLASS_INSTANCE(mpi_funnelled_mem_reg_handle_t, parsec_list_item_t, * if the layer has been initialized or not. */ static int MAX_MPI_TAG = -1, mca_tag_ub = -1; +/* + * Track MPI ownership explicitly. Applications that initialized MPI keep + * ownership of MPI_Finalize(); PaRSEC only finalizes MPI when the MPI backend + * had to initialize it during parsec_init(). + */ +static int mpi_funnelled_initialized_mpi = 0; +/* + * context->comm_ctx is also owned by the MPI backend when the backend installs + * the default communicator or duplicates a user-provided communicator through + * set_ctx(). The communication engine may later replace that duplicate with + * parsec_ce_mpi_comm, which has its own lifetime. + */ +static int mpi_funnelled_context_comm_owned = 0; static volatile int __VAL_NEXT_TAG = 0; #if INT_MAX == INT32_MAX #define next_tag_cas(t, o, n) parsec_atomic_cas_int32(t, o, n) @@ -92,6 +115,45 @@ static inline int next_tag(int k) { return __tag; } +static const char * +mpi_funnelled_thread_level_name(int level) +{ + switch(level) { + case MPI_THREAD_SINGLE: return "MPI_THREAD_SINGLE"; + case MPI_THREAD_FUNNELED: return "MPI_THREAD_FUNNELED"; + case MPI_THREAD_SERIALIZED: return "MPI_THREAD_SERIALIZED"; + case MPI_THREAD_MULTIPLE: return "MPI_THREAD_MULTIPLE"; + default: return "MPI_THREAD_UNKNOWN"; + } +} + +static int +mpi_funnelled_requested_thread_level(void) +{ + /* + * When PaRSEC owns MPI initialization, ask MPI for the strongest guarantee. + * The existing comm_thread_multiple logic still decides whether PaRSEC uses + * concurrent MPI access internally, but applications and tests may call MPI + * collectives after parsec_init() while PaRSEC is active. 
+ */ + return MPI_THREAD_MULTIPLE; +} + +static void +mpi_funnelled_release_context_comm(parsec_context_t *context) +{ + if( mpi_funnelled_context_comm_owned && (-1 != context->comm_ctx) ) { + MPI_Comm comm = (MPI_Comm)context->comm_ctx; + + if( (MPI_COMM_NULL != comm) && + (MPI_COMM_WORLD != comm) ) { + MPI_Comm_free(&comm); + } + context->comm_ctx = -1; + mpi_funnelled_context_comm_owned = 0; + } +} + /* Count and protect the internal building of the arrays of AM */ static int parsec_ce_am_design_version = 0; static int parsec_ce_am_build_version = 0; @@ -534,8 +596,14 @@ static int parsec_mpi_set_ctx(parsec_comm_engine_t* ce, intptr_t opaque_comm_ctx assert( -1 != context->comm_ctx ); MPI_Comm_free((MPI_Comm*)&context->comm_ctx); } + mpi_funnelled_release_context_comm(context); + rc = MPI_Comm_dup((MPI_Comm)opaque_comm_ctx, &comm); + if( MPI_SUCCESS != rc ) { + return PARSEC_ERROR; + } context->comm_ctx = (intptr_t)comm; + mpi_funnelled_context_comm_owned = 1; parsec_ce_am_design_version++; /* signal need for update */ /* We need to know who we are and how many others are there, in order to * correctly initialize the communication engine at the next start. 
*/ @@ -641,7 +709,79 @@ static int mpi_funneled_init_once(parsec_context_t* context) parsec_comm_engine_t * mpi_funnelled_init(parsec_context_t *context) { - int i, rc; + int i, rc, is_mpi_up = 0, is_mpi_finalized = 0; + int requested_thread_level, thread_level_support; + + if( (MPI_SUCCESS != MPI_Finalized(&is_mpi_finalized)) || + is_mpi_finalized ) { + context->nb_nodes = 1; + parsec_communication_engine_up = -1; + parsec_fatal("MPI was already finalized before PaRSEC initialized the MPI communication backend.\n"); + return NULL; + } + + if( MPI_SUCCESS != MPI_Initialized(&is_mpi_up) ) { + context->nb_nodes = 1; + parsec_communication_engine_up = -1; + parsec_fatal("PaRSEC could not query the MPI initialization state.\n"); + return NULL; + } + if( 0 == is_mpi_up ) { + /* + * Tests and applications that only need PaRSEC's communication backend + * should not have to initialize MPI themselves. Initialize MPI lazily + * here, and remember ownership so mpi_funnelled_fini() can perform the + * matching MPI_Finalize(). 
+ */ + requested_thread_level = mpi_funnelled_requested_thread_level(); + rc = MPI_Init_thread(NULL, NULL, requested_thread_level, &thread_level_support); + if( MPI_SUCCESS != rc ) { + context->nb_nodes = 1; + parsec_communication_engine_up = -1; /* No communications supported */ + parsec_fatal("PaRSEC failed to initialize MPI for the MPI communication backend.\n"); + return NULL; + } + mpi_funnelled_initialized_mpi = 1; + PARSEC_DEBUG_VERBOSE(4, parsec_comm_output_stream, + "MPI backend initialized MPI: requested %s, provided %s", + mpi_funnelled_thread_level_name(requested_thread_level), + mpi_funnelled_thread_level_name(thread_level_support)); + } else { + MPI_Query_thread(&thread_level_support); + } + + if( thread_level_support == MPI_THREAD_SINGLE || + thread_level_support == MPI_THREAD_FUNNELED ) { + parsec_warning("MPI was not initialized with the appropriate level of thread support.\n" + "\t* Current level is %s, while MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE is needed\n" + "\t* to guarantee correctness of the PaRSEC runtime.\n", + mpi_funnelled_thread_level_name(thread_level_support)); + } + + /* Establish rank/size as soon as MPI is available, including the case + * where this backend initialized MPI on behalf of parsec_init(). + */ + if( -1 == context->comm_ctx ) { + MPI_Comm comm; + + /* + * Keep PaRSEC isolated from application-level MPI communicator changes. + * Even when the MPI backend initialized MPI itself, MPI_COMM_WORLD is + * duplicated before being stored in the PaRSEC context. 
+ */ + rc = MPI_Comm_dup(MPI_COMM_WORLD, &comm); + if( MPI_SUCCESS != rc ) { + context->nb_nodes = 1; + parsec_communication_engine_up = -1; + parsec_fatal("PaRSEC failed to duplicate MPI_COMM_WORLD for the MPI communication backend.\n"); + return NULL; + } + context->comm_ctx = (intptr_t)comm; + mpi_funnelled_context_comm_owned = 1; + MPI_Comm_size( comm, (int*)&(context->nb_nodes)); + MPI_Comm_rank( comm, (int*)&(context->my_rank)); + parsec_debug_rank = context->my_rank; + } if( -1 == MAX_MPI_TAG ) if( 0 != (rc = mpi_funneled_init_once(context)) ) { @@ -688,18 +828,13 @@ mpi_funnelled_init(parsec_context_t *context) parsec_ce.reshape = NULL; parsec_ce.can_serve = NULL; parsec_ce.send_am = NULL; + parsec_ce.taskpool_sync_ids = mpi_no_thread_taskpool_sync_ids; parsec_ce.parsec_context = context; parsec_ce.capabilites.sided = 2; parsec_ce.capabilites.supports_noncontiguous_datatype = 1; + parsec_ce.capabilites.multithreaded = (thread_level_support >= MPI_THREAD_MULTIPLE); - /* Define some sensible values. We assume the application will initialize PaRSEC using - * the entire MPI_COMM_WORLD, but we need to prepare some decent default values. 
*/ - if( -1 == context->comm_ctx ) { - MPI_Comm_size( MPI_COMM_WORLD, (int*)&(context->nb_nodes)); - MPI_Comm_rank( MPI_COMM_WORLD, (int*)&(context->my_rank)); - context->comm_ctx = (intptr_t)MPI_COMM_WORLD; - } /* Register for internal GET and PUT AMs */ parsec_ce.tag_register(PARSEC_CE_MPI_FUNNELLED_GET_TAG_INTERNAL, mpi_funnelled_internal_get_am_callback, @@ -763,6 +898,9 @@ mpi_funnelled_fini(parsec_comm_engine_t *ce) MPI_Comm_free(&parsec_ce_mpi_am_comm[i]); } ce->parsec_context->comm_ctx = -1; /* We use -1 for the opaque comm_ctx, rather than the MPI specific MPI_COMM_NULL */ + mpi_funnelled_context_comm_owned = 0; + } else { + mpi_funnelled_release_context_comm(ce->parsec_context); } assert(MPI_COMM_NULL == parsec_ce_mpi_comm ); /* no communicator */ assert(MPI_COMM_NULL == parsec_ce_mpi_am_comm[0] ); /* no communicator */ @@ -771,6 +909,16 @@ mpi_funnelled_fini(parsec_comm_engine_t *ce) mpi_funnelled_last_active_req = 0; mpi_funnelled_static_req_idx = 0; + if( mpi_funnelled_initialized_mpi ) { + int is_mpi_finalized = 0; + + if( (MPI_SUCCESS == MPI_Finalized(&is_mpi_finalized)) && + !is_mpi_finalized ) { + MPI_Finalize(); + } + mpi_funnelled_initialized_mpi = 0; + } + return 1; } @@ -1461,7 +1609,7 @@ int mpi_no_thread_enable(parsec_comm_engine_t *ce) { parsec_context_t *context = ce->parsec_context; - int i; + int i, rc; /* Did anything changed that would require a reconstruction of the management structures? 
*/ assert(-1 != context->comm_ctx); @@ -1498,6 +1646,7 @@ mpi_no_thread_enable(parsec_comm_engine_t *ce) parsec_ce.reshape = parsec_mpi_sendrecv; parsec_ce.can_serve = mpi_no_thread_can_push_more; parsec_ce.send_am = mpi_no_thread_send_active_message; + parsec_ce.taskpool_sync_ids = mpi_no_thread_taskpool_sync_ids; /* Initialize the arrays */ array_of_callbacks = (mpi_funnelled_callback_t *) calloc(parsec_param_comm_mpi_dynamic_requests, @@ -1533,8 +1682,16 @@ mpi_no_thread_enable(parsec_comm_engine_t *ce) } #endif /* defined(PARSEC_HAVE_MPI_OVERTAKE) */ /* There is no need to enable overtake for the AM communicator */ - MPI_Comm_dup_with_info((MPI_Comm) context->comm_ctx, info, &parsec_ce_mpi_comm); + MPI_Comm input_comm = (MPI_Comm)context->comm_ctx; + rc = MPI_Comm_dup_with_info(input_comm, info, &parsec_ce_mpi_comm); MPI_Info_free(&info); + if( MPI_SUCCESS != rc ) { + return PARSEC_ERROR; + } + if( mpi_funnelled_context_comm_owned ) { + MPI_Comm_free(&input_comm); + mpi_funnelled_context_comm_owned = 0; + } /* Replace the provided communicator with a pointer to the PaRSEC duplicate */ context->comm_ctx = (uintptr_t)parsec_ce_mpi_comm; @@ -1615,3 +1772,28 @@ mpi_no_thread_can_push_more(parsec_comm_engine_t *ce) /* Do we have room to post more requests? */ return mpi_funnelled_last_active_req < current_size_of_total_reqs; } + +int +mpi_no_thread_taskpool_sync_ids(parsec_comm_engine_t *ce, + intptr_t comm_ctx, + uint32_t *next_taskpool_id) +{ + MPI_Comm comm = (MPI_Comm)comm_ctx; + int mpi_is_on, idx; + + if( (NULL == next_taskpool_id) || + (MPI_SUCCESS != MPI_Initialized(&mpi_is_on)) || + !mpi_is_on ) { + return PARSEC_ERR_NOT_IMPLEMENTED; + } + if( MPI_COMM_NULL == comm ) { + comm = (NULL != ce) ? 
(MPI_Comm)ce->parsec_context->comm_ctx : MPI_COMM_WORLD; + } + + idx = (int)*next_taskpool_id; + if( MPI_SUCCESS != MPI_Allreduce(MPI_IN_PLACE, &idx, 1, MPI_INT, MPI_MAX, comm) ) { + return PARSEC_ERROR; + } + *next_taskpool_id = (uint32_t)idx; + return PARSEC_SUCCESS; +} diff --git a/parsec/parsec_mpi_funnelled.h b/parsec/mca/comm/mpi/comm_mpi_funnelled.h similarity index 63% rename from parsec/parsec_mpi_funnelled.h rename to parsec/mca/comm/mpi/comm_mpi_funnelled.h index 9996f89ef..da8297787 100644 --- a/parsec/parsec_mpi_funnelled.h +++ b/parsec/mca/comm/mpi/comm_mpi_funnelled.h @@ -2,23 +2,39 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * Backend-local interface for the funnelled MPI communication engine. + * + * These entry points populate the transport function table exposed through + * parsec_comm_engine_t. They remain MPI-specific and should not be called + * directly by generic remote-dependency code; generic code should use the + * parsec_comm_engine_t callbacks instead. */ #ifndef __USE_PARSEC_MPI_FUNNELLED_H__ #define __USE_PARSEC_MPI_FUNNELLED_H__ #include "parsec/parsec_comm_engine.h" -/* ------- Funnelled MPI implementation below ------- */ +/** Initialize the funnelled MPI communication engine for a PaRSEC context. */ parsec_comm_engine_t * mpi_funnelled_init(parsec_context_t *parsec_context); + +/** Finalize the funnelled MPI communication engine instance. */ int mpi_funnelled_fini(parsec_comm_engine_t *comm_engine); +/** Register an active-message tag and receive callback in the MPI backend. */ int mpi_no_thread_tag_register(parsec_ce_tag_t tag, parsec_ce_am_callback_t cb, void *cb_data, size_t msg_length); +/** Unregister a previously registered active-message tag. 
*/ int mpi_no_thread_tag_unregister(parsec_ce_tag_t tag); +/** Register a local memory region and return the backend memory handle. */ int mpi_no_thread_mem_register(void *mem, parsec_mem_type_t mem_type, size_t count, parsec_datatype_t datatype, @@ -26,12 +42,16 @@ mpi_no_thread_mem_register(void *mem, parsec_mem_type_t mem_type, parsec_ce_mem_reg_handle_t *lreg, size_t *lreg_size); +/** Release a memory handle returned by mpi_no_thread_mem_register(). */ int mpi_no_thread_mem_unregister(parsec_ce_mem_reg_handle_t *lreg); +/** Return the fixed wire size used for MPI memory-registration handles. */ int mpi_no_thread_get_mem_reg_handle_size(void); +/** Decode a local MPI memory-registration handle. */ int mpi_no_thread_mem_retrieve(parsec_ce_mem_reg_handle_t lreg, void **mem, parsec_datatype_t *datatype, int *count); +/** Start a remote PUT through the funnelled MPI backend. */ int mpi_no_thread_put(parsec_comm_engine_t *comm_engine, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, @@ -42,6 +62,7 @@ int mpi_no_thread_put(parsec_comm_engine_t *comm_engine, parsec_ce_onesided_callback_t l_cb, void *l_cb_data, parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); +/** Start a remote GET through the funnelled MPI backend. */ int mpi_no_thread_get(parsec_comm_engine_t *comm_engine, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, @@ -52,32 +73,48 @@ int mpi_no_thread_get(parsec_comm_engine_t *comm_engine, parsec_ce_onesided_callback_t l_cb, void *l_cb_data, parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); +/** Send an active message to a remote rank. */ int mpi_no_thread_send_active_message(parsec_comm_engine_t *comm_engine, parsec_ce_tag_t tag, int remote, void *addr, size_t size); +/** Make progress on pending MPI communication requests. */ int mpi_no_thread_progress(parsec_comm_engine_t *comm_engine); +/** Enable active-message receives for the MPI backend. 
*/ int mpi_no_thread_enable(parsec_comm_engine_t *comm_engine); + +/** Disable active-message receives for the MPI backend. */ int mpi_no_thread_disable(parsec_comm_engine_t *comm_engine); +/** Pack data using MPI datatype semantics. */ int mpi_no_thread_pack(parsec_comm_engine_t *ce, void *inbuf, int incount, parsec_datatype_t type, void *outbuf, int outsize, int *positionA); +/** Compute the size needed to pack data with MPI datatype semantics. */ int mpi_no_thread_pack_size(parsec_comm_engine_t *ce, int incount, parsec_datatype_t type, int *size); +/** Unpack data using MPI datatype semantics. */ int mpi_no_thread_unpack(parsec_comm_engine_t *ce, void *inbuf, int insize, int *position, void *outbuf, int outcount, parsec_datatype_t type); +/** Synchronize all outstanding MPI communication operations. */ int mpi_no_thread_sync(parsec_comm_engine_t *comm_engine); +/** Report whether the MPI backend can accept more pending work. */ int mpi_no_thread_can_push_more(parsec_comm_engine_t *c_e); +/** Synchronize the next taskpool id across the selected MPI communicator. */ +int +mpi_no_thread_taskpool_sync_ids(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + uint32_t *next_taskpool_id); + #endif /* __USE_PARSEC_MPI_FUNNELLED_H__ */ diff --git a/parsec/mca/comm/ucx/ValidateModule.CMake b/parsec/mca/comm/ucx/ValidateModule.CMake new file mode 100644 index 000000000..7234a3b82 --- /dev/null +++ b/parsec/mca/comm/ucx/ValidateModule.CMake @@ -0,0 +1,16 @@ +# The UCX backend is optional and is bootstrapped through PMIx. The first +# implementation is intentionally restricted to builds where PaRSEC owns the +# datatype representation, because MPI-enabled builds still expose MPI_Datatype +# as parsec_datatype_t. 
+if(PARSEC_HAVE_UCX AND PARSEC_HAVE_PMIX AND NOT PARSEC_HAVE_MPI) + set(MCA_${COMPONENT}_${MODULE} ON) + file(GLOB MCA_${COMPONENT}_${MODULE}_SOURCES ${MCA_BASE_DIR}/${COMPONENT}/${MODULE}/[^\\.]*.c) + set(MCA_${COMPONENT}_${MODULE}_CONSTRUCTOR "${COMPONENT}_${MODULE}_static_component") +else() + if(PARSEC_DIST_WITH_UCX AND PARSEC_HAVE_MPI) + message(STATUS "Module ${MODULE} not selectable: UCX currently requires PARSEC_DIST_WITH_MPI=OFF") + elseif(PARSEC_DIST_WITH_UCX) + message(STATUS "Module ${MODULE} not selectable: UCX and PMIx were not both found") + endif() + set(MCA_${COMPONENT}_${MODULE} OFF) +endif() diff --git a/parsec/mca/comm/ucx/comm_ucx.c b/parsec/mca/comm/ucx/comm_ucx.c new file mode 100644 index 000000000..2bd03958c --- /dev/null +++ b/parsec/mca/comm/ucx/comm_ucx.c @@ -0,0 +1,1217 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * UCX communication engine backend. + * + * This first UCX backend uses PMIx for bootstrap and UCX active messages plus + * CPU-memory RMA. It intentionally advertises only contiguous CPU datatype + * support; callers that need sparse datatype movement must pack before handing + * memory to this backend. 
+ */ + +#include "parsec/parsec_config.h" +#include "parsec/mca/comm/ucx/comm_ucx.h" +#include "parsec/parsec_comm_engine.h" +#include "parsec/remote_dep.h" +#include "parsec/utils/debug.h" + +#include +#include +#include +#include +#include + +#include +#include + +#define PARSEC_UCX_WORKER_ADDRESS_KEY "parsec.ucx.worker.address" +#define PARSEC_UCX_MAX_RKEY_SIZE 512 + +typedef struct parsec_ucx_am_header_s { + int32_t source; +} parsec_ucx_am_header_t; + +typedef struct parsec_ucx_callback_am_header_s { + int32_t source; + uintptr_t callback; +} parsec_ucx_callback_am_header_t; + +typedef struct parsec_ucx_mem_handle_wire_s { + uint64_t remote_addr; + uint64_t mem_size; + uint32_t rkey_size; + unsigned char rkey[PARSEC_UCX_MAX_RKEY_SIZE]; +} parsec_ucx_mem_handle_wire_t; + +typedef struct parsec_ucx_mem_handle_s { + parsec_ucx_mem_handle_wire_t wire; + void *mem; + size_t mem_size; + parsec_datatype_t datatype; + int count; + ucp_mem_h memh; +} parsec_ucx_mem_handle_t; + +typedef struct parsec_ucx_am_registration_s { + parsec_ce_tag_t tag; + parsec_ce_am_callback_t callback; + void *cb_data; + size_t max_msg_length; +} parsec_ucx_am_registration_t; + +typedef struct parsec_ucx_state_s { + pmix_proc_t pmix_proc; + int pmix_initialized; + int rank; + int size; + + ucp_context_h context; + ucp_worker_h worker; + ucp_address_t *worker_address; + size_t worker_address_length; + ucp_ep_h *eps; + int owns_context; + int owns_worker; + + parsec_ucx_am_registration_t tags[PARSEC_MAX_REGISTERED_TAGS]; +} parsec_ucx_state_t; + +static parsec_ucx_state_t parsec_ucx_state; + +static int comm_ucx_enable(parsec_comm_engine_t *comm_engine); +static int comm_ucx_disable(parsec_comm_engine_t *comm_engine); +static int comm_ucx_set_ctx(parsec_comm_engine_t *comm_engine, intptr_t ctx); +static int comm_ucx_fini(parsec_comm_engine_t *comm_engine); +static int comm_ucx_tag_register(parsec_ce_tag_t tag, + parsec_ce_am_callback_t cb, + void *cb_data, + size_t msg_length); +static int 
comm_ucx_tag_unregister(parsec_ce_tag_t tag); +static int comm_ucx_mem_register(void *mem, + parsec_mem_type_t mem_type, + size_t count, + parsec_datatype_t datatype, + size_t mem_size, + parsec_ce_mem_reg_handle_t *lreg, + size_t *lreg_size); +static int comm_ucx_mem_unregister(parsec_ce_mem_reg_handle_t *lreg); +static int comm_ucx_get_mem_reg_handle_size(void); +static int comm_ucx_mem_retrieve(parsec_ce_mem_reg_handle_t lreg, + void **mem, + parsec_datatype_t *datatype, + int *count); +static int comm_ucx_put(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, + void *l_cb_data, + parsec_ce_tag_t r_tag, + void *r_cb_data, + size_t r_cb_data_size); +static int comm_ucx_get(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, + void *l_cb_data, + parsec_ce_tag_t r_tag, + void *r_cb_data, + size_t r_cb_data_size); +static int comm_ucx_send_am(parsec_comm_engine_t *comm_engine, + parsec_ce_tag_t tag, + int remote, + void *addr, + size_t size); +static int comm_ucx_progress(parsec_comm_engine_t *comm_engine); +static int comm_ucx_pack(parsec_comm_engine_t *ce, + void *inbuf, + int incount, + parsec_datatype_t type, + void *outbuf, + int outsize, + int *position); +static int comm_ucx_pack_size(parsec_comm_engine_t *ce, + int incount, + parsec_datatype_t type, + int *size); +static int comm_ucx_unpack(parsec_comm_engine_t *ce, + void *inbuf, + int insize, + int *position, + void *outbuf, + int outcount, + parsec_datatype_t type); +static int comm_ucx_sync(parsec_comm_engine_t *comm_engine); +static int comm_ucx_can_serve(parsec_comm_engine_t *comm_engine); +static int comm_ucx_taskpool_sync_ids(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + 
uint32_t *next_taskpool_id); +static int comm_ucx_reshape(parsec_comm_engine_t *ce, + parsec_execution_stream_t *es, + parsec_data_copy_t *dst, + int64_t displ_dst, + parsec_datatype_t layout_dst, + uint64_t count_dst, + parsec_data_copy_t *src, + int64_t displ_src, + parsec_datatype_t layout_src, + uint64_t count_src); +static int comm_ucx_install_callback_am_handler(parsec_ucx_state_t *state); +static int comm_ucx_send_callback_am(parsec_comm_engine_t *comm_engine, + int remote, + parsec_ce_tag_t callback, + void *cb_data, + size_t cb_data_size); +static int comm_ucx_late_init(parsec_context_t *context, + parsec_ucx_state_t *state); + +static int +comm_ucx_status_to_parsec(ucs_status_t status, const char *what) +{ + if( UCS_OK == status ) { + return PARSEC_SUCCESS; + } + parsec_warning("UCX %s failed: %s", what, ucs_status_string(status)); + return PARSEC_ERROR; +} + +static int +comm_ucx_wait_request(parsec_ucx_state_t *state, void *request, const char *what) +{ + ucs_status_t status; + + if( NULL == request ) { + return PARSEC_SUCCESS; + } + if( UCS_PTR_IS_ERR(request) ) { + return comm_ucx_status_to_parsec(UCS_PTR_STATUS(request), what); + } + + do { + status = ucp_request_check_status(request); + if( UCS_INPROGRESS == status ) { + ucp_worker_progress(state->worker); + } + } while( UCS_INPROGRESS == status ); + + ucp_request_free(request); + return comm_ucx_status_to_parsec(status, what); +} + +static int +comm_ucx_direct_am(parsec_comm_engine_t *ce, + parsec_ucx_am_registration_t *registration, + void *addr, + size_t size, + int source) +{ + void *buffer = NULL; + int rc; + + if( NULL == registration->callback ) { + return PARSEC_ERR_NOT_FOUND; + } + if( registration->max_msg_length < size ) { + return PARSEC_ERR_BAD_PARAM; + } + + if( 0 != size ) { + buffer = malloc(size); + if( NULL == buffer ) { + return PARSEC_ERR_OUT_OF_RESOURCE; + } + memcpy(buffer, addr, size); + } + rc = registration->callback(ce, registration->tag, buffer, size, + source, 
registration->cb_data); + free(buffer); + return rc; +} + +static ucs_status_t +comm_ucx_am_callback(void *arg, + const void *header, + size_t header_length, + void *data, + size_t length, + const ucp_am_recv_param_t *param) +{ + parsec_ucx_am_registration_t *registration = (parsec_ucx_am_registration_t *)arg; + parsec_ucx_am_header_t am_header; + void *buffer = NULL; + + if( sizeof(am_header) != header_length ) { + return UCS_ERR_INVALID_PARAM; + } + memcpy(&am_header, header, sizeof(am_header)); + + if( NULL == registration->callback ) { + return UCS_OK; + } + if( param->recv_attr & UCP_AM_RECV_ATTR_FLAG_RNDV ) { + return UCS_ERR_UNSUPPORTED; + } + if( registration->max_msg_length < length ) { + return UCS_ERR_MESSAGE_TRUNCATED; + } + + if( 0 != length ) { + buffer = malloc(length); + if( NULL == buffer ) { + return UCS_ERR_NO_MEMORY; + } + memcpy(buffer, data, length); + } + + registration->callback(&parsec_ce, registration->tag, buffer, length, + am_header.source, registration->cb_data); + free(buffer); + return UCS_OK; +} + +static ucs_status_t +comm_ucx_callback_am_callback(void *arg, + const void *header, + size_t header_length, + void *data, + size_t length, + const ucp_am_recv_param_t *param) +{ + parsec_ucx_callback_am_header_t callback_header; + parsec_ce_am_callback_t callback; + void *buffer = NULL; + + (void)arg; + if( sizeof(callback_header) != header_length ) { + return UCS_ERR_INVALID_PARAM; + } + memcpy(&callback_header, header, sizeof(callback_header)); + callback = (parsec_ce_am_callback_t)callback_header.callback; + if( NULL == callback ) { + return UCS_OK; + } + if( param->recv_attr & UCP_AM_RECV_ATTR_FLAG_RNDV ) { + return UCS_ERR_UNSUPPORTED; + } + + if( 0 != length ) { + buffer = malloc(length); + if( NULL == buffer ) { + return UCS_ERR_NO_MEMORY; + } + memcpy(buffer, data, length); + } + + callback(&parsec_ce, PARSEC_CE_REMOTE_DEP_PUT_END_TAG, buffer, length, + callback_header.source, NULL); + free(buffer); + return UCS_OK; +} + +static 
int +comm_ucx_install_am_handler(parsec_ucx_state_t *state, parsec_ce_tag_t tag) +{ + ucp_am_handler_param_t params; + ucs_status_t status; + + if( tag >= PARSEC_MAX_REGISTERED_TAGS ) { + return PARSEC_ERR_VALUE_OUT_OF_BOUNDS; + } + + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_AM_HANDLER_PARAM_FIELD_ID | + UCP_AM_HANDLER_PARAM_FIELD_CB | + UCP_AM_HANDLER_PARAM_FIELD_ARG | + UCP_AM_HANDLER_PARAM_FIELD_FLAGS; + params.id = (uint16_t)tag; + params.cb = comm_ucx_am_callback; + params.arg = &state->tags[tag]; + params.flags = UCP_AM_FLAG_WHOLE_MSG; + status = ucp_worker_set_am_recv_handler(state->worker, ¶ms); + return comm_ucx_status_to_parsec(status, "AM handler registration"); +} + +static int +comm_ucx_install_callback_am_handler(parsec_ucx_state_t *state) +{ + ucp_am_handler_param_t params; + ucs_status_t status; + + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_AM_HANDLER_PARAM_FIELD_ID | + UCP_AM_HANDLER_PARAM_FIELD_CB | + UCP_AM_HANDLER_PARAM_FIELD_ARG | + UCP_AM_HANDLER_PARAM_FIELD_FLAGS; + params.id = PARSEC_CE_REMOTE_DEP_PUT_END_TAG; + params.cb = comm_ucx_callback_am_callback; + params.arg = state; + params.flags = UCP_AM_FLAG_WHOLE_MSG; + status = ucp_worker_set_am_recv_handler(state->worker, ¶ms); + return comm_ucx_status_to_parsec(status, "callback AM handler registration"); +} + +static int +comm_ucx_pmix_get_job_size(parsec_ucx_state_t *state) +{ + pmix_value_t *value = NULL; + pmix_status_t prc; + + prc = PMIx_Get(&state->pmix_proc, PMIX_JOB_SIZE, NULL, 0, &value); + if( PMIX_SUCCESS != prc ) { + parsec_warning("PMIx failed to retrieve %s: %d", PMIX_JOB_SIZE, prc); + return PARSEC_ERROR; + } + + switch(value->type) { + case PMIX_UINT32: + state->size = (int)value->data.uint32; + break; + case PMIX_UINT64: + state->size = (int)value->data.uint64; + break; + case PMIX_SIZE: + state->size = (int)value->data.size; + break; + case PMIX_INT: + state->size = value->data.integer; + break; + default: + PMIX_VALUE_RELEASE(value); + 
parsec_warning("PMIx returned unsupported %s type", PMIX_JOB_SIZE); + return PARSEC_ERROR; + } + PMIX_VALUE_RELEASE(value); + return (state->size > 0) ? PARSEC_SUCCESS : PARSEC_ERROR; +} + +static int +comm_ucx_pmix_bootstrap(parsec_ucx_state_t *state) +{ + pmix_value_t value; + pmix_status_t prc; + + prc = PMIx_Init(&state->pmix_proc, NULL, 0); + if( PMIX_SUCCESS != prc ) { + parsec_warning("PMIx_Init failed: %d", prc); + return PARSEC_ERROR; + } + state->pmix_initialized = 1; + state->rank = (int)state->pmix_proc.rank; + if( PARSEC_SUCCESS != comm_ucx_pmix_get_job_size(state) ) { + return PARSEC_ERROR; + } + + memset(&value, 0, sizeof(value)); + value.type = PMIX_BYTE_OBJECT; + value.data.bo.bytes = (char *)state->worker_address; + value.data.bo.size = state->worker_address_length; + + prc = PMIx_Put(PMIX_GLOBAL, PARSEC_UCX_WORKER_ADDRESS_KEY, &value); + if( PMIX_SUCCESS != prc ) { + parsec_warning("PMIx_Put failed while publishing UCX worker address: %d", prc); + return PARSEC_ERROR; + } + prc = PMIx_Commit(); + if( PMIX_SUCCESS != prc ) { + parsec_warning("PMIx_Commit failed while publishing UCX worker address: %d", prc); + return PARSEC_ERROR; + } + prc = PMIx_Fence(NULL, 0, NULL, 0); + if( PMIX_SUCCESS != prc ) { + parsec_warning("PMIx_Fence failed during UCX bootstrap: %d", prc); + return PARSEC_ERROR; + } + return PARSEC_SUCCESS; +} + +static int +comm_ucx_connect_endpoints(parsec_ucx_state_t *state) +{ + for(int peer_rank = 0; peer_rank < state->size; peer_rank++) { + pmix_proc_t peer; + pmix_value_t *value = NULL; + pmix_status_t prc; + ucp_ep_params_t ep_params; + ucs_status_t status; + + if( peer_rank == state->rank ) { + continue; + } + + PMIX_LOAD_PROCID(&peer, state->pmix_proc.nspace, peer_rank); + prc = PMIx_Get(&peer, PARSEC_UCX_WORKER_ADDRESS_KEY, NULL, 0, &value); + if( PMIX_SUCCESS != prc ) { + parsec_warning("PMIx_Get failed for UCX worker address of rank %d: %d", + peer_rank, prc); + return PARSEC_ERROR; + } + if( (PMIX_BYTE_OBJECT != 
value->type) || + (NULL == value->data.bo.bytes) || + (0 == value->data.bo.size) ) { + PMIX_VALUE_RELEASE(value); + parsec_warning("PMIx returned an invalid UCX worker address for rank %d", + peer_rank); + return PARSEC_ERROR; + } + + memset(&ep_params, 0, sizeof(ep_params)); + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | + UCP_EP_PARAM_FIELD_ERR_MODE; + ep_params.address = (ucp_address_t *)value->data.bo.bytes; + ep_params.err_mode = UCP_ERR_HANDLING_MODE_NONE; + status = ucp_ep_create(state->worker, &ep_params, &state->eps[peer_rank]); + PMIX_VALUE_RELEASE(value); + if( UCS_OK != status ) { + return comm_ucx_status_to_parsec(status, "endpoint creation"); + } + } + return PARSEC_SUCCESS; +} + +static int +comm_ucx_init_context(parsec_ucx_state_t *state) +{ + ucp_config_t *config; + ucp_params_t params; + ucp_worker_params_t worker_params; + ucs_status_t status; + + status = ucp_config_read(NULL, NULL, &config); + if( UCS_OK != status ) { + return comm_ucx_status_to_parsec(status, "config read"); + } + + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_PARAM_FIELD_FEATURES; + params.features = UCP_FEATURE_AM | UCP_FEATURE_RMA; + status = ucp_init(¶ms, config, &state->context); + ucp_config_release(config); + if( UCS_OK != status ) { + return comm_ucx_status_to_parsec(status, "context initialization"); + } + state->owns_context = 1; + + memset(&worker_params, 0, sizeof(worker_params)); + worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; + status = ucp_worker_create(state->context, &worker_params, &state->worker); + if( UCS_OK != status ) { + return comm_ucx_status_to_parsec(status, "worker creation"); + } + state->owns_worker = 1; + + status = ucp_worker_get_address(state->worker, + &state->worker_address, + &state->worker_address_length); + return comm_ucx_status_to_parsec(status, "worker address retrieval"); +} + +static int +comm_ucx_attach_external_worker(parsec_ucx_state_t 
*state, + const parsec_comm_ucx_external_worker_t *external) +{ + ucs_status_t status; + + if( (NULL == external) || + (NULL == external->context) || + (NULL == external->worker) ) { + return PARSEC_ERR_BAD_PARAM; + } + + state->context = external->context; + state->worker = external->worker; + state->owns_context = 0; + state->owns_worker = 0; + + status = ucp_worker_get_address(state->worker, + &state->worker_address, + &state->worker_address_length); + return comm_ucx_status_to_parsec(status, "external worker address retrieval"); +} + +static void +comm_ucx_init_tags(parsec_ucx_state_t *state) +{ + for(parsec_ce_tag_t tag = 0; tag < PARSEC_MAX_REGISTERED_TAGS; tag++) { + state->tags[tag].tag = tag; + state->tags[tag].callback = NULL; + state->tags[tag].cb_data = NULL; + state->tags[tag].max_msg_length = 0; + } +} + +static void +comm_ucx_install_engine(parsec_context_t *context, parsec_ucx_state_t *state) +{ + parsec_ce.parsec_context = context; + parsec_ce.capabilites.sided = 2; + parsec_ce.capabilites.supports_noncontiguous_datatype = 0; + parsec_ce.capabilites.multithreaded = 0; + parsec_ce.enable = comm_ucx_enable; + parsec_ce.disable = comm_ucx_disable; + parsec_ce.set_ctx = comm_ucx_set_ctx; + parsec_ce.fini = comm_ucx_fini; + parsec_ce.tag_register = comm_ucx_tag_register; + parsec_ce.tag_unregister = comm_ucx_tag_unregister; + parsec_ce.mem_register = comm_ucx_mem_register; + parsec_ce.mem_unregister = comm_ucx_mem_unregister; + parsec_ce.get_mem_handle_size = comm_ucx_get_mem_reg_handle_size; + parsec_ce.mem_retrieve = comm_ucx_mem_retrieve; + parsec_ce.put = comm_ucx_put; + parsec_ce.get = comm_ucx_get; + parsec_ce.progress = comm_ucx_progress; + parsec_ce.pack = comm_ucx_pack; + parsec_ce.pack_size = comm_ucx_pack_size; + parsec_ce.unpack = comm_ucx_unpack; + parsec_ce.reshape = comm_ucx_reshape; + parsec_ce.sync = comm_ucx_sync; + parsec_ce.can_serve = comm_ucx_can_serve; + parsec_ce.send_am = comm_ucx_send_am; + parsec_ce.taskpool_sync_ids = 
comm_ucx_taskpool_sync_ids; + + context->my_rank = state->rank; + context->nb_nodes = state->size; + context->comm_ctx = (intptr_t)state; +} + +static int +comm_ucx_late_init(parsec_context_t *context, parsec_ucx_state_t *state) +{ + if( PARSEC_SUCCESS != comm_ucx_pmix_bootstrap(state) ) { + return PARSEC_ERROR; + } + + state->eps = (ucp_ep_h *)calloc((size_t)state->size, sizeof(*state->eps)); + if( NULL == state->eps ) { + return PARSEC_ERR_OUT_OF_RESOURCE; + } + if( PARSEC_SUCCESS != comm_ucx_connect_endpoints(state) ) { + return PARSEC_ERROR; + } + if( PARSEC_SUCCESS != comm_ucx_install_callback_am_handler(state) ) { + return PARSEC_ERROR; + } + + comm_ucx_init_tags(state); + comm_ucx_install_engine(context, state); + return PARSEC_SUCCESS; +} + +parsec_comm_engine_t * +comm_ucx_init(parsec_context_t *context) +{ + parsec_ucx_state_t *state = &parsec_ucx_state; + intptr_t external_ctx = context->comm_ctx; + + memset(state, 0, sizeof(*state)); + state->rank = -1; + state->size = -1; + + if( -1 != external_ctx ) { + if( PARSEC_SUCCESS != comm_ucx_attach_external_worker(state, + (const parsec_comm_ucx_external_worker_t *)external_ctx) ) { + comm_ucx_fini(&parsec_ce); + return NULL; + } + } else if( PARSEC_SUCCESS != comm_ucx_init_context(state) ) { + comm_ucx_fini(&parsec_ce); + return NULL; + } + + if( PARSEC_SUCCESS != comm_ucx_late_init(context, state) ) { + comm_ucx_fini(&parsec_ce); + return NULL; + } + + parsec_debug_verbose(4, parsec_debug_output, + "UCX communication engine initialized rank %d/%d", + context->my_rank, context->nb_nodes); + return &parsec_ce; +} + +static int +comm_ucx_enable(parsec_comm_engine_t *comm_engine) +{ + (void)comm_engine; + return PARSEC_SUCCESS; +} + +static int +comm_ucx_disable(parsec_comm_engine_t *comm_engine) +{ + (void)comm_engine; + return PARSEC_SUCCESS; +} + +static int +comm_ucx_set_ctx(parsec_comm_engine_t *comm_engine, intptr_t ctx) +{ + parsec_context_t *context = comm_engine->parsec_context; + parsec_ucx_state_t 
*state = &parsec_ucx_state; + int rc; + + if( 1 < parsec_communication_engine_up ) { + parsec_warning("Cannot change PaRSEC's UCX worker while the communication engine is running [ignored]"); + return PARSEC_ERROR; + } + if( -1 == ctx ) { + return PARSEC_ERR_BAD_PARAM; + } + + /* + * set_ctx hands PaRSEC an application-owned UCX worker. PaRSEC releases + * only the resources it creates around that worker: worker address, + * endpoints, AM handlers, and PMIx publication. + */ + comm_ucx_fini(comm_engine); + memset(state, 0, sizeof(*state)); + state->rank = -1; + state->size = -1; + + rc = comm_ucx_attach_external_worker(state, + (const parsec_comm_ucx_external_worker_t *)ctx); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + rc = comm_ucx_late_init(context, state); + if( PARSEC_SUCCESS != rc ) { + comm_ucx_fini(comm_engine); + } + return rc; +} + +static int +comm_ucx_fini(parsec_comm_engine_t *comm_engine) +{ + parsec_ucx_state_t *state = &parsec_ucx_state; + ucp_request_param_t close_params; + + memset(&close_params, 0, sizeof(close_params)); + + if( NULL != state->eps ) { + for(int peer_rank = 0; peer_rank < state->size; peer_rank++) { + if( NULL != state->eps[peer_rank] ) { + void *request = ucp_ep_close_nbx(state->eps[peer_rank], &close_params); + (void)comm_ucx_wait_request(state, request, "endpoint close"); + state->eps[peer_rank] = NULL; + } + } + free(state->eps); + state->eps = NULL; + } + if( NULL != state->worker_address ) { + ucp_worker_release_address(state->worker, state->worker_address); + state->worker_address = NULL; + state->worker_address_length = 0; + } + if( NULL != state->worker ) { + if( state->owns_worker ) { + ucp_worker_destroy(state->worker); + } + state->worker = NULL; + } + if( NULL != state->context ) { + if( state->owns_context ) { + ucp_cleanup(state->context); + } + state->context = NULL; + } + if( state->pmix_initialized ) { + PMIx_Finalize(NULL, 0); + state->pmix_initialized = 0; + } + memset(state, 0, sizeof(*state)); + if( 
(NULL != comm_engine) && (NULL != comm_engine->parsec_context) ) { + comm_engine->parsec_context->comm_ctx = -1; + } + return PARSEC_SUCCESS; +} + +static int +comm_ucx_tag_register(parsec_ce_tag_t tag, + parsec_ce_am_callback_t cb, + void *cb_data, + size_t msg_length) +{ + parsec_ucx_state_t *state = &parsec_ucx_state; + + if( tag >= PARSEC_MAX_REGISTERED_TAGS ) { + return PARSEC_ERR_VALUE_OUT_OF_BOUNDS; + } + if( PARSEC_CE_REMOTE_DEP_PUT_END_TAG == tag ) { + return PARSEC_ERR_EXISTS; + } + + state->tags[tag].tag = tag; + state->tags[tag].callback = cb; + state->tags[tag].cb_data = cb_data; + state->tags[tag].max_msg_length = msg_length; + return comm_ucx_install_am_handler(state, tag); +} + +static int +comm_ucx_tag_unregister(parsec_ce_tag_t tag) +{ + if( tag >= PARSEC_MAX_REGISTERED_TAGS ) { + return PARSEC_ERR_VALUE_OUT_OF_BOUNDS; + } + if( PARSEC_CE_REMOTE_DEP_PUT_END_TAG == tag ) { + return PARSEC_SUCCESS; + } + parsec_ucx_state.tags[tag].callback = NULL; + parsec_ucx_state.tags[tag].cb_data = NULL; + parsec_ucx_state.tags[tag].max_msg_length = 0; + return PARSEC_SUCCESS; +} + +static int +comm_ucx_mem_register(void *mem, + parsec_mem_type_t mem_type, + size_t count, + parsec_datatype_t datatype, + size_t mem_size, + parsec_ce_mem_reg_handle_t *lreg, + size_t *lreg_size) +{ + parsec_ucx_state_t *state = &parsec_ucx_state; + parsec_ucx_mem_handle_t *handle; + ucp_mem_map_params_t params; + void *rkey_buffer = NULL; + size_t rkey_size = 0; + ucs_status_t status; + + if( (PARSEC_MEM_TYPE_CONTIGUOUS != mem_type) || + (NULL == mem) || + ((size_t)-1 == mem_size) || + (0 == mem_size) ) { + return PARSEC_ERR_NOT_SUPPORTED; + } + + handle = (parsec_ucx_mem_handle_t *)calloc(1, sizeof(*handle)); + if( NULL == handle ) { + return PARSEC_ERR_OUT_OF_RESOURCE; + } + + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH; + params.address = mem; + params.length = mem_size; + status = 
ucp_mem_map(state->context, ¶ms, &handle->memh); + if( UCS_OK != status ) { + free(handle); + return comm_ucx_status_to_parsec(status, "memory registration"); + } + + status = ucp_rkey_pack(state->context, handle->memh, &rkey_buffer, &rkey_size); + if( UCS_OK != status ) { + ucp_mem_unmap(state->context, handle->memh); + free(handle); + return comm_ucx_status_to_parsec(status, "rkey packing"); + } + if( rkey_size > PARSEC_UCX_MAX_RKEY_SIZE ) { + ucp_rkey_buffer_release(rkey_buffer); + ucp_mem_unmap(state->context, handle->memh); + free(handle); + parsec_warning("UCX rkey size %zu exceeds PaRSEC wire limit %d", + rkey_size, PARSEC_UCX_MAX_RKEY_SIZE); + return PARSEC_ERR_NOT_SUPPORTED; + } + + handle->mem = mem; + handle->mem_size = mem_size; + handle->datatype = datatype; + handle->count = (count > (size_t)INT_MAX) ? INT_MAX : (int)count; + handle->wire.remote_addr = (uint64_t)(uintptr_t)mem; + handle->wire.mem_size = (uint64_t)mem_size; + handle->wire.rkey_size = (uint32_t)rkey_size; + memcpy(handle->wire.rkey, rkey_buffer, rkey_size); + ucp_rkey_buffer_release(rkey_buffer); + + *lreg = handle; + *lreg_size = sizeof(handle->wire); + return PARSEC_SUCCESS; +} + +static int +comm_ucx_mem_unregister(parsec_ce_mem_reg_handle_t *lreg) +{ + parsec_ucx_mem_handle_t *handle; + + if( (NULL == lreg) || (NULL == *lreg) ) { + return PARSEC_SUCCESS; + } + handle = (parsec_ucx_mem_handle_t *)*lreg; + if( NULL != handle->memh ) { + ucp_mem_unmap(parsec_ucx_state.context, handle->memh); + } + free(handle); + *lreg = NULL; + return PARSEC_SUCCESS; +} + +static int +comm_ucx_get_mem_reg_handle_size(void) +{ + return sizeof(parsec_ucx_mem_handle_wire_t); +} + +static int +comm_ucx_mem_retrieve(parsec_ce_mem_reg_handle_t lreg, + void **mem, + parsec_datatype_t *datatype, + int *count) +{ + parsec_ucx_mem_handle_t *handle = (parsec_ucx_mem_handle_t *)lreg; + + *mem = handle->mem; + *datatype = handle->datatype; + *count = handle->count; + return PARSEC_SUCCESS; +} + +static int 
+comm_ucx_rkey_unpack(parsec_ucx_state_t *state, + int remote, + parsec_ucx_mem_handle_wire_t *remote_wire, + ucp_rkey_h *rkey) +{ + ucs_status_t status; + + if( (remote < 0) || (remote >= state->size) || + (remote == state->rank) || + (NULL == state->eps[remote]) ) { + return PARSEC_ERR_BAD_PARAM; + } + status = ucp_ep_rkey_unpack(state->eps[remote], + remote_wire->rkey, + rkey); + return comm_ucx_status_to_parsec(status, "rkey unpack"); +} + +static int +comm_ucx_put(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, + void *l_cb_data, + parsec_ce_tag_t r_tag, + void *r_cb_data, + size_t r_cb_data_size) +{ + parsec_ucx_state_t *state = &parsec_ucx_state; + parsec_ucx_mem_handle_t *local = (parsec_ucx_mem_handle_t *)lreg; + parsec_ucx_mem_handle_wire_t *remote_wire = (parsec_ucx_mem_handle_wire_t *)rreg; + size_t transfer_size = (0 == size) ? 
local->mem_size : size;
+    char *local_addr = (char *)local->mem + ldispl;
+    int rc;
+
+    if( remote == state->rank ) {
+        /* Loopback: the "remote" address lives in this process. */
+        memcpy((void *)(uintptr_t)(remote_wire->remote_addr + rdispl),
+               local_addr, transfer_size);
+    } else {
+        ucp_rkey_h rkey = NULL;
+        ucp_request_param_t params;
+        void *request;
+
+        rc = comm_ucx_rkey_unpack(state, remote, remote_wire, &rkey);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+        memset(&params, 0, sizeof(params));
+        request = ucp_put_nbx(state->eps[remote], local_addr, transfer_size,
+                              remote_wire->remote_addr + rdispl, rkey, &params);
+        rc = comm_ucx_wait_request(state, request, "PUT");
+        if( PARSEC_SUCCESS == rc ) {
+            /*
+             * Completion of the PUT request only covers the origin side.
+             * Flush the endpoint so the data is complete at the target
+             * before the remote completion callback below is triggered;
+             * otherwise the target may run its PUT-end callback on a buffer
+             * that has not been fully written yet.
+             */
+            memset(&params, 0, sizeof(params));
+            request = ucp_ep_flush_nbx(state->eps[remote], &params);
+            rc = comm_ucx_wait_request(state, request, "PUT flush");
+        }
+        /* The unpacked rkey is released on both the success and error paths. */
+        ucp_rkey_destroy(rkey);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+    }
+
+    /* Notify the target first, then run the local completion callback. */
+    if( 0 != r_tag ) {
+        rc = comm_ucx_send_callback_am(comm_engine, remote, r_tag,
+                                       r_cb_data, r_cb_data_size);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+    }
+    if( NULL != l_cb ) {
+        return l_cb(comm_engine, lreg, ldispl, rreg, rdispl,
+                    transfer_size, remote, l_cb_data);
+    }
+    return PARSEC_SUCCESS;
+}
+
+/*
+ * One-sided GET: copy transfer_size bytes from the remote registration
+ * (rreg + rdispl) into the local registration (lreg + ldispl). A size of 0
+ * means "use the size of the local registration". The transfer is waited for
+ * before returning; l_cb is then invoked locally and, when r_tag is non-zero,
+ * a completion callback active message is sent to the remote rank.
+ */
+static int
+comm_ucx_get(parsec_comm_engine_t *comm_engine,
+             parsec_ce_mem_reg_handle_t lreg,
+             ptrdiff_t ldispl,
+             parsec_ce_mem_reg_handle_t rreg,
+             ptrdiff_t rdispl,
+             size_t size,
+             int remote,
+             parsec_ce_onesided_callback_t l_cb,
+             void *l_cb_data,
+             parsec_ce_tag_t r_tag,
+             void *r_cb_data,
+             size_t r_cb_data_size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_mem_handle_t *local = (parsec_ucx_mem_handle_t *)lreg;
+    parsec_ucx_mem_handle_wire_t *remote_wire = (parsec_ucx_mem_handle_wire_t *)rreg;
+    size_t transfer_size = (0 == size) ?
local->mem_size : size; + char *local_addr = (char *)local->mem + ldispl; + int rc; + + if( remote == state->rank ) { + memcpy(local_addr, (void *)(uintptr_t)(remote_wire->remote_addr + rdispl), + transfer_size); + } else { + ucp_rkey_h rkey = NULL; + ucp_request_param_t params; + void *request; + + rc = comm_ucx_rkey_unpack(state, remote, remote_wire, &rkey); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + memset(¶ms, 0, sizeof(params)); + request = ucp_get_nbx(state->eps[remote], local_addr, transfer_size, + remote_wire->remote_addr + rdispl, rkey, ¶ms); + rc = comm_ucx_wait_request(state, request, "GET"); + ucp_rkey_destroy(rkey); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + } + + if( NULL != l_cb ) { + rc = l_cb(comm_engine, lreg, ldispl, rreg, rdispl, + transfer_size, remote, l_cb_data); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + } + if( 0 != r_tag ) { + /* + * The comm-engine API carries the remote completion callback as a + * function pointer in r_tag. UCX AM ids cannot be those pointers, so + * use the reserved internal AM id and carry the callback pointer in the + * AM header. 
+         */
+        return comm_ucx_send_callback_am(comm_engine, remote, r_tag,
+                                         r_cb_data, r_cb_data_size);
+    }
+    return PARSEC_SUCCESS;
+}
+
+/*
+ * Deliver a one-sided completion callback to `remote`.
+ *
+ * `callback` is the callback function pointer carried in a parsec_ce_tag_t.
+ * For the local rank it is invoked directly; otherwise it is shipped in the
+ * header of an active message sent on the reserved
+ * PARSEC_CE_REMOTE_DEP_PUT_END_TAG id, with cb_data as payload. The send is
+ * waited for before returning.
+ *
+ * Returns PARSEC_ERR_BAD_PARAM for an out-of-range rank or a rank without an
+ * endpoint, otherwise the callback / send status.
+ */
+static int
+comm_ucx_send_callback_am(parsec_comm_engine_t *comm_engine,
+                          int remote,
+                          parsec_ce_tag_t callback,
+                          void *cb_data,
+                          size_t cb_data_size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_callback_am_header_t header;
+    ucp_request_param_t params;
+    void *request;
+
+    if( (remote < 0) || (remote >= state->size) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( remote == state->rank ) {
+        parsec_ce_am_callback_t cb = (parsec_ce_am_callback_t)(uintptr_t)callback;
+        /* Mirror the receive side, which ignores a NULL callback. */
+        if( NULL == cb ) {
+            return PARSEC_SUCCESS;
+        }
+        return cb(comm_engine, PARSEC_CE_REMOTE_DEP_PUT_END_TAG,
+                  cb_data, cb_data_size, state->rank, NULL);
+    }
+    /* Same endpoint validation as comm_ucx_rkey_unpack. */
+    if( NULL == state->eps[remote] ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+
+    /*
+     * The header is sent as raw bytes: zero it first so that any padding
+     * between `source` and `callback` does not leak uninitialized stack
+     * content onto the wire.
+     */
+    memset(&header, 0, sizeof(header));
+    header.source = state->rank;
+    header.callback = (uintptr_t)callback;
+    memset(&params, 0, sizeof(params));
+    request = ucp_am_send_nbx(state->eps[remote],
+                              PARSEC_CE_REMOTE_DEP_PUT_END_TAG,
+                              &header, sizeof(header),
+                              cb_data, cb_data_size, &params);
+    return comm_ucx_wait_request(state, request, "callback active message send");
+}
+
+/*
+ * Send `size` bytes at `addr` as an active message with id `tag` to `remote`.
+ *
+ * Messages to the local rank are dispatched directly through the registered
+ * callback. The message is rejected when it is larger than the length
+ * registered locally for `tag`. The send is waited for before returning.
+ */
+static int
+comm_ucx_send_am(parsec_comm_engine_t *comm_engine,
+                 parsec_ce_tag_t tag,
+                 int remote,
+                 void *addr,
+                 size_t size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_am_header_t header;
+    ucp_request_param_t params;
+    void *request;
+
+    if( tag >= PARSEC_MAX_REGISTERED_TAGS ) {
+        return PARSEC_ERR_VALUE_OUT_OF_BOUNDS;
+    }
+    if( (remote < 0) || (remote >= state->size) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( remote == state->rank ) {
+        return comm_ucx_direct_am(comm_engine, &state->tags[tag],
+                                  addr, size, state->rank);
+    }
+    if( state->tags[tag].max_msg_length < size ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    /* Same endpoint validation as comm_ucx_rkey_unpack. */
+    if( NULL == state->eps[remote] ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+
+    memset(&header, 0, sizeof(header));
+    header.source = state->rank;
+    memset(&params, 0, sizeof(params));
+    request = ucp_am_send_nbx(state->eps[remote], (unsigned)tag,
+                              &header, sizeof(header),
+                              addr, size, &params);
+    return comm_ucx_wait_request(state, request, "active message send");
+}
+
+static int
+comm_ucx_progress(parsec_comm_engine_t *comm_engine)
+{
+    int count = 0;
+
+    (void)comm_engine;
+    /* Bounded progress loop: stop early once the worker reports no progress. */
+    for(int i = 0; i < 16; i++) {
+        int rc = ucp_worker_progress(parsec_ucx_state.worker);
+        count += rc;
+        if( 0 == rc ) {
+            break;
+        }
+    }
+    return count;
+}
+
+/*
+ * Compute the number of bytes needed to pack `incount` elements of `type`.
+ *
+ * Only datatypes accepted by parsec_type_contiguous() are supported. Returns
+ * PARSEC_ERR_BAD_PARAM for a negative count or element size, and
+ * PARSEC_ERR_VALUE_OUT_OF_BOUNDS when the byte count does not fit in an int.
+ * `*size` is written only on success.
+ */
+static int
+comm_ucx_pack_size(parsec_comm_engine_t *ce,
+                   int incount,
+                   parsec_datatype_t type,
+                   int *size)
+{
+    int dtt_size, rc;
+
+    (void)ce;
+    if( PARSEC_SUCCESS != parsec_type_contiguous(type) ) {
+        return PARSEC_ERR_NOT_SUPPORTED;
+    }
+    rc = parsec_type_size(type, &dtt_size);
+    if( PARSEC_SUCCESS != rc ) {
+        return rc;
+    }
+    if( (incount < 0) || (dtt_size < 0) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    /* Reject products that would overflow the int result. */
+    if( (0 != dtt_size) && (incount > (INT_MAX / dtt_size)) ) {
+        return PARSEC_ERR_VALUE_OUT_OF_BOUNDS;
+    }
+    *size = incount * dtt_size;
+    return PARSEC_SUCCESS;
+}
+
+/*
+ * Copy `incount` contiguous elements of `type` from inbuf into outbuf at byte
+ * offset *position, then advance *position by the number of bytes written.
+ * Returns PARSEC_ERR_BAD_PARAM when the data does not fit in the `outsize`
+ * bytes of outbuf.
+ */
+static int
+comm_ucx_pack(parsec_comm_engine_t *ce,
+              void *inbuf,
+              int incount,
+              parsec_datatype_t type,
+              void *outbuf,
+              int outsize,
+              int *position)
+{
+    int size, rc;
+
+    rc = comm_ucx_pack_size(ce, incount, type, &size);
+    if( PARSEC_SUCCESS != rc ) {
+        return rc;
+    }
+    /* Compare against the remaining room so the check itself cannot overflow. */
+    if( (*position < 0) || (*position > outsize) ||
+        (size > (outsize - *position)) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    memcpy((char *)outbuf + *position, inbuf, (size_t)size);
+    *position += size;
+    return PARSEC_SUCCESS;
+}
+
+/*
+ * Copy `outcount` contiguous elements of `type` from inbuf at byte offset
+ * *position into outbuf, then advance *position by the number of bytes read.
+ * Returns PARSEC_ERR_BAD_PARAM when fewer than that many bytes remain in the
+ * `insize` bytes of inbuf.
+ */
+static int
+comm_ucx_unpack(parsec_comm_engine_t *ce,
+                void *inbuf,
+                int insize,
+                int *position,
+                void *outbuf,
+                int outcount,
+                parsec_datatype_t type)
+{
+    int size, rc;
+
+    rc = comm_ucx_pack_size(ce, outcount, type, &size);
+    if( PARSEC_SUCCESS != rc ) {
+        return rc;
+    }
+    /* Compare against the remaining bytes so the check itself cannot overflow. */
+    if( (*position < 0) || (*position > insize) ||
+        (size > (insize - *position)) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    memcpy(outbuf, (char *)inbuf + *position, (size_t)size);
+    *position += size;
+    return PARSEC_SUCCESS;
+}
+
+static int
+comm_ucx_sync(parsec_comm_engine_t *comm_engine)
+{
+    pmix_status_t prc;
+
+    (void)comm_engine;
+    prc = PMIx_Fence(NULL, 0, NULL, 0);
+    return (PMIX_SUCCESS == prc) ?
PARSEC_SUCCESS : PARSEC_ERROR; +} + +static int +comm_ucx_can_serve(parsec_comm_engine_t *comm_engine) +{ + (void)comm_engine; + return 1; +} + +static int +comm_ucx_taskpool_sync_ids(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + uint32_t *next_taskpool_id) +{ + (void)comm_engine; + (void)comm_ctx; + (void)next_taskpool_id; + /* + * UCX will need a backend-specific collective, likely through the PMIx + * bootstrap path, to replace MPI_Allreduce for taskpool-id convergence. + */ + return PARSEC_ERR_NOT_IMPLEMENTED; +} + +static int +comm_ucx_reshape(parsec_comm_engine_t *ce, + parsec_execution_stream_t *es, + parsec_data_copy_t *dst, + int64_t displ_dst, + parsec_datatype_t layout_dst, + uint64_t count_dst, + parsec_data_copy_t *src, + int64_t displ_src, + parsec_datatype_t layout_src, + uint64_t count_src) +{ + (void)ce; (void)es; (void)dst; (void)displ_dst; (void)layout_dst; + (void)count_dst; (void)src; (void)displ_src; (void)layout_src; + (void)count_src; + return PARSEC_ERR_NOT_SUPPORTED; +} diff --git a/parsec/mca/comm/ucx/comm_ucx.h b/parsec/mca/comm/ucx/comm_ucx.h new file mode 100644 index 000000000..beb8dc90b --- /dev/null +++ b/parsec/mca/comm/ucx/comm_ucx.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * UCX communication engine MCA component declaration. + * + * The UCX backend uses PMIx only for process bootstrap and UCX worker-address + * exchange. Runtime data movement is done with UCX active messages and CPU + * RMA operations. + */ +#ifndef PARSEC_COMM_UCX_H_HAS_BEEN_INCLUDED +#define PARSEC_COMM_UCX_H_HAS_BEEN_INCLUDED + +#include "parsec/mca/comm/comm.h" +#include "parsec/datatype_module.h" +#include + +BEGIN_C_DECLS + +PARSEC_DECLSPEC extern const parsec_comm_base_component_t parsec_comm_ucx_component; +PARSEC_DECLSPEC extern const parsec_datatype_module_t parsec_datatype_basic_module; + +/** + * UCX state supplied by an application that initializes UCX itself. 
+ * + * PaRSEC does not take ownership of either handle. The application must keep + * both alive until the PaRSEC context using this communication engine has been + * finalized. PaRSEC still performs the late runtime setup: worker-address + * publication through PMIx, endpoint creation, and active-message handler + * registration. + */ +typedef struct parsec_comm_ucx_external_worker_s { + ucp_context_h context; + ucp_worker_h worker; +} parsec_comm_ucx_external_worker_t; + +PARSEC_DECLSPEC parsec_comm_engine_t *comm_ucx_init(parsec_context_t *context); +PARSEC_DECLSPEC mca_base_component_t *comm_ucx_static_component(void); + +END_C_DECLS + +#endif /* PARSEC_COMM_UCX_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/mca/comm/ucx/comm_ucx_component.c b/parsec/mca/comm/ucx/comm_ucx_component.c new file mode 100644 index 000000000..d25dec959 --- /dev/null +++ b/parsec/mca/comm/ucx/comm_ucx_component.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/mca/comm/ucx/comm_ucx.h" + +static int comm_ucx_component_query(mca_base_module_t **module, int *priority); + +static parsec_comm_module_t parsec_comm_ucx_module = { + .component = &parsec_comm_ucx_component, + .module = { + .init = comm_ucx_init, + }, + .datatype = &parsec_datatype_basic_module, +}; + +const parsec_comm_base_component_t parsec_comm_ucx_component = { + { + PARSEC_COMM_BASE_VERSION_2_0_0, + + "ucx", + "", + PARSEC_VERSION_MAJOR, + PARSEC_VERSION_MINOR, + + NULL, + NULL, + comm_ucx_component_query, + NULL, + "", + }, + { + MCA_BASE_METADATA_PARAM_NONE, + "", + } +}; + +mca_base_component_t * +comm_ucx_static_component(void) +{ + return (mca_base_component_t *)&parsec_comm_ucx_component; +} + +static int +comm_ucx_component_query(mca_base_module_t **module, int *priority) +{ + /* + * Keep MPI as the default when both backends eventually become buildable + * together. 
UCX can be selected explicitly with the comm MCA parameter. + */ + *priority = 50; + *module = (mca_base_module_t *)&parsec_comm_ucx_module; + return MCA_SUCCESS; +} diff --git a/parsec/parsec.c b/parsec/parsec.c index 7f4bbf71c..a35d65c6f 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -988,6 +988,7 @@ int parsec_version( int* version_major, int* version_minor, int* version_release int parsec_version_ex( size_t len, char* version_string) { int ret; + char *comm_components = mca_components_list_compiled("comm"); char *sched_components = mca_components_list_compiled("sched"); char *device_components = mca_components_list_compiled("device"); char *pins_components = mca_components_list_compiled("pins"); @@ -1058,18 +1059,7 @@ int parsec_version_ex( size_t len, char* version_string) { "no" #endif /*PARSEC_PROF_TRACE*/ , -#if defined(PARSEC_HAVE_MPI) - "mpi" -#if defined(PARSEC_HAVE_MPI_20) - "2" -#endif -#if defined(PARSEC_DIST_THREAD) - "+thread_multiple" -#endif -#else /* defined(PARSEC_HAVE_MPI) */ - "single process only" -#endif - , + comm_components, device_components, sched_components, #if defined(PARSEC_HAVE_HWLOC) @@ -1118,6 +1108,7 @@ int parsec_version_ex( size_t len, char* version_string) { CMAKE_PARSEC_C_COMPILER, CMAKE_PARSEC_C_FLAGS ); + free(comm_components); free(device_components); free(sched_components); free(pins_components); @@ -2197,16 +2188,30 @@ void parsec_taskpool_sync_ids_context( intptr_t comm ) parsec_atomic_lock( &taskpool_array_lock ); idx = (int)taskpool_array_pos; msz = (int)taskpool_array_size; -#if defined(DISTRIBUTED) && defined(PARSEC_HAVE_MPI) +#if defined(DISTRIBUTED) + int rc = PARSEC_ERR_NOT_IMPLEMENTED; + if( NULL != parsec_ce.taskpool_sync_ids ) { + rc = parsec_ce.taskpool_sync_ids(&parsec_ce, comm, (uint32_t *)&idx); /* idx is an int; the callback takes uint32_t * */ + } +#if defined(PARSEC_HAVE_MPI) + /* + * Keep the legacy direct MPI path for applications that synchronize + * taskpool ids before the communication engine has been selected. 
+ */ int mpi_is_on; - MPI_Initialized(&mpi_is_on); - if( mpi_is_on ) { + if( (PARSEC_ERR_NOT_IMPLEMENTED == rc) && + (NULL == parsec_ce.taskpool_sync_ids) && + (MPI_SUCCESS == MPI_Initialized(&mpi_is_on)) && mpi_is_on ) { MPI_Allreduce( MPI_IN_PLACE, &idx, 1, MPI_INT, MPI_MAX, (MPI_Comm)comm ); - while (idx >= msz){ - msz <<= 1; - } + rc = PARSEC_SUCCESS; } -#endif +#endif /* defined(PARSEC_HAVE_MPI) */ + while( (PARSEC_SUCCESS == rc) && (idx >= msz) ) { + msz <<= 1; + } +#else + (void)comm; +#endif /* defined(DISTRIBUTED) */ if( msz > taskpool_array_size ) { taskpool_array = (parsec_taskpool_t**)realloc(taskpool_array, msz * sizeof(parsec_taskpool_t*) ); /* NULLify all the new elements */ @@ -2998,7 +3003,7 @@ int parsec_context_query(parsec_context_t *context, parsec_context_query_cmd_t c case PARSEC_CONTEXT_QUERY_NODES: switch (parsec_communication_engine_up) { case 0: return 0; /* context not ready for distributed runs, and lacking datatype handling capabilities */ - case 1: return 1; /* single node runs, but the context has datatype management capabilities */ + case 1: return context->nb_nodes; /* communication engine initialized, but not necessarily awake */ case 2: return PARSEC_ERR_NOT_FOUND; /* we are in a distributed run, but the MPI engine is not yet ready, so the nb_nodes might not be accurate */ case 3: return context->nb_nodes; } diff --git a/parsec/parsec_comm_engine.c b/parsec/parsec_comm_engine.c index 95c88bead..bcf44fab2 100644 --- a/parsec/parsec_comm_engine.c +++ b/parsec/parsec_comm_engine.c @@ -2,24 +2,25 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include <assert.h> #include "parsec/parsec_config.h" -#include "parsec/parsec_mpi_funnelled.h" +#include "parsec/mca/comm/comm.h" #include "parsec/remote_dep.h" parsec_comm_engine_t parsec_ce; -#if defined(PARSEC_HAVE_MPI) +#if defined(DISTRIBUTED) -/* This function will be called by the runtime */ +/* Select and initialize the distributed communication backend. */ parsec_comm_engine_t * parsec_comm_engine_init(parsec_context_t *parsec_context) { - /* call the selected module init */ - parsec_comm_engine_t *ce = mpi_funnelled_init(parsec_context); + parsec_comm_engine_t *ce = parsec_comm_engine_component_init(parsec_context); + if( NULL == ce ) return NULL; /* no usable backend: let the caller report the failure instead of aborting */ assert(ce->capabilites.sided > 0 && ce->capabilites.sided < 3); return ce; } @@ -31,8 +32,9 @@ parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine) { (void) parsec_remote_dep_fini(comm_engine->parsec_context); remote_dep_ce_fini(comm_engine->parsec_context); - /* call the selected module fini */ + /* Finalize the backend engine before releasing the selected MCA component. */ parsec_ce.fini(&parsec_ce); + parsec_comm_engine_component_fini(); return PARSEC_SUCCESS; } @@ -41,10 +43,12 @@ parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine) parsec_comm_engine_t * parsec_comm_engine_init(parsec_context_t *parsec_context) { + /* Local builds keep the in-process engine and do not select a comm component. 
*/ parsec_ce.parsec_context = parsec_context; parsec_ce.capabilites.sided = 0; parsec_ce.capabilites.supports_noncontiguous_datatype = 0; parsec_ce.capabilites.multithreaded = 0; + parsec_ce.taskpool_sync_ids = NULL; return &parsec_ce; } @@ -55,4 +59,4 @@ parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine) return PARSEC_SUCCESS; } -#endif /* defined(PARSEC_HAVE_MPI) */ +#endif /* defined(DISTRIBUTED) */ diff --git a/parsec/parsec_comm_engine.h b/parsec/parsec_comm_engine.h index 3ce32aca8..c4f7b135b 100644 --- a/parsec/parsec_comm_engine.h +++ b/parsec/parsec_comm_engine.h @@ -150,6 +150,14 @@ typedef int (*parsec_ce_unpack_fn_t)(parsec_comm_engine_t *ce, typedef int (*parsec_ce_sync_fn_t)(parsec_comm_engine_t *comm_engine); typedef int (*parsec_ce_can_serve_fn_t)(parsec_comm_engine_t *comm_engine); +/** + * Synchronize the next taskpool id across the processes known by a backend. + * The runtime owns the taskpool registry lock and storage; the backend only + * updates next_taskpool_id to the globally agreed value. + */ +typedef int (*parsec_ce_taskpool_sync_ids_fn_t)(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + uint32_t *next_taskpool_id); /** * This function realize a data reshaping, by conceptually packing the dst @@ -169,7 +177,9 @@ typedef int (*parsec_ce_reshape_fn_t)(parsec_comm_engine_t* ce, struct parsec_comm_engine_capabilites_s { unsigned int sided : 2; /* Valid values are 1 and 2 */ + /** The backend can register and move non-contiguous datatypes directly. */ unsigned int supports_noncontiguous_datatype : 1; + /** The backend can safely be called concurrently by worker threads. 
*/ unsigned int multithreaded : 1; }; @@ -196,6 +206,7 @@ struct parsec_comm_engine_s { parsec_ce_sync_fn_t sync; parsec_ce_can_serve_fn_t can_serve; parsec_ce_send_active_message_fn_t send_am; + parsec_ce_taskpool_sync_ids_fn_t taskpool_sync_ids; }; /* global comm_engine */ diff --git a/parsec/remote_dep.h b/parsec/remote_dep.h index e81e262b9..d2fa4b79b 100644 --- a/parsec/remote_dep.h +++ b/parsec/remote_dep.h @@ -256,7 +256,6 @@ int parsec_remote_dep_propagate(parsec_execution_stream_t* es, #define parsec_remote_dep_progress(ctx) 0 #define parsec_remote_dep_activate(ctx, o, r) -1 #define parsec_remote_dep_new_taskpool(ctx) 0 -#define remote_dep_mpi_initialize_execution_stream(ctx) 0 #endif /* DISTRIBUTED */ /* check if this data description represents a CTL dependency */ @@ -338,12 +337,13 @@ struct dep_cmd_item_s { }; #define dep_cmd_prio (offsetof(dep_cmd_item_t, priority)) -#define dep_mpi_pos_list (offsetof(dep_cmd_item_t, priority) - offsetof(dep_cmd_item_t, pos_list)) +#define dep_cmd_pos_list (offsetof(dep_cmd_item_t, priority) - offsetof(dep_cmd_item_t, pos_list)) #define rdep_prio (offsetof(parsec_remote_deps_t, max_priority)) /** - * These functions will be inherited from the current remote_dep_mpi.c - * and for the time being will remain in there. + * Remote-dependency communication protocol entry points. These functions are + * implemented by the transport-neutral protocol layer and use the selected + * parsec_comm_engine_t backend for network operations. 
*/ void* remote_dep_dequeue_main(parsec_context_t* context); @@ -389,13 +389,13 @@ typedef struct { uint32_t tcid; // 20 int msg_size; // 24 int dep; // 28 -} parsec_profile_remote_dep_mpi_info_t; // 32 bytes +} parsec_profile_remote_dep_comm_info_t; // 32 bytes #ifdef PARSEC_PROF_TRACE #define TAKE_TIME_WITH_INFO(PROF, KEY, I, k, src, dst, rdw, nbdtt, dtt) \ do { \ if( parsec_profile_enabled ) { \ - parsec_profile_remote_dep_mpi_info_t __info; \ + parsec_profile_remote_dep_comm_info_t __info; \ parsec_taskpool_t *__tp = parsec_taskpool_lookup( (rdw).taskpool_id ); \ const parsec_task_class_t *__tc = __tp->task_classes_array[(rdw).task_class_id ]; \ __info.rank_src = (src); \ diff --git a/parsec/remote_dep_mpi.c b/parsec/remote_dep_comm.c similarity index 84% rename from parsec/remote_dep_mpi.c rename to parsec/remote_dep_comm.c index d21071dc7..4f77af480 100644 --- a/parsec/remote_dep_mpi.c +++ b/parsec/remote_dep_comm.c @@ -4,10 +4,20 @@ * reserved. * Copyright (c) 2023-2026 NVIDIA Corporation. All rights reserved. */ +/** + * @file + * + * Transport-neutral remote-dependency protocol. + * + * This file owns the remote-dependency command queues, activation-message + * protocol, eager payload handling, rendezvous GET/PUT sequence, delayed DTD + * activation, and local reshape requests. It intentionally talks to the + * selected communication backend only through parsec_comm_engine_t callbacks. + * Backend-specific bootstrap and transport mechanics belong in mca/comm. + */ #include "parsec/parsec_config.h" -#include <mpi.h> #include "profiling.h" #include "parsec/class/list.h" #include "parsec/utils/output.h" @@ -40,7 +50,7 @@ int parsec_comm_puts = 0; * larger the amount spent in ordering the tasks, but greater the potential * benefits of doing things in the right order. 
*/ -static void remote_dep_mpi_params(parsec_context_t* context); +static void remote_dep_comm_params(parsec_context_t* context); static int parsec_param_nb_tasks_extracted = 20; /* For the meaning of aggregate, short and eager, refer to the * param register help text for comm_aggregate, and @@ -84,7 +94,7 @@ remote_dep_cmd_to_string(remote_dep_wire_activate_t* origin, return parsec_task_snprintf(str, len, &task); } -/* TODO: fix heterogeneous restriction by using proper mpi datatypes */ +/* TODO: fix heterogeneous restriction by using transport-neutral datatypes. */ #define dep_dtt parsec_datatype_int8_t #define dep_count sizeof(remote_dep_wire_activate_t) #define dep_extent dep_count @@ -106,13 +116,13 @@ parsec_list_t dep_put_fifo; /* ordered non threaded fifo */ /* help manage the messages in the same category, where a category is either messages * to the same destination, or with the same action key. */ -static dep_cmd_item_t** parsec_mpi_same_pos_items; -static int parsec_mpi_same_pos_items_size = 0; +static dep_cmd_item_t** remote_dep_same_pos_items; +static int remote_dep_same_pos_items_size = 0; -static int mpi_initialized = 0; +static int remote_dep_initialized = 0; #if defined(PARSEC_REMOTE_DEP_USE_THREADS) -static pthread_mutex_t mpi_thread_mutex; -static pthread_cond_t mpi_thread_condition; +static pthread_mutex_t comm_thread_mutex; +static pthread_cond_t comm_thread_condition; #endif parsec_execution_stream_t parsec_comm_es = { @@ -140,11 +150,15 @@ parsec_execution_stream_t parsec_comm_es = { .datarepo_mempools = {0} }; -static void remote_dep_mpi_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item); -static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps); +static void remote_dep_comm_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item); +static void remote_dep_comm_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps); + +static void 
remote_dep_comm_get_end(parsec_execution_stream_t* es, + int idx, + parsec_remote_deps_t* deps); static int -remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, +remote_dep_comm_get_end_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, void *msg, size_t msg_size, @@ -152,7 +166,7 @@ remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, void *cb_data); static int -remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, +remote_dep_comm_put_end_cb(parsec_comm_engine_t *ce, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, parsec_ce_mem_reg_handle_t rreg, @@ -173,12 +187,12 @@ int remote_dep_ce_init(parsec_context_t* context); static int local_dep_nothread_reshape(parsec_execution_stream_t* es, dep_cmd_item_t *item); -static int remote_dep_mpi_progress(parsec_execution_stream_t* es); +static int remote_dep_comm_progress(parsec_execution_stream_t* es); -static void remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, +static void remote_dep_comm_new_taskpool(parsec_execution_stream_t* es, dep_cmd_item_t *dep_cmd_item); -static void remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, +static void remote_dep_comm_release_delayed_deps(parsec_execution_stream_t* es, dep_cmd_item_t *item); /* Perform a memcpy with datatypes by doing a local sendrecv */ @@ -188,13 +202,13 @@ static int remote_dep_nothread_memcpy(parsec_execution_stream_t* es, int remote_dep_ce_reconfigure(parsec_context_t* context); #ifdef PARSEC_PROF_TRACE -static void remote_dep_mpi_profiling_init(void); +static void remote_dep_comm_profiling_init(void); #else -#define remote_dep_mpi_profiling_init() do {} while(0) +#define remote_dep_comm_profiling_init() do {} while(0) #endif // PARSEC_PROF_TRACE -static void remote_dep_mpi_params(parsec_context_t* context) { +static void remote_dep_comm_params(parsec_context_t* context) { (void)context; #if RDEP_MSG_SHORT_LIMIT != 0 parsec_mca_param_reg_sizet_name("runtime", "comm_short_limit", "Controls the maximum size of a short message. 
Short messages contain both the control message notifying the completion of a task and the associated data that fit completely in that buffer length. The maximum size of a short message should be lower than the network MTU.", @@ -212,69 +226,42 @@ int remote_dep_dequeue_init(parsec_context_t* context) { pthread_attr_t thread_attr; - int is_mpi_up = 0; - int thread_level_support; - - assert(mpi_initialized == 0); + assert(remote_dep_initialized == 0); - remote_dep_mpi_params(context); - - MPI_Initialized(&is_mpi_up); - if( 0 == is_mpi_up ) { - /** - * MPI is not up, so we will consider this as a single node run. Fall - * back to the no-MPI case. - */ - context->nb_nodes = 1; - parsec_communication_engine_up = -1; /* No communications supported */ - /*TODO: restore the original behavior when modular datatype engine is - * available */ - parsec_fatal("MPI was not initialized. This version of PaRSEC was compiled with MPI datatype supports and *needs* MPI to execute.\n" - "\t* Please initialized MPI in the application (MPI_Init/MPI_Init_thread) prior to initializing PaRSEC.\n" - "\t* Alternatively, compile a version of PaRSEC without MPI (-DPARSEC_DIST_WITH_MPI=OFF in ccmake)\n"); - return PARSEC_SUCCESS; - } + remote_dep_comm_params(context); parsec_communication_engine_up = 0; /* we have communication capabilities */ - MPI_Query_thread( &thread_level_support ); - if( thread_level_support == MPI_THREAD_SINGLE || - thread_level_support == MPI_THREAD_FUNNELED ) { - parsec_warning("MPI was not initialized with the appropriate level of thread support.\n" - "\t* Current level is %s, while MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE is needed\n" - "\t* to guarantee correctness of the PaRSEC runtime.\n", - thread_level_support == MPI_THREAD_SINGLE ? "MPI_THREAD_SINGLE" : "MPI_THREAD_FUNNELED"); - } - /* Do this first to give a chance to the communication engine to define * who this process is by setting the corresponding info in the * parsec_context. 
*/ if( NULL == parsec_comm_engine_init(context) ) { + parsec_communication_engine_up = -1; parsec_warning("Communication engine failed to start. Additional information might be available in the corresponding error message"); return PARSEC_ERR_NOT_FOUND; } if(parsec_param_comm_thread_multiple) { - if( thread_level_support >= MPI_THREAD_MULTIPLE ) { + if( parsec_ce.capabilites.multithreaded ) { context->flags |= PARSEC_CONTEXT_FLAG_COMM_MT; } else if(parsec_param_comm_thread_multiple != -1) { - parsec_warning("Requested multithreaded access to the communication engine, but MPI is not initialized with MPI_THREAD_MULTIPLE.\n" - "\t* PaRSEC will continue with the funneled thread communication engine model.\n"); + parsec_warning("Requested multithreaded access to the communication engine, but the selected backend does not support it.\n" + "\t* PaRSEC will continue with the funneled communication engine model.\n"); } } PARSEC_OBJ_CONSTRUCT(&dep_cmd_queue, parsec_dequeue_t); PARSEC_OBJ_CONSTRUCT(&dep_cmd_fifo, parsec_list_t); - /* Build the condition used to drive the MPI thread */ - pthread_mutex_init( &mpi_thread_mutex, NULL ); - pthread_cond_init( &mpi_thread_condition, NULL ); + /* Build the condition used to drive the communication thread. */ + pthread_mutex_init( &comm_thread_mutex, NULL ); + pthread_cond_init( &comm_thread_condition, NULL ); pthread_attr_init(&thread_attr); pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM); - remote_dep_mpi_profiling_init(); + remote_dep_comm_profiling_init(); /* From now on the communication capabilities are enabled */ parsec_communication_engine_up = 1; @@ -284,22 +271,23 @@ remote_dep_dequeue_init(parsec_context_t* context) /** * We need to synchronize with the newly spawned thread. We will use the - * condition for this. If we lock the mutex prior to spawning the MPI thread, - * and then go in a condition wait, the MPI thread can lock the mutex, and - * then call condition signal. This insure proper synchronization. 
Similar - * mechanism will be used to turn on and off the MPI thread. + * condition for this. If we lock the mutex prior to spawning the + * communication thread, and then go in a condition wait, the communication + * thread can lock the mutex, and then call condition signal. This ensures + * proper synchronization. A similar mechanism will be used to turn the + * communication thread on and off. */ - pthread_mutex_lock(&mpi_thread_mutex); + pthread_mutex_lock(&comm_thread_mutex); pthread_create(&dep_thread_id, &thread_attr, (void* (*)(void*))remote_dep_dequeue_main, (void*)context); - /* Wait until the MPI thread signals it's awakening */ - pthread_cond_wait( &mpi_thread_condition, &mpi_thread_mutex ); + /* Wait until the communication thread signals it is awake. */ + pthread_cond_wait( &comm_thread_condition, &comm_thread_mutex ); up_and_running: - mpi_initialized = 1; /* up and running */ + remote_dep_initialized = 1; /* up and running */ remote_dep_ce_init(context); return PARSEC_SUCCESS; @@ -308,14 +296,15 @@ remote_dep_dequeue_init(parsec_context_t* context) int remote_dep_dequeue_fini(parsec_context_t* context) { - if( 0 == mpi_initialized ) return 0; + if( 0 == remote_dep_initialized ) return 0; /** * We suppose the disable function was called before. Then we will append a - * shutdown command in the MPI thread queue, and wake the MPI thread. Upon - * processing of the pending command the MPI thread will exit, we will be - * able to catch this by locking the mutex. Once we know the MPI thread is - * gone, cleaning up will be straightforward. + * shutdown command in the communication thread queue, and wake the + * communication thread. Upon processing of the pending command the + * communication thread will exit, we will be able to catch this by locking + * the mutex. Once we know the communication thread is gone, cleaning up + * will be straightforward. 
*/ if( 1 < parsec_communication_engine_up ) { dep_cmd_item_t* item = (dep_cmd_item_t*) calloc(1, sizeof(dep_cmd_item_t)); @@ -323,13 +312,13 @@ remote_dep_dequeue_fini(parsec_context_t* context) void *ret; item->action = DEP_CTL; - item->cmd.ctl.enable = -1; /* turn off and return from the MPI thread */ + item->cmd.ctl.enable = -1; /* turn off and return from the communication thread */ item->priority = 0; parsec_dequeue_push_back(&dep_cmd_queue, (parsec_list_item_t*) item); - /* I am supposed to own the lock. Wake the MPI thread */ - pthread_cond_signal(&mpi_thread_condition); - pthread_mutex_unlock(&mpi_thread_mutex); + /* I am supposed to own the lock. Wake the communication thread. */ + pthread_cond_signal(&comm_thread_condition); + pthread_mutex_unlock(&comm_thread_mutex); pthread_join(dep_thread_id, &ret); assert((parsec_context_t*)ret == context); } @@ -338,7 +327,7 @@ remote_dep_dequeue_fini(parsec_context_t* context) PARSEC_OBJ_DESTRUCT(&dep_cmd_queue); assert(NULL == parsec_dequeue_pop_front(&dep_cmd_fifo)); PARSEC_OBJ_DESTRUCT(&dep_cmd_fifo); - mpi_initialized = 0; + remote_dep_initialized = 0; PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "Process has reshaped %zu tiles.", count_reshaping); (void)context; @@ -356,26 +345,26 @@ remote_dep_dequeue_on(parsec_context_t* context) { /* If we are the only participant in this execution, we should not have to * communicate with any other process. However, we might have to execute all - * local data copies, which requires MPI. + * local data copies, which require the selected communication backend. 
*/ if( 0 >= parsec_communication_engine_up ) return -1; if( context->nb_nodes == 1 ) return 1; - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine signalled ON on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine signalled ON on process %d/%d", context->my_rank, context->nb_nodes); /* At this point I am supposed to own the mutex */ parsec_communication_engine_up = 2; - pthread_cond_signal(&mpi_thread_condition); - pthread_mutex_unlock(&mpi_thread_mutex); + pthread_cond_signal(&comm_thread_condition); + pthread_mutex_unlock(&comm_thread_mutex); /* The waking up of the communication thread happen asynchronously, once the thread - * receives the signal. At that point it acquires the mpi_thread_mutex and set the + * receives the signal. At that point it acquires the comm_thread_mutex and set the * global variable parsec_communication_engine_up to 3. */ /** - * We need to wait for the communication thread to perform the mpi_setup + * We need to wait for the communication thread to perform the backend setup * as it will fill-up my_rank on the context. 
*/ while( 3 != parsec_communication_engine_up ) sched_yield(); @@ -392,16 +381,16 @@ remote_dep_dequeue_off(parsec_context_t* context) dep_cmd_item_t* item = (dep_cmd_item_t*) calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); item->action = DEP_CTL; - item->cmd.ctl.enable = 0; /* turn OFF the MPI thread */ + item->cmd.ctl.enable = 0; /* turn OFF the communication thread */ item->priority = 0; /* wait until the communication thread is up and running */ while( 3 != parsec_communication_engine_up ) sched_yield(); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine signalled OFF on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine signalled OFF on process %d/%d", context->my_rank, context->nb_nodes); parsec_dequeue_push_back(&dep_cmd_queue, (parsec_list_item_t*) item); - /* wait until we own the PaRSEC MPI synchronization mutex */ - pthread_mutex_lock(&mpi_thread_mutex); + /* wait until we own the PaRSEC communication synchronization mutex */ + pthread_mutex_lock(&comm_thread_mutex); assert( 1 == parsec_communication_engine_up ); (void)context; /* silence warning */ @@ -409,7 +398,7 @@ remote_dep_dequeue_off(parsec_context_t* context) } static void -remote_dep_mpi_initialize_execution_stream(parsec_context_t *context) +remote_dep_comm_initialize_execution_stream(parsec_context_t *context) { parsec_comm_es.th_id = 0; /* Pretend to be the master thread */ parsec_comm_es.virtual_process = context->virtual_processes[0]; @@ -428,8 +417,8 @@ void* remote_dep_dequeue_main(parsec_context_t* context) PARSEC_PAPI_SDE_THREAD_INIT(); /* Now synchronize with the main thread */ - pthread_mutex_lock(&mpi_thread_mutex); - pthread_cond_signal(&mpi_thread_condition); + pthread_mutex_lock(&comm_thread_mutex); + pthread_cond_signal(&comm_thread_condition); #ifdef PARSEC_PROF_TRACE parsec_comm_es.es_profile = parsec_profiling_stream_init( 2*1024*1024, "Comm thread"); @@ -437,19 +426,20 @@ void* 
remote_dep_dequeue_main(parsec_context_t* context) #endif // PARSEC_PROF_TRACE /* This is the main loop. Wait until being woken up by the main thread, do - * the MPI stuff until we get the OFF or FINI commands. Then react the them. + * the communication engine until we get the OFF or FINI commands. Then + * react to them. * However, the first time do the delayed initialization that could not have * been done before due to the lack of other component initialization. */ while( -1 != whatsup ) { /* Let's wait until we are awaken */ - pthread_cond_wait(&mpi_thread_condition, &mpi_thread_mutex); + pthread_cond_wait(&comm_thread_condition, &comm_thread_mutex); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine ON on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine ON on process %d/%d", context->my_rank, context->nb_nodes); - /* The MPI thread is owning the lock */ + /* The communication thread is owning the lock. */ assert( parsec_communication_engine_up == 2 ); parsec_ce.enable(&parsec_ce); @@ -469,7 +459,7 @@ void* remote_dep_dequeue_main(parsec_context_t* context) } whatsup = remote_dep_dequeue_nothread_progress(&parsec_comm_es, -1 /* loop till explicitly asked to return */); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine OFF on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine OFF on process %d/%d", context->my_rank, context->nb_nodes); parsec_communication_engine_up = 1; /* went to sleep */ } @@ -482,7 +472,7 @@ void* remote_dep_dequeue_main(parsec_context_t* context) int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp) { - if(!mpi_initialized) return 0; + if(!remote_dep_initialized) return 0; remote_dep_inc_flying_messages(tp); dep_cmd_item_t* item = (dep_cmd_item_t*)calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); @@ -496,7 +486,7 @@ int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp) int 
remote_dep_dequeue_delayed_dep_release(parsec_remote_deps_t *deps) { - if(!mpi_initialized) return 0; + if(!remote_dep_initialized) return 0; dep_cmd_item_t* item = (dep_cmd_item_t*)calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); item->action = DEP_DTD_DELAYED_RELEASE; @@ -597,10 +587,10 @@ remote_dep_copy_allocate(parsec_dep_type_description_t* data, int preferred_devi /* don't use preferred_device, it might not be the location where the data copy resides */ parsec_data_start_transfer_ownership_to_copy(dc->original, dc->device_index, PARSEC_FLOW_ACCESS_WRITE); if (dc->device_index != preferred_device) { - PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "MPI:\tFail to allocate tile on device %d and instead allocate on device %d\n", + PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "RDEP:\tFail to allocate tile on device %d and instead allocate on device %d\n", preferred_device, dc->device_index); } - PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "MPI:\tMalloc new temporary tile [dev %d] copy %p size %" PRIu64 " count = %" PRIu64 " displ = %" PRIi64 " %p", + PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "RDEP:\tMalloc new temporary tile [dev %d] copy %p size %" PRIu64 " count = %" PRIu64 " displ = %" PRIi64 " %p", dc->device_index, dc, data->arena->elem_size, data->dst_count, data->dst_displ, data->arena); return dc; } @@ -626,9 +616,9 @@ reshape_copy_allocate(parsec_dep_type_description_t* data) /** * - * Fulfill a reshape promise by the current thread - * (when MPI_THREAD_MULTIPLE) or delegate the reshaping to the communication - * thread. + * Fulfill a reshape promise by the current thread when the + * selected backend supports multithreaded access, or delegate the reshaping to + * the communication thread otherwise. * Routine set as callback when initializing a future. 
* * @param[inout] future future for the reshaping, may be fulfilled by this call or @@ -661,37 +651,40 @@ void parsec_local_reshape_cb(parsec_base_future_t *future, ... ) #endif #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - char type_name_src[MAX_TASK_STRLEN] = "NULL"; - char type_name_dst[MAX_TASK_STRLEN] = "NULL"; - int len; + uintptr_t type_id_src = 0; + uintptr_t type_id_dst = 0; int src_pack_size=0, dst_pack_size=0; if(dt->local->src_datatype != PARSEC_DATATYPE_NULL) { - MPI_Type_get_name(dt->local->src_datatype, type_name_src, &len); - MPI_Pack_size(dt->local->src_count, dt->local->src_datatype, MPI_COMM_WORLD, &src_pack_size); + type_id_src = (uintptr_t)dt->local->src_datatype; + if( NULL != parsec_ce.pack_size ) { + parsec_ce.pack_size(&parsec_ce, dt->local->src_count, dt->local->src_datatype, &src_pack_size); + } } if(dt->local->dst_datatype != PARSEC_DATATYPE_NULL) { - MPI_Type_get_name(dt->local->dst_datatype, type_name_dst, &len); - MPI_Pack_size(dt->local->dst_count, dt->local->dst_datatype, MPI_COMM_WORLD, &dst_pack_size); + type_id_dst = (uintptr_t)dt->local->dst_datatype; + if( NULL != parsec_ce.pack_size ) { + parsec_ce.pack_size(&parsec_ce, dt->local->dst_count, dt->local->dst_datatype, &dst_pack_size); + } } if(src_pack_size != dst_pack_size){ - parsec_warning("parsec_local_reshape: reshape requested between dtt with different packed size fut %p dtt [%p:%s = sz(%d) -> %p:%s= sz(%d)]", - future, - dt->local->src_datatype, type_name_src, src_pack_size, - dt->local->dst_datatype, type_name_dst, dst_pack_size); + parsec_warning("parsec_local_reshape: reshape requested between dtt with different packed size fut %p dtt [0x%" PRIxPTR " = sz(%d) -> 0x%" PRIxPTR " = sz(%d)]", + future, + type_id_src, src_pack_size, + type_id_dst, dst_pack_size); } #endif - /* if MPI is multithreaded do not thread-shift the sendrecv */ + /* If the selected backend is multithreaded, do not thread-shift the reshape. 
*/ if( (es->virtual_process->parsec_context->flags & PARSEC_CONTEXT_FLAG_COMM_MT) || (tp == NULL && task == NULL)/* || I AM COMM THREAD */) { parsec_data_copy_t *reshape_data = reshape_copy_allocate(dt->local); PARSEC_DEBUG_VERBOSE(2, parsec_debug_output, - "th%d RESHAPE_PROMISE COMPLETED COMP-THREAD to [%p:%p:%s -> %p:%p:%s] for %s fut %p", - es->th_id, dt->data, dt->data->dtt, type_name_src, - reshape_data, dt->local->dst_datatype, type_name_dst, task_string, future); + "th%d RESHAPE_PROMISE COMPLETED COMP-THREAD to [%p:0x%" PRIxPTR " -> %p:0x%" PRIxPTR "] for %s fut %p", + es->th_id, dt->data, type_id_src, + reshape_data, type_id_dst, task_string, future); parsec_ce.reshape(&parsec_ce, es, reshape_data, dt->local->dst_displ, dt->local->dst_datatype, dt->local->dst_count, @@ -706,9 +699,9 @@ void parsec_local_reshape_cb(parsec_base_future_t *future, ... ) } PARSEC_DEBUG_VERBOSE(4, parsec_debug_output, - "th%d RESHAPE_PROMISE TRIGGERED to [%p:%p:%s -> ...:%p:%s] for %s fut %p", - es->th_id, dt->data, dt->data->dtt, type_name_src, - dt->local->dst_datatype, type_name_dst, task_string, future); + "th%d RESHAPE_PROMISE TRIGGERED to [%p:0x%" PRIxPTR " -> ...:0x%" PRIxPTR "] for %s fut %p", + es->th_id, dt->data, type_id_src, + type_id_dst, task_string, future); dep_cmd_item_t* item = (dep_cmd_item_t*)calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); @@ -736,8 +729,8 @@ void parsec_local_reshape_cb(parsec_base_future_t *future, ... ) * once a datatype has been successfully retrieved it must cancel the iterator * progress in order to return ASAP the datatype to the communication engine. 
*/ -parsec_ontask_iterate_t -remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, +static parsec_ontask_iterate_t +remote_dep_comm_retrieve_datatype(parsec_execution_stream_t *eu, const parsec_task_t *newcontext, const parsec_task_t *oldcontext, const parsec_dep_t* dep, @@ -810,13 +803,10 @@ remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, */ if(old_dtt != output->data.remote.dst_datatype) { #if defined(PARSEC_DEBUG_NOISIER) - char type_name_src[MAX_TASK_STRLEN] = "NULL"; - char type_name_dst[MAX_TASK_STRLEN] = "NULL"; - int len; - if(old_dtt != PARSEC_DATATYPE_NULL) MPI_Type_get_name(old_dtt, type_name_src, &len); - if(output->data.remote.dst_datatype != PARSEC_DATATYPE_NULL) MPI_Type_get_name(output->data.remote.dst_datatype, type_name_dst, &len); - PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, "MPI: retrieve dtt for %s [dep_datatype_index %x] DTT: old %s new %s (%p) --> PACKED", - newcontext->task_class->name, dep->dep_datatype_index, type_name_src, type_name_dst, output->data.remote.dst_datatype); + PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, + "RDEP: retrieve dtt for %s [dep_datatype_index %x] DTT: old 0x%" PRIxPTR " new 0x%" PRIxPTR " --> PACKED", + newcontext->task_class->name, dep->dep_datatype_index, + (uintptr_t)old_dtt, (uintptr_t)output->data.remote.dst_datatype); #endif int dsize; parsec_ce.pack_size(&parsec_ce, output->data.remote.dst_count, output->data.remote.dst_datatype, &dsize); @@ -827,13 +817,10 @@ remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, } #if defined(PARSEC_DEBUG_NOISIER) { - char type_name_src[MAX_TASK_STRLEN] = "NULL"; - char type_name_dst[MAX_TASK_STRLEN] = "NULL"; - int len; - if(old_dtt!=PARSEC_DATATYPE_NULL) MPI_Type_get_name(old_dtt, type_name_src, &len); - if(output->data.remote.dst_datatype!=PARSEC_DATATYPE_NULL) MPI_Type_get_name(output->data.remote.dst_datatype, type_name_dst, &len); - PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, "MPI: retrieve dtt for %s 
[dep_datatype_index %x] DTT: old %s new %s (%p)--> CONTINUE", - newcontext->task_class->name, dep->dep_datatype_index, type_name_src, type_name_dst, output->data.remote.dst_datatype); + PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, + "RDEP: retrieve dtt for %s [dep_datatype_index %x] DTT: old 0x%" PRIxPTR " new 0x%" PRIxPTR " --> CONTINUE", + newcontext->task_class->name, dep->dep_datatype_index, + (uintptr_t)old_dtt, (uintptr_t)output->data.remote.dst_datatype); } #endif /* Predict where the incoming temporary should be located, by using the data_affinity. @@ -924,12 +911,12 @@ remote_dep_get_datatypes(parsec_execution_stream_t* es, if(return_defer) { return -2; } - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\tRetrieve datatype with mask 0x%x (remote_dep_get_datatypes)", (1U<msg.task_class_id = dtd_task->super.task_class->task_class_id; origin->output[k].data.remote.src_datatype = origin->output[k].data.remote.dst_datatype = PARSEC_DATATYPE_NULL; dtd_task->super.task_class->iterate_successors(es, (parsec_task_t *)dtd_task, (1U<output[k].data.remote.src_count = (idx < data_sizes[0]) ? 
data_sizes[idx+1] : 0; PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, - "MPI:\tRetrieve datatype with mask 0x%x (remote_dep_get_datatypes) remote size %u", + "RDEP:\tRetrieve datatype with mask 0x%x (remote_dep_get_datatypes) remote size %u", local_mask, origin->output[k].data.remote.src_count); incoming_mask = origin->incoming_mask; task.task_class->iterate_successors(es, &task, local_mask, - remote_dep_mpi_retrieve_datatype, + remote_dep_comm_retrieve_datatype, origin); if( (origin->incoming_mask & ~incoming_mask & (1U<output[k].data.remote.dst_count) ) { @@ -1054,7 +1041,7 @@ remote_dep_release_incoming(parsec_execution_stream_t* es, target = task.task_class->out[++pidx]; assert(NULL != target); } - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tDATA %p(%s) released from %p[%d] flow idx %d", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\tDATA %p(%s) released from %p[%d] flow idx %d", origin->output[i].data.data, target->name, origin, i, target->flow_index); task.data[target->flow_index].source_repo = NULL; task.data[target->flow_index].source_repo_entry = NULL; @@ -1090,7 +1077,7 @@ remote_dep_release_incoming(parsec_execution_stream_t* es, } else { assert(0); } - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tTranslate mask from 0x%lx to 0x%x (remote_dep_release_incoming)", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\tTranslate mask from 0x%lx to 0x%x (remote_dep_release_incoming)", complete_mask, action_mask); (void)task.task_class->release_deps(es, &task, action_mask | PARSEC_ACTION_RELEASE_LOCAL_DEPS | PARSEC_ACTION_RESHAPE_REMOTE_ON_RELEASE, @@ -1171,10 +1158,10 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, position = (DEP_ACTIVATE == item->action) ? 
item->cmd.activate.peer : (context->nb_nodes + item->action); parsec_list_item_singleton(&item->pos_list); - same_pos = parsec_mpi_same_pos_items[position]; + same_pos = remote_dep_same_pos_items[position]; if((NULL != same_pos) && (same_pos->priority >= item->priority)) { /* insert the item in the peer list */ - parsec_list_item_ring_push_sorted(&same_pos->pos_list, &item->pos_list, dep_mpi_pos_list); + parsec_list_item_ring_push_sorted(&same_pos->pos_list, &item->pos_list, dep_cmd_pos_list); } else { if(NULL != same_pos) { /* this is the new head of the list. */ @@ -1190,7 +1177,7 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, #endif parsec_list_item_singleton((parsec_list_item_t*)same_pos); } - parsec_mpi_same_pos_items[position] = item; + remote_dep_same_pos_items[position] = item; /* And add ourselves in the temp list */ parsec_list_nolock_push_front(&temp_list, (parsec_list_item_t*)item); } @@ -1209,7 +1196,7 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, if(NULL == (item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_cmd_fifo)) ) { /* only progress MPI if necessary */ if (context->nb_nodes > 1) { - ret = remote_dep_mpi_progress(es); + ret = remote_dep_comm_progress(es); if( 0 == ret && ((comm_yield == 2) || (comm_yield == 1 /* communication list is full, we need to forcefully drain the network */ @@ -1234,10 +1221,10 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, free(item); return ret; /* FINI or OFF */ case DEP_NEW_TASKPOOL: - remote_dep_mpi_new_taskpool(es, item); + remote_dep_comm_new_taskpool(es, item); break; case DEP_DTD_DELAYED_RELEASE: - remote_dep_mpi_release_delayed_deps(es, item); + remote_dep_comm_release_delayed_deps(es, item); break; case DEP_ACTIVATE: remote_dep_nothread_send(es, &item); @@ -1265,25 +1252,24 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, /* if we still have pending messages of the same type, stay here for an extra loop */ if( 
cycles >= 0 ) cycles++; } - parsec_mpi_same_pos_items[position] = same_pos; + remote_dep_same_pos_items[position] = same_pos; goto check_pending_queues; } #ifdef PARSEC_PROF_TRACE -static int MPI_Activate_sk, MPI_Activate_ek; -static int MPI_Data_ctl_sk, MPI_Data_ctl_ek; -static int MPI_Data_plds_sk, MPI_Data_plds_ek; -static int MPI_Data_pldr_sk, MPI_Data_pldr_ek; +static int RDEP_Activate_sk, RDEP_Activate_ek; +static int RDEP_Data_ctl_sk, RDEP_Data_ctl_ek; +static int RDEP_Data_plds_sk, RDEP_Data_plds_ek; +static int RDEP_Data_pldr_sk, RDEP_Data_pldr_ek; /** - * The structure describe the MPI events saves into the profiling stream. The following - * string represent it's description so that an external package can decrypt the - * binary format of the stream. + * Description of the remote-dependency events saved into the profiling stream. + * The following string lets external tools decode the binary event payload. */ -static const char *parsec_profile_remote_dep_mpi_info_to_string = "src{int32_t};" +static const char *parsec_profile_remote_dep_comm_info_to_string = "src{int32_t};" "dst{int32_t};" "tid{uint64_t};" "tpid{uint32_t};" @@ -1291,34 +1277,34 @@ static const char *parsec_profile_remote_dep_mpi_info_to_string = "src{int32_t}; "msg_size{int32_t};" "dep{int32_t}"; -static void remote_dep_mpi_profiling_init(void) +static void remote_dep_comm_profiling_init(void) { - parsec_profiling_add_dictionary_keyword( "MPI_ACTIVATE", "fill:#FF0000", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - &MPI_Activate_sk, &MPI_Activate_ek); - parsec_profiling_add_dictionary_keyword( "MPI_DATA_CTL", "fill:#000077", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - &MPI_Data_ctl_sk, &MPI_Data_ctl_ek); - parsec_profiling_add_dictionary_keyword( "MPI_DATA_PLD_SND", "fill:#B08080", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - 
&MPI_Data_plds_sk, &MPI_Data_plds_ek); - parsec_profiling_add_dictionary_keyword( "MPI_DATA_PLD_RCV", "fill:#80B080", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - &MPI_Data_pldr_sk, &MPI_Data_pldr_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_ACTIVATE", "fill:#FF0000", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Activate_sk, &RDEP_Activate_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_DATA_CTL", "fill:#000077", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Data_ctl_sk, &RDEP_Data_ctl_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_DATA_PLD_SND", "fill:#B08080", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Data_plds_sk, &RDEP_Data_plds_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_DATA_PLD_RCV", "fill:#80B080", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Data_pldr_sk, &RDEP_Data_pldr_ek); } -static void remote_dep_mpi_profiling_fini(void) +static void remote_dep_comm_profiling_fini(void) { /* Nothing to do, the thread_profiling structures will be automatically * released when the master profiling system is shut down. 
*/ } -static inline uint64_t remote_dep_mpi_profiling_event_id(void) +static inline uint64_t remote_dep_comm_profiling_event_id(void) { static uint64_t event_id = 0; /* we only need distinct event ids for events triggered by the comm thread, @@ -1328,8 +1314,8 @@ static inline uint64_t remote_dep_mpi_profiling_event_id(void) } #else -#define remote_dep_mpi_profiling_fini() do {} while(0) -#define remote_dep_mpi_profiling_event_id() (0UL) +#define remote_dep_comm_profiling_fini() do {} while(0) +#define remote_dep_comm_profiling_event_id() (0UL) #endif /* PARSEC_PROF_TRACE */ @@ -1344,7 +1330,7 @@ static inline uint64_t remote_dep_mpi_profiling_event_id(void) * @returns 1 if the message can't be packed due to lack of space, or 0 * otherwise. */ -static int remote_dep_mpi_pack_dep(int peer, +static int remote_dep_comm_pack_dep(int peer, dep_cmd_item_t* item, char* packed_buffer, int length, @@ -1450,7 +1436,7 @@ static int remote_dep_mpi_pack_dep(int peer, (void)parsec_atomic_fetch_add_int32(&deps->pending_ack, expected); /* Keep track of the inflight data */ #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - parsec_debug_verbose(6, parsec_comm_output_stream, "MPI:\tTO\t%d\tActivate\t% -8s\n" + parsec_debug_verbose(6, parsec_comm_output_stream, "RDEP:\tTO\t%d\tActivate\t% -8s\n" " \t\t\twith datakey %lx\tmask %lx short mask %lu length %d", peer, tmp, msg->deps, msg->output_mask, msg->output_mask ^ item->cmd.activate.task.output_mask, msg->length); @@ -1516,7 +1502,7 @@ static int local_dep_nothread_reshape(parsec_execution_stream_t* es, PARSEC_DATA_COPY_RETAIN(cmd->memcpy.source); int rc = remote_dep_nothread_memcpy(es, item); - assert(MPI_SUCCESS == rc); + assert(PARSEC_SUCCESS == rc); parsec_future_set(item->cmd.memcpy_reshape.future, cmd->memcpy.destination); @@ -1528,7 +1514,7 @@ static int local_dep_nothread_reshape(parsec_execution_stream_t* es, #endif (void)es; - return (MPI_SUCCESS == rc ? 0 : -1); + return (PARSEC_SUCCESS == rc ? 
0 : -1); } /** @@ -1555,7 +1541,7 @@ static int remote_dep_nothread_send(parsec_execution_stream_t* es, deps = (parsec_remote_deps_t*)item->cmd.activate.task.source_deps; parsec_list_item_singleton((parsec_list_item_t*)item); - if( 0 == remote_dep_mpi_pack_dep(peer, item, packed_buffer, + if( 0 == remote_dep_comm_pack_dep(peer, item, packed_buffer, DEP_SHORT_BUFFER_SIZE, &position) ) { /* space left on the buffer. Move to the next item with the same destination */ dep_cmd_item_t* next = (dep_cmd_item_t*)parsec_list_item_ring_chop(&item->pos_list); @@ -1572,11 +1558,11 @@ static int remote_dep_nothread_send(parsec_execution_stream_t* es, assert(NULL != ring); /* dep index is meaningless in this context, set to -1 */ - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Activate_sk, 0, -1, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Activate_sk, 0, -1, es->virtual_process->parsec_context->my_rank, peer, deps->msg, position, PARSEC_DATATYPE_PACKED); parsec_ce.send_am(&parsec_ce, PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG, peer, packed_buffer, position); - TAKE_TIME(es->es_profile, MPI_Activate_ek, 0); + TAKE_TIME(es->es_profile, RDEP_Activate_ek, 0); DEBUG_MARK_CTL_MSG_ACTIVATE_SENT(peer, (void*)&deps->msg, &deps->msg); do { @@ -1597,7 +1583,7 @@ static int remote_dep_nothread_send(parsec_execution_stream_t* es, * target) before draining the network and pushing out the highest priority * actions. 
*/ -static int remote_dep_mpi_progress(parsec_execution_stream_t* es) +static int remote_dep_comm_progress(parsec_execution_stream_t* es) { int ret = 0; @@ -1607,12 +1593,12 @@ static int remote_dep_mpi_progress(parsec_execution_stream_t* es) if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { parsec_remote_deps_t* deps = (parsec_remote_deps_t*)parsec_list_nolock_pop_front(&dep_activates_fifo); - remote_dep_mpi_get_start(es, deps); + remote_dep_comm_get_start(es, deps); ret++; } if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_put_fifo)) { dep_cmd_item_t* item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_put_fifo); - remote_dep_mpi_put_start(es, item); + remote_dep_comm_put_start(es, item); ret++; } @@ -1620,7 +1606,7 @@ static int remote_dep_mpi_progress(parsec_execution_stream_t* es) } static int -remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, +remote_dep_comm_save_put_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, void *msg, size_t msg_size, @@ -1660,7 +1646,7 @@ remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, assert(0 != deps->outgoing_mask); item->priority = deps->max_priority; - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI: Put cb_received for %s from %d tag %u which 0x%x (deps %p)", + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP: Put cb_received for %s from %d tag %u which 0x%x (deps %p)", remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), item->cmd.activate.peer, -1, task->output_mask, (void*)deps); @@ -1668,9 +1654,9 @@ remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, parsec_list_nolock_push_sorted(&dep_put_fifo, (parsec_list_item_t*)item, dep_cmd_prio); if( parsec_ce.can_serve(&parsec_ce) ) { item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_put_fifo); - remote_dep_mpi_put_start(es, item); + remote_dep_comm_put_start(es, item); } else { - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI: Put DELAYED for %s from %d tag %u which 0x%x (deps 
%p)", + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP: Put DELAYED for %s from %d tag %u which 0x%x (deps %p)", remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), item->cmd.activate.peer, -1, task->output_mask, (void*)deps); } @@ -1678,7 +1664,7 @@ remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, } static void -remote_dep_mpi_put_start(parsec_execution_stream_t* es, +remote_dep_comm_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item) { remote_dep_wire_get_t* task = &(item->cmd.activate.task); @@ -1686,19 +1672,15 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps = (parsec_remote_deps_t*) (uintptr_t) task->source_deps; int k, nbdtt; void* dataptr; - MPI_Datatype dtt; + parsec_datatype_t dtt; #endif /* !defined(PARSEC_PROF_DRY_DEP) */ -#if defined(PARSEC_DEBUG_NOISIER) - char type_name[MPI_MAX_OBJECT_NAME]; - int len; -#endif (void)es; DEBUG_MARK_CTL_MSG_GET_RECV(item->cmd.activate.peer, (void*)task, task); #if !defined(PARSEC_PROF_DRY_DEP) assert(task->output_mask); - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI:\tPUT mask=%lx deps 0x%lx", task->output_mask, task->source_deps); + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP:\tPUT mask=%lx deps 0x%lx", task->output_mask, task->source_deps); #ifdef PARSEC_RESHAPE_BEFORE_SEND_TO_REMOTE int all_completed = 1; @@ -1738,7 +1720,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, } } if( ! all_completed ) { - PARSEC_DEBUG_VERBOSE(4, parsec_comm_output_stream, "MPI:\tReshaping promises not yet completed for deps 0x%lx. Reschedule.", deps); + PARSEC_DEBUG_VERBOSE(4, parsec_comm_output_stream, "RDEP:\tReshaping promises not yet completed for deps 0x%lx. 
Reschedule.", deps); parsec_list_nolock_push_front(&dep_put_fifo, (parsec_list_item_t*)item); return; } @@ -1748,7 +1730,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, assert(k < MAX_PARAM_COUNT); if(!((1U<output_mask)) continue; - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\t[idx %d mask(0x%x / 0x%x)] %p, %p", k, (1U<output_mask, + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\t[idx %d mask(0x%x / 0x%x)] %p, %p", k, (1U<output_mask, deps->output[k].data.data, PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data)); dataptr = PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data); dtt = deps->output[k].data.remote.src_datatype; @@ -1780,11 +1762,10 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, parsec_ce_mem_reg_handle_t remote_memory_handle = item->cmd.activate.remote_memory_handle; -#if defined(PARSEC_DEBUG_NOISIER) - MPI_Type_get_name(dtt, type_name, &len); - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tTO\t%d\tPut START\tunknown \tk=%d\twith deps 0x%lx at %p type %s (%p)\t(src_mem_handle = %p, dst_mem_handle = %p)", - item->cmd.activate.peer, k, task->source_deps, dataptr, type_name, dtt, source_memory_handle, remote_memory_handle); -#endif + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, + "RDEP:\tTO\t%d\tPut START\tunknown \tk=%d\twith deps 0x%lx at %p type 0x%" PRIxPTR "\t(src_mem_handle = %p, dst_mem_handle = %p)", + item->cmd.activate.peer, k, task->source_deps, dataptr, + (uintptr_t)dtt, source_memory_handle, remote_memory_handle); remote_dep_cb_data_t *cb_data = (remote_dep_cb_data_t *) parsec_thread_mempool_allocate (parsec_remote_dep_cb_data_mempool->thread_mempools); @@ -1792,10 +1773,10 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, cb_data->k = k; #if defined(PARSEC_PROF_TRACE) - uint64_t event_id = remote_dep_mpi_profiling_event_id(); + uint64_t event_id = remote_dep_comm_profiling_event_id(); cb_data->event_id = event_id; #endif /* PARSEC_PROF_TRACE */ - 
TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_plds_sk, event_id, k, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Data_plds_sk, event_id, k, es->virtual_process->parsec_context->my_rank, item->cmd.activate.peer, deps->msg, nbdtt, dtt); @@ -1803,7 +1784,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, parsec_ce.put(&parsec_ce, source_memory_handle, 0, remote_memory_handle, 0, 0, item->cmd.activate.peer, - remote_dep_mpi_put_end_cb, cb_data, + remote_dep_comm_put_end_cb, cb_data, (parsec_ce_tag_t)task->callback_fn, &task->remote_callback_data, sizeof(uintptr_t)); parsec_comm_puts++; @@ -1819,7 +1800,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, } static int -remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, +remote_dep_comm_put_end_cb(parsec_comm_engine_t *ce, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, parsec_ce_mem_reg_handle_t rreg, @@ -1832,11 +1813,11 @@ remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, /* Retrieve deps from callback_data */ parsec_remote_deps_t* deps = ((remote_dep_cb_data_t *)cb_data)->deps; - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI:\tTO\tna\tPut END \tunknown \tk=%d\twith deps %p\tparams bla\t(src_mem_handle = %p, dst_mem_handle=%p", + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP:\tTO\tna\tPut END \tunknown \tk=%d\twith deps %p\tparams bla\t(src_mem_handle = %p, dst_mem_handle=%p", ((remote_dep_cb_data_t *)cb_data)->k, deps, lreg, rreg); #if defined(PARSEC_PROF_TRACE) - TAKE_TIME(parsec_comm_es.es_profile, MPI_Data_plds_ek, + TAKE_TIME(parsec_comm_es.es_profile, RDEP_Data_plds_ek, ((remote_dep_cb_data_t *)cb_data)->event_id); #endif /* PARSEC_PROF_TRACE */ @@ -1857,7 +1838,7 @@ remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, * the buffer, post all the control messages to initiate RGET, and all other local * cleanups. 
*/ -static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, +static void remote_dep_comm_recv_activate(parsec_execution_stream_t* es, parsec_remote_deps_t* deps, char* packed_buffer, int length, @@ -1874,7 +1855,7 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, #endif #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - parsec_debug_verbose(6, parsec_comm_output_stream, "MPI:\tFROM\t%d\tActivate\t% -8s\n" + parsec_debug_verbose(6, parsec_comm_output_stream, "RDEP:\tFROM\t%d\tActivate\t% -8s\n" "\twith datakey %lx\tparams %lx length %d (pack buf %d/%d) prio %d", deps->from, tmp, deps->msg.deps, deps->incoming_mask, deps->msg.length, *position, length, deps->max_priority); @@ -1894,7 +1875,7 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, parsec_dep_type_description_t *type_desc = &data_desc->remote; /* Check for CTL and data that do not carry payload */ if( parsec_is_CTL_dep(data_desc) ) { - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tHERE\t%d\tGet NONE\t% -8s\tk=%d\twith datakey %lx at type CONTROL", + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "RDEP:\tHERE\t%d\tGet NONE\t% -8s\tk=%d\twith datakey %lx at type CONTROL", deps->from, tmp, k, deps->msg.deps); /* deps->output[k].data.data = NULL; This is unnecessary*/ complete_mask |= (1U<from, tmp, k, deps->msg.deps); } @@ -1959,7 +1940,7 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, #if defined(PARSEC_DEBUG_NOISIER) for(int k = 0; complete_mask>>k; k++) if((1U<from, tmp, k, deps->msg.deps, deps->output[k].data.data); #endif /* If this is the only call then force the remote deps propagation */ @@ -1976,12 +1957,12 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, /* Check if we have any pending GET orders */ if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { deps = 
(parsec_remote_deps_t*)parsec_list_nolock_pop_front(&dep_activates_fifo); - remote_dep_mpi_get_start(es, deps); + remote_dep_comm_get_start(es, deps); } } static int -remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, +remote_dep_comm_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, void *msg, size_t msg_size, int src, void *cb_data) { @@ -2010,7 +1991,7 @@ remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, if( -1 == rc ) { /* the corresponding tp doesn't exist, yet. Put it in unexpected */ char* packed_buffer; - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "MPI:\tFROM\t%d\tActivate NoTPool\t% -8s\tk=%d\twith datakey %lx\tparams %lx", + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "RDEP:\tFROM\t%d\tActivate NoTPool\t% -8s\tk=%d\twith datakey %lx\tparams %lx", deps->from, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), 0, deps->msg.deps, deps->msg.output_mask); /* Copy the eager data to some temp storage */ @@ -2028,11 +2009,11 @@ remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, } } - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tFROM\t%d\tActivate\t% -8s\tk=%d\twith datakey %lx\tparams %lx", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\tFROM\t%d\tActivate\t% -8s\tk=%d\twith datakey %lx\tparams %lx", src, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), 0, deps->msg.deps, deps->msg.output_mask); /* Import the activation message and prepare for the reception */ - remote_dep_mpi_recv_activate(es, deps, msg, + remote_dep_comm_recv_activate(es, deps, msg, position + deps->msg.length, &position); assert( parsec_param_enable_aggregate || (position == length)); deps->eager_msg = NULL; /* this buffer will now be reused, not safe to store here */ @@ -2043,7 +2024,7 @@ remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, } void -remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, 
+remote_dep_comm_new_taskpool(parsec_execution_stream_t* es, dep_cmd_item_t *dep_cmd_item) { parsec_taskpool_t* obj = dep_cmd_item->cmd.new_taskpool.tp; @@ -2051,7 +2032,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; #endif - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "OPAQUE_MPI: ThreadID %"PRIxPTR"\tNew taskpool %d registered", + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "OPAQUE_RDEP: ThreadID %"PRIxPTR"\tNew taskpool %d registered", (intptr_t)pthread_self(), obj->taskpool_id); for(item = PARSEC_LIST_ITERATOR_FIRST(&dep_activates_noobj_fifo); item != PARSEC_LIST_ITERATOR_END(&dep_activates_noobj_fifo); @@ -2064,7 +2045,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, deps->eager_msg = buffer; /* provide get_datatype with access to the remote sizes */ rc = remote_dep_get_datatypes(es, deps, PARSEC_DTD_SKIP_SAVING, &position); assert( -1 != rc ); assert(deps->taskpool != NULL); - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tFROM\t%d\tActivate NEWOBJ\t% -8s\twith datakey %lx\tparams %lx", + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "RDEP:\tFROM\t%d\tActivate NEWOBJ\t% -8s\twith datakey %lx\tparams %lx", deps->from, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), deps->msg.deps, deps->msg.output_mask); @@ -2080,7 +2061,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, continue; } - remote_dep_mpi_recv_activate(es, deps, buffer, deps->msg.length, &position); + remote_dep_comm_recv_activate(es, deps, buffer, deps->msg.length, &position); deps->eager_msg = NULL; /* back to NULL */ free(buffer); (void)rc; @@ -2097,7 +2078,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, * the remote task. 
*/ static void -remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, +remote_dep_comm_release_delayed_deps(parsec_execution_stream_t* es, dep_cmd_item_t *item) { PARSEC_PINS(es, ACTIVATE_CB_BEGIN, NULL); @@ -2112,21 +2093,20 @@ remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, (void)rc; assert(deps != NULL); - remote_dep_mpi_recv_activate(es, deps, buffer, deps->msg.length, &position); + remote_dep_comm_recv_activate(es, deps, buffer, deps->msg.length, &position); free(buffer); PARSEC_PINS(es, ACTIVATE_CB_END, NULL); } -static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, +static void remote_dep_comm_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps) { remote_dep_wire_activate_t* task = &(deps->msg); int from = deps->from, k, count, nbdtt; remote_dep_wire_get_t msg; - MPI_Datatype dtt; + parsec_datatype_t dtt; #if defined(PARSEC_DEBUG_NOISIER) - char tmp[MAX_TASK_STRLEN], type_name[MPI_MAX_OBJECT_NAME]; - int len; + char tmp[MAX_TASK_STRLEN]; remote_dep_cmd_to_string(task, tmp, MAX_TASK_STRLEN); #endif for(k = count = 0; deps->incoming_mask >> k; k++) @@ -2136,7 +2116,7 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, DEBUG_MARK_CTL_MSG_ACTIVATE_RECV(from, (void*)task, task); msg.source_deps = task->deps; /* the deps copied from activate message from source */ - msg.callback_fn = (uintptr_t)remote_dep_mpi_get_end_cb; /* We let the source know to call this + msg.callback_fn = (uintptr_t)remote_dep_comm_get_end_cb; /* We let the source know to call this * function when the PUT is over, in a true * one sided case the (integer) value of this * function pointer will be registered as the @@ -2192,14 +2172,16 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, } -# if defined(PARSEC_DEBUG_NOISIER) - MPI_Type_get_name(dtt, type_name, &len); +#if defined(PARSEC_DEBUG_NOISIER) int _size; - MPI_Type_size(dtt, &_size); - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, 
"MPI:\tTO\t%d\tGet START\t% -8s\tk=%d\twith datakey %lx at %p type %s count %d displ %ld \t(k=%d, dst_mem_handle=%p)", - from, tmp, k, task->deps, PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data), type_name, nbdtt, - deps->output[k].data.remote.dst_displ, k, receiver_memory_handle); -# endif + parsec_type_size(dtt, &_size); + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "RDEP:\tTO\t%d\tGet START\t% -8s\tk=%d\twith datakey %lx at %p type 0x%" PRIxPTR " size %d count %d displ %ld \t(k=%d, dst_mem_handle=%p)", + from, tmp, k, task->deps, + PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data), + (uintptr_t)dtt, _size, nbdtt, + deps->output[k].data.remote.dst_displ, k, receiver_memory_handle); +#endif callback_data->memory_handle = receiver_memory_handle; @@ -2221,19 +2203,19 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, receiver_memory_handle_size ); #if defined(PARSEC_PROF_TRACE) - uint64_t event_id = remote_dep_mpi_profiling_event_id(); + uint64_t event_id = remote_dep_comm_profiling_event_id(); callback_data->event_id = event_id; #endif /* PARSEC_PROF_TRACE */ /* Send AM */ - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_pldr_sk, event_id, k, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Data_pldr_sk, event_id, k, from, es->virtual_process->parsec_context->my_rank, *task, nbdtt, dtt); - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_ctl_sk, event_id, k, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Data_ctl_sk, event_id, k, from, es->virtual_process->parsec_context->my_rank, *task, nbdtt, dtt); parsec_ce.send_am(&parsec_ce, PARSEC_CE_REMOTE_DEP_GET_DATA_TAG, from, buf, buf_size); - TAKE_TIME(es->es_profile, MPI_Data_ctl_ek, event_id); + TAKE_TIME(es->es_profile, RDEP_Data_ctl_ek, event_id); free(buf); @@ -2241,8 +2223,16 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, } } +static void remote_dep_comm_get_end(parsec_execution_stream_t* es, + int idx, + parsec_remote_deps_t* deps) +{ + /* The ref on the data will be released 
below */ + remote_dep_release_incoming(es, deps, (1U<msg, tmp, MAX_TASK_STRLEN), callback_data->k, deps->incoming_mask, src); #if defined(PARSEC_PROF_TRACE) - TAKE_TIME(es->es_profile, MPI_Data_pldr_ek, callback_data->event_id); + TAKE_TIME(es->es_profile, RDEP_Data_pldr_ek, callback_data->event_id); #endif /* PARSEC_PROF_TRACE */ - remote_dep_release_incoming(es, deps, (1U << callback_data->k)); + remote_dep_comm_get_end(es, callback_data->k, deps); parsec_ce.mem_unregister(&callback_data->memory_handle); parsec_thread_mempool_free(parsec_remote_dep_cb_data_mempool->thread_mempools, callback_data); @@ -2292,9 +2282,9 @@ remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, */ int remote_dep_ce_reconfigure(parsec_context_t* context) { - if( NULL != parsec_mpi_same_pos_items ) { - free(parsec_mpi_same_pos_items); parsec_mpi_same_pos_items = NULL; - parsec_mpi_same_pos_items_size = 0; + if( NULL != remote_dep_same_pos_items ) { + free(remote_dep_same_pos_items); remote_dep_same_pos_items = NULL; + remote_dep_same_pos_items_size = 0; } /** * Finalize the initialization of the upper level structures @@ -2303,9 +2293,9 @@ int remote_dep_ce_reconfigure(parsec_context_t* context) */ remote_deps_allocation_init(context->nb_nodes, MAX_PARAM_COUNT); - parsec_mpi_same_pos_items_size = context->nb_nodes + (int)DEP_LAST; - assert( NULL == parsec_mpi_same_pos_items ); - parsec_mpi_same_pos_items = (dep_cmd_item_t**)calloc(parsec_mpi_same_pos_items_size, + remote_dep_same_pos_items_size = context->nb_nodes + (int)DEP_LAST; + assert( NULL == remote_dep_same_pos_items ); + remote_dep_same_pos_items = (dep_cmd_item_t**)calloc(remote_dep_same_pos_items_size, sizeof(dep_cmd_item_t*)); if(1 < context->nb_nodes) { @@ -2329,14 +2319,14 @@ remote_dep_ce_init(parsec_context_t* context) PARSEC_OBJ_CONSTRUCT(&dep_put_fifo, parsec_list_t); /* Register Persistent requests */ - rc = parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG, remote_dep_mpi_save_activate_cb, context, + rc = 
parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG, remote_dep_comm_save_activate_cb, context, DEP_SHORT_BUFFER_SIZE * sizeof(char)); if( PARSEC_SUCCESS != rc ) { parsec_warning("[CE] Failed to register communication tag PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG (error %d)\n", rc); parsec_comm_engine_fini(&parsec_ce); return rc; } - rc = parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_GET_DATA_TAG, remote_dep_mpi_save_put_cb, context, + rc = parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_GET_DATA_TAG, remote_dep_comm_save_put_cb, context, 4096); if( PARSEC_SUCCESS != rc ) { parsec_warning("[CE] Failed to register communication tag PARSEC_CE_REMOTE_DEP_GET_DATA_TAG (error %d)\n", rc); @@ -2351,14 +2341,14 @@ remote_dep_ce_init(parsec_context_t* context) offsetof(remote_dep_cb_data_t, mempool_owner), 1); /* Lazy or delayed initializations */ - remote_dep_mpi_initialize_execution_stream(context); + remote_dep_comm_initialize_execution_stream(context); return PARSEC_SUCCESS; } int remote_dep_ce_fini(parsec_context_t* context) { (void)context; - remote_dep_mpi_profiling_fini(); + remote_dep_comm_profiling_fini(); // Unregister tags parsec_ce.tag_unregister(PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG); @@ -2369,9 +2359,9 @@ int remote_dep_ce_fini(parsec_context_t* context) parsec_mempool_destruct(parsec_remote_dep_cb_data_mempool); free(parsec_remote_dep_cb_data_mempool); parsec_remote_dep_cb_data_mempool = NULL; } - if( NULL != parsec_mpi_same_pos_items ) { - free(parsec_mpi_same_pos_items); parsec_mpi_same_pos_items = NULL; - parsec_mpi_same_pos_items_size = 0; + if( NULL != remote_dep_same_pos_items ) { + free(remote_dep_same_pos_items); remote_dep_same_pos_items = NULL; + remote_dep_same_pos_items_size = 0; } PARSEC_OBJ_DESTRUCT(&dep_activates_fifo); diff --git a/parsec/runtime.h b/parsec/runtime.h index 1688bb0df..fd8d557dd 100644 --- a/parsec/runtime.h +++ b/parsec/runtime.h @@ -162,6 +162,12 @@ typedef enum parsec_hook_return_e { * execution context. 
Several contexts can coexist on disjoint resources * at the same time. * + * If the selected communication backend needs an external process runtime + * and that runtime has not been initialized by the application, parsec_init() + * may initialize it. In that case, parsec_fini() releases the runtime during + * communication backend finalization. For example, the MPI backend initializes + * MPI on demand and finalizes MPI only if PaRSEC initialized it. + * * @param[in] nb_cores the number of cores to use * @param[inout] pargc a pointer to the number of arguments passed in pargv * @param[inout] pargv an argv-like NULL terminated array of arguments to pass to @@ -211,9 +217,11 @@ int parsec_version_ex( size_t len, char* version_string); * * @details * Reset the comm engine associated with the PaRSEC context, and use - * the communication context opaque_comm_ctx in the future (typically an MPI - * communicator). The context can only be changed while the PaRSEC runtime - * is down, more specifically while the communication thread is not active. + * the communication context opaque_comm_ctx in the future. For the MPI backend + * this is an MPI communicator. For the UCX backend this is a pointer to a + * parsec_comm_ucx_external_worker_t declared by the UCX comm component. The + * context can only be changed while the PaRSEC runtime is down, more + * specifically while the communication thread is not active. * * parsec_context_wait becomes collective across nodes spanning * on this communication context. @@ -246,7 +254,8 @@ void parsec_abort( parsec_context_t* pcontext, int status); * @details * Complete all pending operations on the execution context, and release * all associated resources. Threads and accelerators attached to this - * context will be released. + * context will be released. If parsec_init() initialized the selected + * communication backend's process runtime, parsec_fini() finalizes it. 
* * @param[inout] pcontext a pointer to the PaRSEC context to finalize * @return PARSEC_SUCCESS on success @@ -307,11 +316,18 @@ typedef enum parsec_context_query_cmd_e { * @brief Query PaRSEC context's properties. * * @details - * Query properties of the runtime, such as number of devices of a certain type - * or number of cores available to the context. + * Query properties of the runtime, such as the rank and size known by the + * selected communication engine, the number of devices of a certain type, or + * the number of cores available to the context. + * + * PARSEC_CONTEXT_QUERY_RANK and PARSEC_CONTEXT_QUERY_NODES are valid after + * parsec_init() returns. The selected communication backend owns how those + * values are discovered, so callers should use these queries instead of + * assuming MPI_COMM_WORLD. * * @param[in] context the PaRSEC context - * @param[in] device_type the type of device the query is about + * @param[in] cmd the property to query + * @param[in] ... optional arguments required by the selected query command * @return PARSEC_ERR_NOT_SUPPORTED if the command is not supported, PARSEC_ERR_NOT_FOUND * if the correct answer cannot yet be returned (such as when the PaRSEC context * has not yet properly been initialized), or the answer to the query (always diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1ffcdacba..9d02e7cc2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,7 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + add_custom_target(parsec_build_tests) add_test(parsec_build_tests "${CMAKE_COMMAND}" --build ${CMAKE_BINARY_DIR} --target parsec_build_tests) @@ -101,6 +105,9 @@ check_function_exists(random PARSEC_HAVE_RANDOM) add_library(tests_common OBJECT tests_data.c) target_link_libraries(tests_common PRIVATE parsec) +add_library(tests_runtime_common OBJECT tests_runtime.c) +target_link_libraries(tests_runtime_common PRIVATE parsec) + add_subdirectory(class) add_subdirectory(api) if( TARGET parsec-ptgpp ) diff --git a/tests/api/CMakeLists.txt b/tests/api/CMakeLists.txt index 7046132bf..45a2e2ff5 100644 --- a/tests/api/CMakeLists.txt +++ b/tests/api/CMakeLists.txt @@ -1,8 +1,14 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + if(TARGET parsec-ptgpp) parsec_addtest_executable(C touch_ex SOURCES touch_ex.c) + target_link_libraries(touch_ex PRIVATE tests_runtime_common) target_ptg_sources(touch_ex PRIVATE "touch.jdf") parsec_addtest_executable(C touch_ex_inline SOURCES touch_ex.c) + target_link_libraries(touch_ex_inline PRIVATE tests_runtime_common) target_ptg_sources(touch_ex_inline PRIVATE "touch.jdf") target_compile_definitions(touch_ex_inline PRIVATE BUILDING_PARSEC) target_compile_options(touch_ex_inline PRIVATE ${PARSEC_ATOMIC_SUPPORT_OPTIONS}) @@ -26,3 +32,6 @@ endif(TARGET parsec-ptgpp) parsec_addtest_executable(C init_fini SOURCES init_fini.c) parsec_addtest_executable(C operator SOURCES operator.c) parsec_addtest_executable(C compose SOURCES compose.c) +target_link_libraries(init_fini PRIVATE tests_runtime_common) +target_link_libraries(operator PRIVATE tests_runtime_common) +target_link_libraries(compose PRIVATE tests_runtime_common) diff --git a/tests/api/compose.c b/tests/api/compose.c index 9100d73ab..05c4b587c 100644 --- a/tests/api/compose.c +++ b/tests/api/compose.c @@ -2,11 +2,13 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" #include "parsec/execution_stream.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #include #define TYPE PARSEC_MATRIX_INTEGER @@ -24,9 +26,7 @@ parsec_operator_print_id( struct parsec_execution_stream_s *es, va_list ap; int m, n, rank = 0; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif + rank = parsec_context_query(es->virtual_process->parsec_context, PARSEC_CONTEXT_QUERY_RANK); va_start(ap, op_data); m = va_arg(ap, int); @@ -44,15 +44,6 @@ int main(int argc, char* argv[]) parsec_taskpool_t *tp1, *tp2, *tp3; int nodes, rank, rc, i = 0; -#if defined(PARSEC_HAVE_MPI) - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &nodes); - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -79,8 +70,10 @@ int main(int argc, char* argv[]) } } - parsec = parsec_init(1, &pargc, &pargv); - assert( NULL != parsec ); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &dcA, TYPE, PARSEC_MATRIX_TILE, rank, @@ -117,10 +110,7 @@ int main(int argc, char* argv[]) parsec_data_free(dcA.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/api/init_fini.c b/tests/api/init_fini.c index bdc531a18..6f3efe51c 100644 --- a/tests/api/init_fini.c +++ b/tests/api/init_fini.c @@ -2,22 +2,23 @@ * Copyright (c) 2021-2023 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include "parsec.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" int main(int argc, char *argv[]) { -#if defined(PARSEC_HAVE_MPI) - int mpith = MPI_THREAD_SINGLE; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &mpith); - assert(mpith >= MPI_THREAD_SERIALIZED); // parsec will do the complaining in NDEBUG -#endif - parsec_context_t *parsec = parsec_init(-1, &argc, &argv); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + parsec_context_t *parsec; + int rc; + + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + + return 0; } diff --git a/tests/api/operator.c b/tests/api/operator.c index a3a2f687b..1a965fb8a 100644 --- a/tests/api/operator.c +++ b/tests/api/operator.c @@ -2,6 +2,7 @@ * Copyright (c) 2011-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" @@ -10,6 +11,7 @@ #include "parsec/execution_stream.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" static int parsec_operator_print_id( struct parsec_execution_stream_s *es, @@ -20,9 +22,8 @@ parsec_operator_print_id( struct parsec_execution_stream_s *es, va_list ap; int k, n, rank = 0; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif + rank = parsec_context_query(es->virtual_process->parsec_context, + PARSEC_CONTEXT_QUERY_RANK); va_start(ap, op_data); k = va_arg(ap, int); @@ -43,16 +44,10 @@ int main( int argc, char* argv[] ) int lm = 1000, ln = 1000; int rows = 1, rc; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &dcA, PARSEC_MATRIX_FLOAT, PARSEC_MATRIX_TILE, rank, mb, nb, lm, ln, 0, 0, lm, ln, @@ -80,11 +75,8 @@ int main( int argc, char* argv[] ) parsec_data_free(dcA.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/api/taskpool_wait/CMakeLists.txt b/tests/api/taskpool_wait/CMakeLists.txt index 84fc088f7..cf5a4feb1 100644 --- a/tests/api/taskpool_wait/CMakeLists.txt +++ b/tests/api/taskpool_wait/CMakeLists.txt @@ -1,5 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) parsec_addtest_executable(C taskpool_wait SOURCES main.c dtd_tp.c) +target_link_libraries(taskpool_wait PRIVATE tests_runtime_common) target_include_directories(taskpool_wait PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(taskpool_wait PRIVATE "ptg_tp.jdf") diff --git a/tests/api/taskpool_wait/dtd_tp.c b/tests/api/taskpool_wait/dtd_tp.c index 27f94af0d..1812f6e5d 100644 --- a/tests/api/taskpool_wait/dtd_tp.c +++ b/tests/api/taskpool_wait/dtd_tp.c @@ -2,6 +2,7 @@ * Copyright (c) 2023-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" #include "parsec/interfaces/dtd/insert_function.h" @@ -11,10 +12,6 @@ #include "parsec/execution_stream.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int task( parsec_execution_stream_t *es, parsec_task_t *this_task ) { (void)es; int delta, m, n, *A; diff --git a/tests/api/taskpool_wait/main.c b/tests/api/taskpool_wait/main.c index 3d7e1b380..27b12b8df 100644 --- a/tests/api/taskpool_wait/main.c +++ b/tests/api/taskpool_wait/main.c @@ -2,9 +2,11 @@ * Copyright (c) 2023-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #include "ptg_tp.h" #include "dtd_tp.h" @@ -22,16 +24,14 @@ int main(int argc, char *argv[]) { int rc; err = 0; + (void)argc; + (void)argv; parsec_context_t *parsec; -#if defined(PARSEC_HAVE_MPI) - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - MPI_Comm_size(MPI_COMM_WORLD, &world_size); -#endif - - parsec = parsec_init(-1, NULL, NULL); + err = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_MULTIPLE, + NULL, NULL, + &parsec, &my_rank, &world_size); + PARSEC_CHECK_ERROR(err, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init(&A, PARSEC_MATRIX_INTEGER, PARSEC_MATRIX_TILE, my_rank, @@ -107,9 +107,7 @@ int main(int argc, char *argv[]) { parsec_dtd_data_collection_fini(&A.super.super); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&A); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return err; } diff --git a/tests/api/touch_ex.c b/tests/api/touch_ex.c index db3ada37b..1e3d7e3f0 100644 --- a/tests/api/touch_ex.c +++ b/tests/api/touch_ex.c @@ -2,10 +2,13 @@ * Copyright (c) 2013-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "touch.h" #include #include @@ -21,11 +24,7 @@ int main( int argc, char** argv ) { parsec_context_t* parsec; parsec_taskpool_t* tp; - int i = 1, rc, verbose; - -#ifdef PARSEC_HAVE_MPI - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &rc); -#endif + int i = 1, rc, verbose = 0; int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { @@ -40,10 +39,9 @@ int main( int argc, char** argv ) } } - parsec = parsec_init(1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-2); - } + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); tp = touch_initialize(BLOCK, N); rc = parsec_context_add_taskpool( parsec, tp ); @@ -58,12 +56,10 @@ int main( int argc, char** argv ) touch_finalize(); parsec_taskpool_free(tp); - parsec_fini( &parsec); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( verbose >= 5 ) { } -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif return 0; } diff --git a/tests/api/touch_exf.F90 b/tests/api/touch_exf.F90 index afcaff5a2..37350edc9 100644 --- a/tests/api/touch_exf.F90 +++ b/tests/api/touch_exf.F90 @@ -2,13 +2,13 @@ ! Copyright (c) 2021-2024 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. +! Copyright (c) 2026 NVIDIA Corporation. All rights reserved. ! 
PROGRAM TOUCH_EXF use, INTRINSIC :: ISO_C_BINDING, only : c_int use parsec_f08_interfaces - use mpi interface function touch_initialize_f08(block, n) BIND(C, name="touch_initialize") @@ -26,14 +26,12 @@ function touch_finalize_f08() BIND(C, name="touch_finalize") end function touch_finalize_f08 end interface - integer BLOCK, N, mpith, ret + integer BLOCK, N, ret parameter (BLOCK=10, N=100) type(parsec_context_t) :: context type(parsec_taskpool_t) :: tp - call MPI_Init_thread(MPI_THREAD_MULTIPLE, mpith, ret) - call parsec_init(1, context) tp = touch_initialize_f08(BLOCK, N) @@ -50,7 +48,5 @@ end function touch_finalize_f08 call parsec_fini(context) - call MPI_Finalize(ret) - call exit(ret) END diff --git a/tests/apps/CMakeLists.txt b/tests/apps/CMakeLists.txt index 1b6985261..ff57e1f05 100644 --- a/tests/apps/CMakeLists.txt +++ b/tests/apps/CMakeLists.txt @@ -4,6 +4,7 @@ if(TARGET parsec-ptgpp) add_subdirectory(generalized_reduction) add_subdirectory(stencil) add_subdirectory(merge_sort) - add_subdirectory(haar_tree) + if(MPI_C_FOUND) + add_subdirectory(haar_tree) + endif(MPI_C_FOUND) endif(TARGET parsec-ptgpp) - diff --git a/tests/apps/all2all/CMakeLists.txt b/tests/apps/all2all/CMakeLists.txt index 4bdf7c650..a2b519373 100644 --- a/tests/apps/all2all/CMakeLists.txt +++ b/tests/apps/all2all/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C a2a SOURCES main.c a2a_data.c) +target_link_libraries(a2a PRIVATE tests_runtime_common) target_ptg_sources(a2a PRIVATE "a2a.jdf") target_link_libraries(a2a PRIVATE m) - diff --git a/tests/apps/all2all/a2a.jdf b/tests/apps/all2all/a2a.jdf index bb6fad334..ec280a81a 100644 --- a/tests/apps/all2all/a2a.jdf +++ b/tests/apps/all2all/a2a.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2013-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. * * @precisions normal z -> s d c * @@ -11,9 +12,6 @@ extern "C" %{ #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include int32_t always_zero() @@ -109,16 +107,10 @@ extern "C" %{ */ parsec_taskpool_t *a2a_new(parsec_tiled_matrix_t *A, parsec_tiled_matrix_t *B, int size, int repeat) { - int worldsize; + int worldsize = (int)A->super.nodes; parsec_a2a_taskpool_t *tp = NULL; (void)size; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_size(MPI_COMM_WORLD, &worldsize); -#else - worldsize = 1; -#endif - if( repeat <= 0 ) { fprintf(stderr, "To work, A2A must do at least one exchange of at least one byte\n"); return (parsec_taskpool_t*)tp; diff --git a/tests/apps/all2all/main.c b/tests/apps/all2all/main.c index fecf16fe2..d2be1f43f 100644 --- a/tests/apps/all2all/main.c +++ b/tests/apps/all2all/main.c @@ -2,10 +2,12 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "a2a_wrapper.h" #include "a2a_data.h" #if defined(PARSEC_HAVE_STRING_H) @@ -20,18 +22,10 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_t *dcA, *dcB; parsec_taskpool_t *a2a; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); size = 256; repeat = 10; @@ -52,13 +46,10 @@ int main(int argc, char *argv[]) PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); a2a_free(a2a); - parsec_fini(&parsec); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); free_data(dcA); free_data(dcB); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif - return 0; } diff --git a/tests/apps/generalized_reduction/BT_reduction_wrapper.c b/tests/apps/generalized_reduction/BT_reduction_wrapper.c index 4e19c43c6..2cb16fb78 100644 --- a/tests/apps/generalized_reduction/BT_reduction_wrapper.c +++ b/tests/apps/generalized_reduction/BT_reduction_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif static parsec_datatype_t block; #include diff --git a/tests/apps/generalized_reduction/CMakeLists.txt b/tests/apps/generalized_reduction/CMakeLists.txt index f77a6d3ef..bf5bd32c7 100644 --- a/tests/apps/generalized_reduction/CMakeLists.txt +++ b/tests/apps/generalized_reduction/CMakeLists.txt @@ -1,4 +1,9 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C BT_reduction SOURCES main.c BT_reduction_wrapper.c reduc_data.c) +target_link_libraries(BT_reduction PRIVATE tests_runtime_common) target_ptg_sources(BT_reduction PRIVATE "BT_reduction.jdf") diff --git a/tests/apps/generalized_reduction/main.c b/tests/apps/generalized_reduction/main.c index 57a6e5f7c..7dbc9ee00 100644 --- a/tests/apps/generalized_reduction/main.c +++ b/tests/apps/generalized_reduction/main.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,6 +11,7 @@ #endif #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "BT_reduction_wrapper.h" #if defined(PARSEC_HAVE_STRING_H) #include @@ -24,18 +26,10 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_t *dcA; parsec_taskpool_t *BT_reduction; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); nb = 1; nt = 7; @@ -59,11 +53,8 @@ int main(int argc, char *argv[]) parsec_taskpool_free((parsec_taskpool_t*)BT_reduction); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/apps/haar_tree/CMakeLists.txt b/tests/apps/haar_tree/CMakeLists.txt index 79332681d..a69950dda 100644 --- a/tests/apps/haar_tree/CMakeLists.txt +++ b/tests/apps/haar_tree/CMakeLists.txt @@ -3,10 +3,10 @@ include(ParsecCompilePTG) parsec_addtest_executable(C project SOURCES main.c tree_dist.c) target_ptg_sources(project PRIVATE "project.jdf;walk.jdf") target_include_directories(project PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) -target_link_libraries(project PRIVATE Threads::Threads m) +target_link_libraries(project PRIVATE Threads::Threads m tests_runtime_common) parsec_addtest_executable(C project_dyn SOURCES main.c tree_dist.c) target_ptg_sources(project_dyn PRIVATE "project_dyn.jdf;walk.jdf") target_compile_definitions(project_dyn PUBLIC parsec_project_new=parsec_project_dyn_new) target_include_directories(project_dyn PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) 
-target_link_libraries(project_dyn PRIVATE Threads::Threads m) +target_link_libraries(project_dyn PRIVATE Threads::Threads m tests_runtime_common) diff --git a/tests/apps/haar_tree/Testings.cmake b/tests/apps/haar_tree/Testings.cmake index f29ec204a..b67848047 100644 --- a/tests/apps/haar_tree/Testings.cmake +++ b/tests/apps/haar_tree/Testings.cmake @@ -1,4 +1,3 @@ -parsec_addtest_cmd(apps/haar_tree ${SHM_TEST_CMD_LIST} apps/haar_tree/project -x) if( MPI_C_FOUND ) parsec_addtest_cmd(apps/haar_tree:mp ${MPI_TEST_CMD_LIST} 4 apps/haar_tree/project -x) if(TEST apps/haar_tree:mp) diff --git a/tests/apps/haar_tree/main.c b/tests/apps/haar_tree/main.c index 45dd1ef10..209b80422 100644 --- a/tests/apps/haar_tree/main.c +++ b/tests/apps/haar_tree/main.c @@ -2,10 +2,12 @@ * Copyright (c) 2016-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/arena.h" +#include "tests/tests_runtime.h" #include "tree_dist.h" #include "project.h" @@ -156,18 +158,6 @@ int main(int argc, char *argv[]) uint64_t cksum = 0; redim_string_t *rs; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - pargc = 0; pargv = NULL; for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -176,7 +166,9 @@ int main(int argc, char *argv[]) break; } } - parsec = parsec_init(1, &pargc, &pargv); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); while ((ch = getopt(argc, argv, "xvd:m:M:f:")) != -1) { switch (ch) { @@ -224,9 +216,10 @@ int main(int argc, char *argv[]) 
parsec_matrix_adt_define_rect( adt, parsec_datatype_float_t, 2, 1, 2); -#if defined(HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } project = parsec_project_new(treeA, world, (parsec_data_collection_t*)&fakeDesc, 1e-3, be_verbose, 1.0); project->arenas_datatypes[PARSEC_project_DEFAULT_ADT_IDX] = *adt; @@ -257,7 +250,7 @@ int main(int argc, char *argv[]) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); -#if defined(HAVE_MPI) +#if defined(PARSEC_HAVE_MPI) if( do_checks ) { uint64_t sum = 0; printf("Rank %d contributes with %llx\n", rank, cksum); @@ -292,7 +285,7 @@ int main(int argc, char *argv[]) rs_free(rs); } } -#endif /* defined(HAVE_MPI) */ +#endif /* defined(PARSEC_HAVE_MPI) */ parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&fakeDesc); tree_dist_free(treeA); @@ -300,11 +293,8 @@ int main(int argc, char *argv[]) parsec_taskpool_free(&project->super); parsec_matrix_adt_free( &adt ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/apps/merge_sort/CMakeLists.txt b/tests/apps/merge_sort/CMakeLists.txt index 9b2453eca..6bdfa3ffb 100644 --- a/tests/apps/merge_sort/CMakeLists.txt +++ b/tests/apps/merge_sort/CMakeLists.txt @@ -1,6 +1,11 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) if(PARSEC_HAVE_RANDOM) parsec_addtest_executable(C merge_sort SOURCES main.c merge_sort_wrapper.c sort_data.c) +target_link_libraries(merge_sort PRIVATE tests_runtime_common) target_ptg_sources(merge_sort PRIVATE "merge_sort.jdf") endif(PARSEC_HAVE_RANDOM) diff --git a/tests/apps/merge_sort/main.c b/tests/apps/merge_sort/main.c index cc000a11c..33f288ab4 100644 --- a/tests/apps/merge_sort/main.c +++ b/tests/apps/merge_sort/main.c @@ -2,12 +2,14 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "merge_sort_wrapper.h" #if defined(PARSEC_HAVE_STRING_H) #include @@ -22,14 +24,6 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_t *dcA; parsec_taskpool_t *msort; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif if( argc > 1 ) { char* endptr; long val = strtol(argv[1], &endptr, 0); @@ -44,10 +38,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(cores, &argc, &argv); - if( NULL == parsec ) { - exit(1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); dcA = create_and_distribute_data(rank, world, nb, nt, sizeof(int)); parsec_data_collection_set_key((parsec_data_collection_t *)dcA, "A"); @@ -64,11 +58,8 @@ int main(int argc, char *argv[]) merge_sort_free((parsec_taskpool_t*)msort); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff 
--git a/tests/apps/merge_sort/merge_sort_wrapper.c b/tests/apps/merge_sort/merge_sort_wrapper.c index c1c37a88f..5d5e88b52 100644 --- a/tests/apps/merge_sort/merge_sort_wrapper.c +++ b/tests/apps/merge_sort/merge_sort_wrapper.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -9,9 +10,6 @@ #include "parsec/arena.h" #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "merge_sort.h" diff --git a/tests/apps/pingpong/CMakeLists.txt b/tests/apps/pingpong/CMakeLists.txt index a863dcbad..48c6b6c9f 100644 --- a/tests/apps/pingpong/CMakeLists.txt +++ b/tests/apps/pingpong/CMakeLists.txt @@ -1,9 +1,14 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C rtt SOURCES main.c rtt_wrapper.c rtt_data.c) +target_link_libraries(rtt PRIVATE tests_runtime_common) target_ptg_sources(rtt PRIVATE "rtt.jdf") parsec_addtest_executable(C bw_test) +target_link_libraries(bw_test PRIVATE tests_runtime_common) set_source_files_properties("bandwidth.jdf" PROPERTIES PTGPP_COMPILE_OPTIONS "--Wremoteref") target_ptg_sources(bw_test PRIVATE "bandwidth.jdf") - diff --git a/tests/apps/pingpong/bandwidth.jdf b/tests/apps/pingpong/bandwidth.jdf index df38aa8d3..288f889da 100644 --- a/tests/apps/pingpong/bandwidth.jdf +++ b/tests/apps/pingpong/bandwidth.jdf @@ -24,9 +24,7 @@ extern "C" %{ #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "tests/tests_runtime.h" %} @@ -180,6 +178,7 @@ int main(int argc, char *argv[]) int rank, nodes, ch, i; int pargc = 0; char **pargv = NULL; + int rc; struct timeval tstart, tend; double t, bw, messages; @@ -191,18 +190,6 @@ int main(int argc, char *argv[]) int cores = 1; int nb_runs = 1; -#if defined(PARSEC_HAVE_MPI) - { 
- int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - while ((ch = getopt(argc, argv, "n:f:l:u:c:h:e:")) != -1) { switch (ch) { case 'n': loops = atoi(optarg); break; @@ -237,14 +224,13 @@ int main(int argc, char *argv[]) break; } } - /* Initialize PaRSEC */ - parsec = parsec_init(cores, &pargc, &pargv); - if( NULL == parsec ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ - exit(-1); + /* Initialize PaRSEC */ + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &pargc, &pargv, &parsec, &rank, &nodes); + if( PARSEC_SUCCESS != rc ) { + fprintf(stderr, "parsec_tests_context_init failed: %d\n", rc); + exit(EXIT_FAILURE); } /* If the number of cores has not been defined as a parameter earlier @@ -292,9 +278,11 @@ int main(int argc, char *argv[]) parsec_datatype_uint8_t, 1, size, 1); /* Time start */ -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif /* defined(PARSEC_HAVE_MPI) */ + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + fprintf(stderr, "parsec_tests_barrier failed: %d\n", rc); + exit(EXIT_FAILURE); + } gettimeofday(&tstart, NULL); parsec_context_add_taskpool(parsec, bandwidth_taskpool); @@ -302,9 +290,11 @@ int main(int argc, char *argv[]) parsec_context_wait(parsec); /* Time end */ -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif /* defined(PARSEC_HAVE_MPI) */ + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + fprintf(stderr, "parsec_tests_barrier failed: %d\n", rc); + exit(EXIT_FAILURE); + } gettimeofday(&tend, NULL); if( 0 == rank ) { @@ -327,11 +317,11 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&Disk); /* Clean 
up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + if( PARSEC_SUCCESS != rc ) { + fprintf(stderr, "parsec_tests_context_fini failed: %d\n", rc); + exit(EXIT_FAILURE); + } return 0; } diff --git a/tests/apps/pingpong/main.c b/tests/apps/pingpong/main.c index a0aaff505..97be7a360 100644 --- a/tests/apps/pingpong/main.c +++ b/tests/apps/pingpong/main.c @@ -6,6 +6,7 @@ */ #include "parsec/runtime.h" +#include "tests/tests_runtime.h" #include "rtt_wrapper.h" #include "rtt_data.h" #if defined(PARSEC_HAVE_STRING_H) @@ -15,9 +16,6 @@ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #include "parsec/utils/debug.h" static int next_message_size(int current, int upper) @@ -55,18 +53,6 @@ int main(int argc, char *argv[]) parsec_data_collection_t *dcA; parsec_taskpool_t *rtt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - while ((ch = getopt(argc, argv, "n:l:u:h")) != -1) { switch (ch) { case 'n': loops = atoi(optarg); break; @@ -99,7 +85,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(-1, &pargc, &pargv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); nb = loops * world; for(idx = 0, size = start_length; ; idx++) { @@ -114,9 +103,10 @@ int main(int argc, char *argv[]) rc = parsec_context_add_taskpool(parsec, rtt); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } 
gettimeofday(&tstart, NULL); rc = parsec_context_start(parsec); @@ -125,9 +115,10 @@ int main(int argc, char *argv[]) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } gettimeofday(&tend, NULL); if( 0 == rank ) { @@ -145,11 +136,8 @@ int main(int argc, char *argv[]) size = next_message_size(size, end_length); } - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/apps/pingpong/rtt_wrapper.c b/tests/apps/pingpong/rtt_wrapper.c index b76cf579b..1a6bb8e6e 100644 --- a/tests/apps/pingpong/rtt_wrapper.c +++ b/tests/apps/pingpong/rtt_wrapper.c @@ -11,9 +11,6 @@ #include "parsec/arena.h" #include "parsec/mca/device/device.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "rtt.h" @@ -45,11 +42,7 @@ parsec_taskpool_t *rtt_new(parsec_data_collection_t *A, int size, int nb) { parsec_rtt_taskpool_t *tp = NULL; parsec_datatype_t block; - int worldsize = 1; - -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_size(MPI_COMM_WORLD, &worldsize); -#endif + int worldsize = (int)A->nodes; if( nb <= 0 || size <= 0 ) { fprintf(stderr, "To work, RTT must do at least one round time trip of at least one byte\n"); diff --git a/tests/apps/stencil/CMakeLists.txt b/tests/apps/stencil/CMakeLists.txt index 6f508f473..068aea386 100644 --- a/tests/apps/stencil/CMakeLists.txt +++ b/tests/apps/stencil/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + file(COPY loop_gen_1D DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) parsec_addtest_executable(C testing_stencil_1D SOURCES stencil_internal.c testing_stencil_1D.c) target_include_directories(testing_stencil_1D PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(testing_stencil_1D PRIVATE "stencil_1D.jdf") -target_link_libraries(testing_stencil_1D PRIVATE m) +target_link_libraries(testing_stencil_1D PRIVATE m tests_runtime_common) diff --git a/tests/apps/stencil/stencil_internal.h b/tests/apps/stencil/stencil_internal.h index dcc16d0a4..17ecf78ad 100644 --- a/tests/apps/stencil/stencil_internal.h +++ b/tests/apps/stencil/stencil_internal.h @@ -2,6 +2,7 @@ * Copyright (c) 2019-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* includes parsec headers */ #include "parsec.h" @@ -18,10 +19,6 @@ #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - /* Flops */ #define FLOPS_STENCIL_1D(n) ( (DTYPE)(iter) * (2*(2*R+1)) * (DTYPE)(n) ) diff --git a/tests/apps/stencil/testing_stencil_1D.c b/tests/apps/stencil/testing_stencil_1D.c index f9df84ff2..935dfcfcc 100644 --- a/tests/apps/stencil/testing_stencil_1D.c +++ b/tests/apps/stencil/testing_stencil_1D.c @@ -2,6 +2,7 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "stencil_internal.h" #include "tests/tests_timing.h" @@ -15,7 +16,7 @@ DTYPE * weight_1D; int main(int argc, char *argv[]) { parsec_context_t* parsec; - int rank, nodes, ch; + int rank, nodes, ch, rc; int pargc = 0; char **pargv; double gflops, flops; @@ -49,7 +50,7 @@ int main(int argc, char *argv[]) case 'R': R = atoi(optarg); break; case '?': case 'h': default: fprintf(stderr, - "-m : initialize MPI_THREAD_MULTIPLE (default: 0/no)\n" + "-m : request multiple-thread support from the test runtime (default: 0/no)\n" "-M : row dimension (M) of the matrices (default: 8)\n" "-N : column dimension (N) of the matrices (default: 8)\n" "-t : row dimension (MB) of the tiles (default: 4)\n" @@ -65,19 +66,6 @@ int main(int argc, char *argv[]) } } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - int requested = m? MPI_THREAD_MULTIPLE: MPI_THREAD_SERIALIZED; - MPI_Init_thread(&argc, &argv, requested, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - pargc = 0; pargv = NULL; for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -87,6 +75,11 @@ int main(int argc, char *argv[]) } } + rc = parsec_tests_context_init(cores, + m ? PARSEC_TEST_THREAD_MULTIPLE : PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + if(0) { volatile int loop = 1; fprintf(stderr, "gdb -p %d\n", getpid()); @@ -94,16 +87,6 @@ int main(int argc, char *argv[]) sleep(1); } - /* Initialize PaRSEC */ - parsec = parsec_init(cores, &pargc, &pargv); - - if( NULL == parsec ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ - exit(-1); - } - /* If the number of cores has not been defined as a parameter earlier * update it with the default parameter computed in parsec_init. 
*/ if(cores <= 0) @@ -183,13 +166,16 @@ int main(int argc, char *argv[]) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } #endif /* Stencil_1D */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); parsec_stencil_1D(parsec, (parsec_tiled_matrix_t *)&dcA, iter, R); - SYNC_TIME_PRINT(rank, ("Stencil" "\tN= %d NB= %d M= %d MB= %d " + SYNC_TIME_PRINT(parsec, rank, ("Stencil" "\tN= %d NB= %d M= %d MB= %d " "PxQ= %d %d KPxKQ= %d %d " "Iteration= %d Radius= %d Kernel_type= %d " "Number_of_buffers= %d cores= %d : %lf gflops\n", @@ -200,12 +186,8 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); /* Clean up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif - + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/class/lifo.c b/tests/class/lifo.c index 2ee4219fa..d5f959f21 100644 --- a/tests/class/lifo.c +++ b/tests/class/lifo.c @@ -15,9 +15,6 @@ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include "parsec/class/lifo.h" #include "parsec/os-spec-timing.h" @@ -217,12 +214,6 @@ int main(int argc, char *argv[]) min_time = 0; max_time = 0xffffffff; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } -#endif while( (ch = getopt(argc, argv, "c:n:N:h?")) != -1 ) { switch(ch) { case 'c': { @@ -341,8 +332,5 @@ int main(int argc, char *argv[]) printf(" - all tests passed\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif return 0; } diff --git a/tests/class/list.c b/tests/class/list.c index 0ef61c81f..af1bab9f4 100644 --- a/tests/class/list.c +++ b/tests/class/list.c @@ -16,9 +16,6 @@ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include 
"parsec/class/list.h" #include "parsec/os-spec-timing.h" @@ -261,13 +258,6 @@ int main(int argc, char *argv[]) min_time = 0; max_time = 0xffffffff; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } -#endif - while( (ch = getopt(argc, argv, "c:n:N:h?")) != -1 ) { switch(ch) { case 'c': { @@ -391,8 +381,5 @@ int main(int argc, char *argv[]) printf(" - all tests passed\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif return 0; } diff --git a/tests/collections/CMakeLists.txt b/tests/collections/CMakeLists.txt index d705bb331..cfa718445 100644 --- a/tests/collections/CMakeLists.txt +++ b/tests/collections/CMakeLists.txt @@ -1,12 +1,18 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + parsec_addtest_executable(C reduce SOURCES reduce.c) +target_link_libraries(reduce PRIVATE tests_runtime_common) parsec_addtest_executable(C kcyclic) +target_link_libraries(kcyclic PRIVATE tests_runtime_common) target_ptg_sources(kcyclic PRIVATE "kcyclic.jdf") target_link_libraries(kcyclic PRIVATE m) add_subdirectory(two_dim_band) -if(PARSEC_HAVE_MPI) +if(MPI_C_FOUND) add_subdirectory(redistribute) -endif(PARSEC_HAVE_MPI) +endif(MPI_C_FOUND) add_subdirectory(reshape) diff --git a/tests/collections/Testings.cmake b/tests/collections/Testings.cmake index d88f0f616..9e9c9d4bb 100644 --- a/tests/collections/Testings.cmake +++ b/tests/collections/Testings.cmake @@ -1,7 +1,7 @@ parsec_addtest_cmd(collections/reduce ${SHM_TEST_CMD_LIST} collections/reduce) -if( PARSEC_HAVE_MPI ) +if( MPI_C_FOUND ) parsec_addtest_cmd(collections/redistribute:mp ${MPI_TEST_CMD_LIST} 8 collections/redistribute/testing_redistribute -M 2400 -N 2400 -a 2400 -A 2400 -t 300 -T 300 -b 200 -B 200 -m 2000 -n 2000 -I 30 -J 40 -i 100 -j 121 -v -z -x -P 2 -Q 4 -p 4 -q 2) set(PARSEC_REDISTRIBUTE_SMALL_ARGS @@ -42,7 +42,7 @@ if( PARSEC_HAVE_MPI ) endforeach() parsec_addtest_cmd(collections/redistribute_random:mp 
${MPI_TEST_CMD_LIST} 8 collections/redistribute/testing_redistribute_random -M 2400 -N 2400 -a 2400 -A 2400 -t 300 -T 300 -b 200 -B 200 -m 2000 -n 2000 -I 30 -J 40 -i 100 -j 121 -v -z -x -P 2 -Q 4 -p 4 -q 2) -endif( PARSEC_HAVE_MPI ) +endif( MPI_C_FOUND ) parsec_addtest_cmd(collections/reshape ${SHM_TEST_CMD_LIST} collections/reshape/reshape -N 120 -t 9 -c 10) parsec_addtest_cmd(collections/reshape:mt ${SHM_TEST_CMD_LIST} collections/reshape/reshape -N 120 -t 9 -c 10 -m 1) diff --git a/tests/collections/kcyclic.jdf b/tests/collections/kcyclic.jdf index 122cc4ef6..ea12995af 100644 --- a/tests/collections/kcyclic.jdf +++ b/tests/collections/kcyclic.jdf @@ -3,18 +3,18 @@ extern "C" %{ * Copyright (c) 2019-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif #define TYPE PARSEC_MATRIX_INTEGER parsec_taskpool_t* kcyclic_taskpool(parsec_matrix_block_cyclic_t* A, @@ -33,17 +33,10 @@ int main( int argc, char** argv ) parsec_matrix_block_cyclic_t descCA; int rc; -#if defined(PARSEC_HAVE_MPI) - int required = MPI_THREAD_MULTIPLE, provided = 0; - MPI_Init_thread(&argc, &argv, required, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - - parsec = parsec_init(1, &argc, &argv); - if( NULL == parsec ) { - exit(-2); - } + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); n *= nodes; /* scale it */ p = sqrt(nodes); @@ -93,14 +86,12 @@ int main( int argc, char** argv ) kcyclic_taskpool_free(tp); - parsec_fini( &parsec); + rc = 
parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); free(descA.mat); free(descCA.mat); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif return 0; } diff --git a/tests/collections/redistribute/CMakeLists.txt b/tests/collections/redistribute/CMakeLists.txt index a03f38f2a..d639e495d 100644 --- a/tests/collections/redistribute/CMakeLists.txt +++ b/tests/collections/redistribute/CMakeLists.txt @@ -1,11 +1,14 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include_directories(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}") include_directories(BEFORE "${CMAKE_CURRENT_BINARY_DIR}") parsec_addtest_executable(C testing_redistribute SOURCES testing_redistribute.c common.c) -target_link_libraries(testing_redistribute PRIVATE m) +target_link_libraries(testing_redistribute PRIVATE m tests_runtime_common) target_ptg_sources(testing_redistribute PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check2.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_bound.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_no_optimization.jdf") parsec_addtest_executable(C testing_redistribute_random SOURCES testing_redistribute_random.c common.c) -target_link_libraries(testing_redistribute_random PRIVATE m) +target_link_libraries(testing_redistribute_random PRIVATE m tests_runtime_common) target_ptg_sources(testing_redistribute_random PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check2.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_bound.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_no_optimization.jdf") - diff --git a/tests/collections/redistribute/common.c b/tests/collections/redistribute/common.c index f31071fe3..7925517cb 100644 --- a/tests/collections/redistribute/common.c +++ b/tests/collections/redistribute/common.c @@ -15,10 +15,6 @@ #include #endif /* defined(PARSEC_HAVE_GETOPT_H) */ -#ifdef PARSEC_HAVE_MPI -#include -#endif - double 
time_elapsed = 0.0; double sync_time_elapsed = 0.0; @@ -127,7 +123,7 @@ void print_usage(void) " -h --help : this message\n" " -z --time : get run time\n" " -e --num-runs : number of runs\n" - " -f --thread_multiple : 0/default, init mpi with MPI_THREAD_SERIALIZED; others, MPI_THREAD_MULTIPLE\n" + " -f --thread_multiple : 0/default, serialized test runtime; others, multiple-thread test runtime\n" " -y --no-optimization : no_optimization version, send the whole tile to target; default 0, not no_optimization version\n" " -c --cores : number of concurrent threads (default: number of physical hyper-threads)\n" "\n Notes:\n" @@ -272,7 +268,7 @@ static void parse_arguments(int *_argc, char*** _argv, int* iparam, double *dpar /* Default number of runs: 1 */ iparam[IPARAM_NUM_RUNS] = 1; - /* Default MPI_THREAD_SERIALIZED */ + /* Default to serialized access to the selected test runtime. */ iparam[IPARAM_THREAD_MULTIPLE] = 0; /* Default Not no_optimization version */ @@ -472,38 +468,32 @@ static void print_arguments(int* iparam) parsec_context_t* setup_parsec(int argc, char **argv, int *iparam, double *dparam) { - parse_arguments(&argc, &argv, iparam, dparam); -#ifdef PARSEC_HAVE_MPI - { - int provided; - if( iparam[IPARAM_THREAD_MULTIPLE] ) - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - else - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &iparam[IPARAM_NNODES]); - MPI_Comm_rank(MPI_COMM_WORLD, &iparam[IPARAM_RANK]); -#else - iparam[IPARAM_NNODES] = 1; - iparam[IPARAM_RANK] = 0; -#endif - int verbose = iparam[IPARAM_VERBOSE]; - if(iparam[IPARAM_RANK] > 0 && verbose < 4) verbose = 0; + parsec_context_t* ctx = NULL; + int rc; - SYNC_TIME_START(); + parse_arguments(&argc, &argv, iparam, dparam); /* Once we got out arguments, we should pass whatever is left down */ int parsec_argc = argc - optind; char** parsec_argv = argv + optind; - parsec_context_t* ctx = parsec_init(iparam[IPARAM_NCORES], - 
&parsec_argc, &parsec_argv); - if( NULL == ctx ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ + rc = parsec_tests_context_init(iparam[IPARAM_NCORES], + iparam[IPARAM_THREAD_MULTIPLE] ? + PARSEC_TEST_THREAD_MULTIPLE : + PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &ctx, + &iparam[IPARAM_RANK], + &iparam[IPARAM_NNODES]); + if( PARSEC_SUCCESS != rc ) { + fprintf(stderr, "parsec_tests_context_init failed: %d\n", rc); exit(-1); } + int verbose = iparam[IPARAM_VERBOSE]; + if(iparam[IPARAM_RANK] > 0 && verbose < 4) verbose = 0; + + SYNC_TIME_START(ctx); + /* If the number of cores has not been defined as a parameter earlier update it with the default parameter computed in parsec_init. */ if(iparam[IPARAM_NCORES] <= 0) @@ -516,17 +506,14 @@ parsec_context_t* setup_parsec(int argc, char **argv, int *iparam, double *dpara } print_arguments(iparam); - if(verbose > 2) SYNC_TIME_PRINT(iparam[IPARAM_RANK], ("PaRSEC initialized\n")); + if(verbose > 2) SYNC_TIME_PRINT(ctx, iparam[IPARAM_RANK], ("PaRSEC initialized\n")); return ctx; } void cleanup_parsec(parsec_context_t* parsec, int *iparam, double *dparam) { - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); (void)iparam; (void)dparam; } diff --git a/tests/collections/redistribute/redistribute_bound.jdf b/tests/collections/redistribute/redistribute_bound.jdf index 8680d7aed..1fe18e90f 100644 --- a/tests/collections/redistribute/redistribute_bound.jdf +++ b/tests/collections/redistribute/redistribute_bound.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2017-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "redistribute_test.h" @@ -377,10 +377,22 @@ double* parsec_redistribute_bound(parsec_context_t *parsec, long long int *total_remote_rank = (long long int *)calloc(world_size, sizeof(long long int)); MPI_Barrier( MPI_COMM_WORLD ); - MPI_Allreduce(&sum_remote[0], &total_remote, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&sum_local[0], &total_local, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&sum_nb_message_remote[0], &total_nb_message_remote, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&sum_nb_message_local[0], &total_nb_message_local, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); + int reduce_rc = parsec_tests_allreduce(parsec, &sum_remote[0], &total_remote, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + reduce_rc = parsec_tests_allreduce(parsec, &sum_local[0], &total_local, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + reduce_rc = parsec_tests_allreduce(parsec, &sum_nb_message_remote[0], &total_nb_message_remote, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + reduce_rc = parsec_tests_allreduce(parsec, &sum_nb_message_local[0], &total_nb_message_local, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); MPI_Allgather( &sum_remote_rank_send[0], 1, MPI_LONG_LONG_INT, total_remote_rank_send, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD); MPI_Allgather( &sum_remote_rank_receive[0], 1, MPI_LONG_LONG_INT, total_remote_rank_receive, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD); MPI_Allgather( &sum_local_rank[0], 1, MPI_LONG_LONG_INT, total_local_rank, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD); diff --git a/tests/collections/redistribute/redistribute_check.jdf b/tests/collections/redistribute/redistribute_check.jdf index edaebebf1..06fcd37a5 
100644 --- a/tests/collections/redistribute/redistribute_check.jdf +++ b/tests/collections/redistribute/redistribute_check.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2017-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "redistribute_test.h" @@ -328,7 +329,8 @@ int parsec_redistribute_check(parsec_context_t *parsec, T_g = (DTYPE *)calloc(size_row*size_col, sizeof(DTYPE)); } - MPI_Barrier(MPI_COMM_WORLD); + int barrier_rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(barrier_rc, "parsec_tests_barrier"); parsec_redistribute_check = parsec_redistribute_check_New( (parsec_tiled_matrix_t *)dcY, diff --git a/tests/collections/redistribute/redistribute_check2.jdf b/tests/collections/redistribute/redistribute_check2.jdf index 4a3d958b2..529dc2a1b 100644 --- a/tests/collections/redistribute/redistribute_check2.jdf +++ b/tests/collections/redistribute/redistribute_check2.jdf @@ -159,9 +159,14 @@ int parsec_redistribute_check2(parsec_context_t *parsec, info_sum_thd += info[i]; } - MPI_Barrier( MPI_COMM_WORLD ); - MPI_Allreduce(&info_sum_thd, &info_total, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Barrier( MPI_COMM_WORLD ); + int barrier_rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(barrier_rc, "parsec_tests_barrier"); + int reduce_rc = parsec_tests_allreduce(parsec, &info_sum_thd, &info_total, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + barrier_rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(barrier_rc, "parsec_tests_barrier"); if( 0 == dcY->super.myrank ) { if( 0LL == info_total ) diff --git a/tests/collections/redistribute/redistribute_test.h b/tests/collections/redistribute/redistribute_test.h index 27d96c54c..a4c5aa5b4 100644 --- a/tests/collections/redistribute/redistribute_test.h +++ 
b/tests/collections/redistribute/redistribute_test.h @@ -2,8 +2,10 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/redistribute/redistribute_internal.h" +#include "tests/tests_runtime.h" /* Define whether run PTG or DTD */ #define RUN_PTG 1 diff --git a/tests/collections/redistribute/testing_redistribute.c b/tests/collections/redistribute/testing_redistribute.c index 92462757e..944ef5451 100644 --- a/tests/collections/redistribute/testing_redistribute.c +++ b/tests/collections/redistribute/testing_redistribute.c @@ -580,7 +580,7 @@ int main(int argc, char *argv[]) } /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -603,7 +603,7 @@ int main(int argc, char *argv[]) /* Timer end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_PTG\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_PTG\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -611,7 +611,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_ptg = sync_time_elapsed; } @@ -671,7 +671,7 @@ int main(int argc, char *argv[]) } /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute_dtd; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -688,7 +688,7 @@ int main(int argc, char *argv[]) /* Timer 
end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_DTD\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_DTD\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -696,7 +696,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_dtd = sync_time_elapsed; } @@ -740,7 +740,7 @@ int main(int argc, char *argv[]) if( time ) { /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Call parsec_redistribute_bound to get time bound */ results = parsec_redistribute_bound(parsec, dcY.desc, @@ -750,7 +750,7 @@ int main(int argc, char *argv[]) /* Timer end */ #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_bound\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_bound\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -758,7 +758,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif } diff --git a/tests/collections/redistribute/testing_redistribute_random.c b/tests/collections/redistribute/testing_redistribute_random.c index 001433fac..97abb1a52 100644 --- a/tests/collections/redistribute/testing_redistribute_random.c +++ b/tests/collections/redistribute/testing_redistribute_random.c @@ -142,7 +142,7 @@ int main(int argc, char *argv[]) (parsec_tiled_matrix_unary_op_t)redistribute_init_ops, &op_args); /* Timer start */ - SYNC_TIME_START(); + 
SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -161,7 +161,7 @@ int main(int argc, char *argv[]) /* Timer end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_PTG\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_PTG\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -169,7 +169,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_ptg = sync_time_elapsed; } @@ -215,7 +215,7 @@ int main(int argc, char *argv[]) (parsec_tiled_matrix_unary_op_t)redistribute_init_ops, &op_args_dtd); /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute_dtd; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -228,7 +228,7 @@ int main(int argc, char *argv[]) /* Timer end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_DTD\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_DTD\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -236,7 +236,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_dtd = sync_time_elapsed; } @@ -272,7 +272,7 @@ int main(int argc, char *argv[]) if( time ) { /* Timer start */ - SYNC_TIME_START(); + 
SYNC_TIME_START(parsec); /* Call parsec_redistribute_bound to get time bound */ results = parsec_redistribute_bound(parsec, (parsec_tiled_matrix_t *)&dcY, @@ -282,7 +282,7 @@ int main(int argc, char *argv[]) /* Timer end */ #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_bound\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_bound\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -290,7 +290,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif } diff --git a/tests/collections/reduce.c b/tests/collections/reduce.c index d2adb7c89..09a99e9e5 100644 --- a/tests/collections/reduce.c +++ b/tests/collections/reduce.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" @@ -9,6 +10,8 @@ #include "parsec/arena.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/datatype.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include #include "parsec/data_dist/matrix/reduce.h" @@ -42,15 +45,6 @@ int main( int argc, char* argv[] ) char **pargv; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - pargc = 0; pargv = NULL; for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -60,7 +54,10 @@ int main( int argc, char* argv[] ) } } - parsec = parsec_init(cores, &pargc, &pargv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &dcA, PARSEC_MATRIX_FLOAT, PARSEC_MATRIX_TILE, rank, nb, 1, ln, 1, 0, 0, ln, 1, @@ -96,11 +93,8 @@ int main( int argc, char* argv[] ) parsec_data_free(dcA.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif /* defined(PARSEC_HAVE_MPI) */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/collections/reshape/CMakeLists.txt b/tests/collections/reshape/CMakeLists.txt index 333866159..6e2c567b2 100644 --- a/tests/collections/reshape/CMakeLists.txt +++ b/tests/collections/reshape/CMakeLists.txt @@ -2,16 +2,19 @@ include(ParsecCompilePTG) set(JDF_SOURCES "local_no_reshape.jdf;local_read_reshape.jdf;local_output_reshape.jdf;local_input_reshape.jdf;remote_read_reshape.jdf;remote_no_re_reshape.jdf;local_input_LU_LL.jdf;") parsec_addtest_executable(C reshape SOURCES testing_reshape.c common.c) +target_link_libraries(reshape PRIVATE tests_runtime_common) 
target_ptg_sources(reshape PRIVATE ${JDF_SOURCES}) parsec_addtest_executable(C input_dep_reshape_single_copy SOURCES testing_input_dep_reshape_single_copy.c common.c) +target_link_libraries(input_dep_reshape_single_copy PRIVATE tests_runtime_common) target_ptg_sources(input_dep_reshape_single_copy PRIVATE "input_dep_single_copy_reshape.jdf;") parsec_addtest_executable(C remote_multiple_outs_same_pred_flow SOURCES testing_remote_multiple_outs_same_pred_flow.c common.c) +target_link_libraries(remote_multiple_outs_same_pred_flow PRIVATE tests_runtime_common) target_ptg_sources(remote_multiple_outs_same_pred_flow PRIVATE "remote_multiple_outs_same_pred_flow.jdf;remote_multiple_outs_same_pred_flow_multiple_deps.jdf;") set(JDF_SOURCES "avoidable_reshape.jdf;") parsec_addtest_executable(C avoidable_reshape SOURCES testing_avoidable_reshape.c common.c) +target_link_libraries(avoidable_reshape PRIVATE tests_runtime_common) target_ptg_sources(avoidable_reshape PRIVATE ${JDF_SOURCES}) - diff --git a/tests/collections/reshape/common.c b/tests/collections/reshape/common.c index 5d5bc31c6..1415516d1 100644 --- a/tests/collections/reshape/common.c +++ b/tests/collections/reshape/common.c @@ -157,7 +157,10 @@ int reshape_set_matrix_value_position_swap(parsec_execution_stream_t *es, return 0; } -int check_matrix_equal(parsec_matrix_block_cyclic_t dcA, parsec_matrix_block_cyclic_t dcA_check){ +int check_matrix_equal(parsec_context_t *parsec, + parsec_matrix_block_cyclic_t dcA, + parsec_matrix_block_cyclic_t dcA_check) +{ int ret = 0; for(size_t i = 0; i < (dcA_check.super.nb_local_tiles * dcA_check.super.bsiz); i++) { if( ((int*)dcA.mat)[i] != ((int*)dcA_check.mat)[i]){ @@ -165,9 +168,12 @@ int check_matrix_equal(parsec_matrix_block_cyclic_t dcA, parsec_matrix_block_cyc break; } } -#if defined(PARSEC_HAVE_MPI) - MPI_Allreduce(MPI_IN_PLACE, &ret, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); -#endif + int rc = parsec_tests_allreduce(parsec, NULL, &ret, 1, + parsec_datatype_int_t, + 
PARSEC_TESTS_REDUCE_SUM); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + } return ret; } diff --git a/tests/collections/reshape/common.h b/tests/collections/reshape/common.h index 0f1fca88a..0f2a1d580 100644 --- a/tests/collections/reshape/common.h +++ b/tests/collections/reshape/common.h @@ -2,6 +2,7 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #ifndef _reshape_h @@ -14,10 +15,8 @@ #include "parsec/profiling.h" #include "parsec/execution_stream.h" #include "parsec/utils/mca_param.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" int reshape_set_matrix_value(parsec_execution_stream_t *es, const parsec_tiled_matrix_t *descA, @@ -50,35 +49,22 @@ int reshape_set_matrix_value_position_swap(parsec_execution_stream_t *es, void *_A, parsec_matrix_uplo_t uplo, int m, int n, void *args); -int check_matrix_equal(parsec_matrix_block_cyclic_t dcA, parsec_matrix_block_cyclic_t dcA_check); +int check_matrix_equal(parsec_context_t *parsec, + parsec_matrix_block_cyclic_t dcA, + parsec_matrix_block_cyclic_t dcA_check); int reshape_print(parsec_execution_stream_t *es, const parsec_tiled_matrix_t *descA, void *_A, parsec_matrix_uplo_t uplo, int m, int n, void *args); -#if defined(PARSEC_HAVE_MPI) -#define BARRIER MPI_Barrier(MPI_COMM_WORLD); -#else -#define BARRIER -#endif - -#if defined(PARSEC_HAVE_MPI) - #define DO_INIT_MPI() \ - int provided; \ - int requested = m ? 
MPI_THREAD_MULTIPLE : MPI_THREAD_SERIALIZED; \ - MPI_Init_thread(&argc, &argv, requested, &provided); \ - MPI_Comm_size(MPI_COMM_WORLD, &nodes); \ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ - if( requested > provided ) { \ - fprintf(stderr, "#XXXXX User requested %s but the implementation returned a lower thread\n", requested==MPI_THREAD_MULTIPLE? "MPI_THREAD_MULTIPLE": "MPI_THREAD_SERIALIZED");\ - exit(2); \ - } -#else - #define DO_INIT_MPI() \ - nodes = 1; \ - rank = 0; -#endif +#define BARRIER do { \ + int _barrier_rc = parsec_tests_barrier(parsec); \ + if( (PARSEC_SUCCESS != _barrier_rc) && \ + (PARSEC_ERR_NOT_IMPLEMENTED != _barrier_rc) ) { \ + PARSEC_CHECK_ERROR(_barrier_rc, "parsec_tests_barrier"); \ + } \ +} while(0) #define DO_INIT() \ char *name; \ @@ -96,7 +82,7 @@ int reshape_print(parsec_execution_stream_t *es, case 'w': do_sleep = 1; break; \ case '?': case 'h': default: \ fprintf(stderr, \ - "-m : initialize MPI_THREAD_MULTIPLE (default: 0/no)\n" \ + "-m : request multiple-thread support from the test runtime (default: 0/no)\n"\ "-N : rowxcolumn dimension (N, M) of the matrices (default: 8)\n"\ "-t : row dimension (MB) of the tiles (default: 4)\n" \ "-T : column dimension (NB) of the tiles (default: 4)\n" \ @@ -107,9 +93,6 @@ int reshape_print(parsec_execution_stream_t *es, exit(1); \ } \ } \ - DO_INIT_MPI(); \ - if(do_sleep) sleep(10); \ - /* Initialize PaRSEC */ \ pargc = 0; pargv = NULL; \ for(int i = 1; i < argc; i++) { \ if( strcmp(argv[i], "--") == 0 ) { \ @@ -118,12 +101,12 @@ int reshape_print(parsec_execution_stream_t *es, break; \ } \ } \ - parsec = parsec_init(cores, &pargc, &pargv); \ - if( NULL == parsec ) { \ - /* Failed to correctly initialize. In a correct scenario report*/ \ - /* upstream, but in this particular case bail out.*/ \ - exit(-1); \ - } \ + int _init_rc = parsec_tests_context_init(cores, \ + m ? 
PARSEC_TEST_THREAD_MULTIPLE : \ + PARSEC_TEST_THREAD_SERIALIZED, \ + &pargc, &pargv, &parsec, &rank, &nodes); \ + PARSEC_CHECK_ERROR(_init_rc, "parsec_tests_context_init"); \ + if(do_sleep) sleep(10); \ (void)name; @@ -158,7 +141,7 @@ int reshape_print(parsec_execution_stream_t *es, #define DO_CHECK(NAME, dc, dc_check) do { \ - cret = check_matrix_equal(dc, dc_check ); \ + cret = check_matrix_equal(parsec, dc, dc_check ); \ if(rank==0) \ printf("Test " #NAME " %s\n", (cret > 0)? "FAILED" : "PASSED"); \ ret |= cret; \ @@ -173,7 +156,7 @@ int reshape_print(parsec_execution_stream_t *es, parsec_context_wait(parsec); \ parsec_taskpool_free((parsec_taskpool_t*)tp); \ \ - cret = check_matrix_equal(dcA, dcA_check ); \ + cret = check_matrix_equal(parsec, dcA, dcA_check ); \ if(rank==0) \ printf("Test " #NAME " %s\n", (cret > 0)? "FAILED" : "PASSED"); \ ret |= cret; \ diff --git a/tests/collections/reshape/testing_avoidable_reshape.c b/tests/collections/reshape/testing_avoidable_reshape.c index 59327458a..ee8ee5da4 100644 --- a/tests/collections/reshape/testing_avoidable_reshape.c +++ b/tests/collections/reshape/testing_avoidable_reshape.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -107,11 +104,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcA_check.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA_check); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/reshape/testing_input_dep_reshape_single_copy.c b/tests/collections/reshape/testing_input_dep_reshape_single_copy.c index 4c6d00972..5ea6b0477 100644 --- a/tests/collections/reshape/testing_input_dep_reshape_single_copy.c +++ b/tests/collections/reshape/testing_input_dep_reshape_single_copy.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -42,7 +39,19 @@ int main(int argc, char *argv[]) DO_INIT(); - assert(cores == 2); + int runtime_cores = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_CORES); + PARSEC_CHECK_ERROR(runtime_cores, "parsec_context_query(PARSEC_CONTEXT_QUERY_CORES)"); + if( (cores < 2) || (runtime_cores < 2) ) { + if( 0 == rank ) { + fprintf(stderr, + "input_dep_single_copy_reshape requires at least two PaRSEC execution streams " + "(requested %d, runtime provided %d)\n", + cores, runtime_cores); + } + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; + } DO_INI_DATATYPES(); @@ -90,11 +99,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcA_check.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA_check); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c b/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c index 6a7803e13..483a535be 100644 --- a/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c +++ b/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -169,11 +166,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcV.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcV); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/reshape/testing_reshape.c b/tests/collections/reshape/testing_reshape.c index d7f75601a..9ea8338a2 100644 --- a/tests/collections/reshape/testing_reshape.c +++ b/tests/collections/reshape/testing_reshape.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -259,11 +256,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcA_check.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA_check); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/two_dim_band/CMakeLists.txt b/tests/collections/two_dim_band/CMakeLists.txt index ab5abf708..bcce8d1b4 100644 --- a/tests/collections/two_dim_band/CMakeLists.txt +++ b/tests/collections/two_dim_band/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) parsec_addtest_executable(C testing_band SOURCES main.c) +target_link_libraries(testing_band PRIVATE tests_runtime_common) target_include_directories(testing_band PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(testing_band PRIVATE "two_dim_band.jdf;two_dim_band_free.jdf") - diff --git a/tests/collections/two_dim_band/main.c b/tests/collections/two_dim_band/main.c index 5afaa56b9..989402d42 100644 --- a/tests/collections/two_dim_band/main.c +++ b/tests/collections/two_dim_band/main.c @@ -2,19 +2,18 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic_band.h" #include "parsec/data_dist/matrix/sym_two_dim_rectangle_cyclic_band.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "two_dim_band_test.h" #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - int main(int argc, char *argv[]) { parsec_context_t* parsec; @@ -23,23 +22,12 @@ int main(int argc, char *argv[]) char **pargv = NULL; parsec_matrix_uplo_t uplo = PARSEC_MATRIX_UPPER; //PARSEC_MATRIX_LOWER parsec_matrix_uplo_t full = PARSEC_MATRIX_FULL; + int rc; /* Super */ int N = 16, NB = 4, P = 1, KP = 1, KQ = 1; /* Band */ int P_BAND = 1, KP_BAND = 1, KQ_BAND = 1, BAND_SIZE = 1; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { pargc = argc - i; @@ -49,7 +37,10 @@ int main(int argc, char *argv[]) } /* Initialize PaRSEC */ - parsec = parsec_init(-1, &pargc, &pargv); + rc = 
parsec_tests_context_init(-1, PARSEC_TEST_THREAD_MULTIPLE, + &pargc, &pargv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); while ((ch = getopt(argc, argv, "N:T:s:S:P:p:f:F:b:h")) != -1) { switch (ch) { @@ -143,11 +134,8 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy(&dcYP.off_band.super); /* Clean up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/collections/two_dim_band/two_dim_band_test.h b/tests/collections/two_dim_band/two_dim_band_test.h index 01a4a692d..b79386eb6 100644 --- a/tests/collections/two_dim_band/two_dim_band_test.h +++ b/tests/collections/two_dim_band/two_dim_band_test.h @@ -2,13 +2,10 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif - /** * @param [in] Y: the data, already distributed and allocated * @param [in] uplo: Upper / Lower / UpperLower diff --git a/tests/dsl/dtd/CMakeLists.txt b/tests/dsl/dtd/CMakeLists.txt index b5da79051..687e16742 100644 --- a/tests/dsl/dtd/CMakeLists.txt +++ b/tests/dsl/dtd/CMakeLists.txt @@ -1,4 +1,8 @@ -link_libraries(tests_common) +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + +link_libraries(tests_common tests_runtime_common) parsec_addtest_executable(C dtd_test_empty SOURCES dtd_test_empty.c) parsec_addtest_executable(C dtd_test_pingpong SOURCES dtd_test_pingpong.c) diff --git a/tests/dsl/dtd/dtd_test_allreduce.c b/tests/dsl/dtd/dtd_test_allreduce.c index 4899875dc..bfc825b94 100644 --- a/tests/dsl/dtd/dtd_test_allreduce.c +++ b/tests/dsl/dtd/dtd_test_allreduce.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* Naive star-based reduce-bcast allreduce; just an example, so keep it * simple... */ @@ -19,15 +20,12 @@ #include "parsec/data_internal.h" #include "parsec/execution_stream.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - static int verbose = 0; /* IDs for the Arena Datatypes */ @@ -90,25 +88,12 @@ int main(int argc, char **argv) { parsec_context_t* parsec; parsec_arena_datatype_t *adt; - int rc, nb, nt; + int rc, nb, nt = 0; int rank, world, cores = -1, root = 0; int i; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world*10; /* total no. 
of tiles */ verbose = 0; int pargc = 0; char **pargv = NULL; @@ -120,7 +105,7 @@ int main(int argc, char **argv) } if( 0 == strncmp(argv[i], "-n=", 3) ) { nt = strtol(argv[i]+3, NULL, 10); - if( 0 >= nt ) nt = world*10; /* set to default value */ + if( 0 >= nt ) nt = 0; /* set to default value after rank discovery */ continue; } if( 0 == strncmp(argv[i], "-v", 2) ) { @@ -129,9 +114,13 @@ int main(int argc, char **argv) } } - parsec = parsec_init( cores, &pargc, &pargv ); - if( NULL == parsec ) { - return -1; + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( 0 >= nt ) { + nt = world*10; /* total no. of tiles */ } parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -231,11 +220,8 @@ int main(int argc, char **argv) parsec_tiled_matrix_destroy(dcA); free(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_batch_cpu.c b/tests/dsl/dtd/dtd_test_batch_cpu.c index 96d20324e..39a091a9d 100644 --- a/tests/dsl/dtd/dtd_test_batch_cpu.c +++ b/tests/dsl/dtd/dtd_test_batch_cpu.c @@ -8,10 +8,7 @@ #include "parsec.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/mca/device/device.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "tests/tests_runtime.h" #include #include @@ -41,14 +38,6 @@ main(int argc, char **argv) int expected = 0; int ret = 0; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - } -#endif - if( NULL != argv[1] ) { ntasks = atoi(argv[1]); } @@ -56,8 +45,9 @@ main(int argc, char **argv) ntasks = 32; } - parsec = parsec_init(-1, &argc, &argv); - assert(NULL != parsec); + rc = parsec_tests_context_init(-1, 
PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); dtd_tp = parsec_dtd_taskpool_new(); @@ -102,11 +92,8 @@ main(int argc, char **argv) parsec_dtd_task_class_release(dtd_tp, tc); parsec_taskpool_free(dtd_tp); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/dtd/dtd_test_broadcast.c b/tests/dsl/dtd/dtd_test_broadcast.c index 7ac4b4301..f7f0b6612 100644 --- a/tests/dsl/dtd/dtd_test_broadcast.c +++ b/tests/dsl/dtd/dtd_test_broadcast.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -63,26 +61,18 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world; /* total no. 
of tiles */ if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = world; /* total no. of tiles */ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -158,11 +148,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_ce.c b/tests/dsl/dtd/dtd_test_ce.c index c681f6e56..edf728b10 100644 --- a/tests/dsl/dtd/dtd_test_ce.c +++ b/tests/dsl/dtd/dtd_test_ce.c @@ -2,8 +2,8 @@ * Copyright (c) 2022-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ -#include #include #include #include @@ -12,12 +12,18 @@ #include "parsec/parsec_comm_engine.h" #include "parsec/runtime.h" +#include "tests/tests_runtime.h" -#define ACTIVE_MESSAGE_FROM_0_TAG 2 -#define ACTIVE_MESSAGE_FROM_1_TAG 3 -#define NOTIFY_ABOUT_GET_FROM_0_TAG 4 -#define NOTIFY_ABOUT_PUT_FROM_0_TAG 5 -#define NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG 6 +/* + * parsec_init() registers the runtime's own communication-engine control tags. + * Keep this direct CE test on separate tags so the test callbacks do not + * collide with remote-dependency callbacks installed by the runtime. 
+ */ +#define ACTIVE_MESSAGE_FROM_0_TAG 7 +#define ACTIVE_MESSAGE_FROM_1_TAG 8 +#define NOTIFY_ABOUT_GET_FROM_0_TAG 9 +#define NOTIFY_ABOUT_PUT_FROM_0_TAG 10 +#define NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG 11 int get_end(parsec_comm_engine_t *ce, @@ -434,29 +440,26 @@ put_end_ack(parsec_comm_engine_t *ce, int main(int argc, char **argv) { + parsec_context_t *parsec = NULL; int rank, world; int i; + int rc; int test_GET = 1; int test_PUT = 1; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); my_rank = rank; - parsec_comm_engine_t *ce = parsec_comm_engine_init(NULL); + parsec_comm_engine_t *ce = &parsec_ce; if( world != 2 ) { printf("World is too small, too bad! 
Buh-bye"); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } @@ -480,7 +483,8 @@ int main(int argc, char **argv) ce->enable(ce); /* To make sure all the ranks have the tags registered */ - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); /* Testing active message */ if(rank == 0) { @@ -504,7 +508,8 @@ int main(int argc, char **argv) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); counter = 0; printf("-------------------------------------\n"); @@ -528,7 +533,8 @@ int main(int argc, char **argv) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); counter = 0; printf("-------------------------------------\n"); @@ -603,7 +609,8 @@ int main(int argc, char **argv) } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); counter = 0; if(test_PUT) { @@ -673,7 +680,8 @@ int main(int argc, char **argv) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); ce->tag_unregister(ACTIVE_MESSAGE_FROM_0_TAG); ce->tag_unregister(ACTIVE_MESSAGE_FROM_1_TAG); @@ -681,11 +689,8 @@ int main(int argc, char **argv) ce->tag_unregister(NOTIFY_ABOUT_PUT_FROM_0_TAG); ce->tag_unregister(NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG); - parsec_comm_engine_fini(ce); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_cuda_again_async.c b/tests/dsl/dtd/dtd_test_cuda_again_async.c index a3a4e6c19..bcf5b67c3 100644 --- a/tests/dsl/dtd/dtd_test_cuda_again_async.c +++ b/tests/dsl/dtd/dtd_test_cuda_again_async.c @@ -2,6 +2,7 @@ * Copyright (c) 2023 The University of Tennessee 
and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" @@ -10,10 +11,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "tests/tests_data.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ +#include "tests/tests_runtime.h" void parsec_dtd_pack_args( parsec_task_t *this_task, ... ) { @@ -124,21 +122,11 @@ int main(int argc, char* argv[]) { int ret; parsec_context_t *parsec_context = NULL; - int rank, world; + int world; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - parsec_context = parsec_init(-1, NULL, NULL); + ret = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + NULL, NULL, &parsec_context, NULL, &world); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_init"); // Create new DTD taskpool parsec_taskpool_t *tp = parsec_dtd_taskpool_new(); @@ -192,9 +180,6 @@ int main(int argc, char* argv[]) parsec_taskpool_free(tp); - parsec_fini(&parsec_context); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + ret = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_fini"); } diff --git a/tests/dsl/dtd/dtd_test_cuda_task_insert.c b/tests/dsl/dtd/dtd_test_cuda_task_insert.c index 0b0b78de5..08feee78d 100644 --- a/tests/dsl/dtd/dtd_test_cuda_task_insert.c +++ b/tests/dsl/dtd/dtd_test_cuda_task_insert.c @@ -11,10 +11,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "tests/tests_data.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ +#include "tests/tests_runtime.h" static int 
TILE_FULL; @@ -881,26 +878,18 @@ int main(int argc, char **argv) parsec_context_t *parsec_context = NULL; int rank, world; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - // Number of CPU cores involved int ncores = -1; // Use all available cores - parsec_context = parsec_init(ncores, &argc, &argv); + rc = parsec_tests_context_init(ncores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec_context, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); rc = !(get_nb_cuda_devices() >= 1); print_test_result("Have CUDA accelerators", rc); if(rc != 0) { - parsec_fini(&parsec_context); + rc = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return -1; } @@ -941,11 +930,8 @@ int main(int argc, char **argv) rc = test_cuda_multiple_devices(world, rank, parsec_context); ret += print_test_result("cuda multiple devices", rc); - parsec_fini(&parsec_context); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/dtd/dtd_test_data_flush.c b/tests/dsl/dtd/dtd_test_data_flush.c index 98ccc6b81..abd41f8c9 100644 --- a/tests/dsl/dtd/dtd_test_data_flush.c +++ b/tests/dsl/dtd/dtd_test_data_flush.c @@ -2,6 +2,7 @@ * Copyright (c) 2018-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -94,18 +92,6 @@ int main(int argc, char ** argv) int nb, nt, rc; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - if(argv[1] != NULL){ cores = atoi(argv[1]); } @@ -113,7 +99,10 @@ int main(int argc, char ** argv) int i, j, total_tasks = 10000; /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_taskpool_t *dtd_tp; /* @@ -420,11 +409,8 @@ int main(int argc, char ** argv) parsec_dtd_free_arena_datatype(parsec, TILE_FULL); } - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_empty.c b/tests/dsl/dtd/dtd_test_empty.c index cf5eeeef5..840917b5f 100644 --- a/tests/dsl/dtd/dtd_test_empty.c +++ b/tests/dsl/dtd/dtd_test_empty.c @@ -2,32 +2,25 @@ * Copyright (c) 2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" int main(int argc, char **argv) { parsec_context_t* parsec; - int rank = 0, world = 1; + int rc; -#if defined(PARSEC_HAVE_MPI) - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - parsec = parsec_init( -1, &argc, &argv ); parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_explicit_task_creation.c b/tests/dsl/dtd/dtd_test_explicit_task_creation.c index c34978a9c..8ee8e3a1d 100644 --- a/tests/dsl/dtd/dtd_test_explicit_task_creation.c +++ b/tests/dsl/dtd/dtd_test_explicit_task_creation.c @@ -13,6 +13,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -68,30 +65,24 @@ int main(int argc, char ** argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); + if(argv[1] != NULL){ + cores = atoi(argv[1]); } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - if( world != 1 ) { - parsec_fatal( "Nope! 
world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - if(argv[1] != NULL){ - cores = atoi(argv[1]); + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); } no_of_tasks = world; nb = 1; /* tile_size */ nt = no_of_tasks; /* total no. of tiles */ - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); adt = parsec_matrix_adt_new_rect( @@ -160,11 +151,8 @@ int main(int argc, char ** argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_flag_dont_track.c b/tests/dsl/dtd/dtd_test_flag_dont_track.c index 355dcb65a..6d15a78a0 100644 --- a/tests/dsl/dtd/dtd_test_flag_dont_track.c +++ b/tests/dsl/dtd/dtd_test_flag_dont_track.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int task_to_check_dont_track(parsec_execution_stream_t *es, parsec_task_t *this_task) { @@ -45,29 +43,20 @@ int main(int argc, char ** argv) int nb, nt, rc; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } - if(argv[1] != NULL){ cores = atoi(argv[1]); } /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. 
" + "Try with a single-process launcher.\n" ); + } /****** Checking Don't track flag ******/ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -126,11 +115,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c b/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c index a744f0645..c49a55056 100644 --- a/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c +++ b/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c @@ -2,6 +2,7 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -12,43 +13,32 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int main(int argc, char **argv) { parsec_context_t* parsec; - /*int rc;*/ + int rc; int rank, world, cores; int nb, nt; parsec_tiled_matrix_t *dcA, *dcB, *dcC; uint32_t id = 0; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world; /* total no. of tiles */ cores = 8; - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = world; /* total no. 
of tiles */ dcA = create_and_distribute_data(rank, world, nb, nt); parsec_data_collection_set_key((parsec_data_collection_t *)dcA, "A"); @@ -91,11 +81,8 @@ int main(int argc, char **argv) free_data(dcB); free_data(dcC); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_hierarchy.c b/tests/dsl/dtd/dtd_test_hierarchy.c index dfb2179c0..d82354345 100644 --- a/tests/dsl/dtd/dtd_test_hierarchy.c +++ b/tests/dsl/dtd/dtd_test_hierarchy.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. */ /* parsec things */ @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* This testing shows graph pruning as well as hierarchical execution. 
* The only restriction is the parsec_taskpool_wait() before parsec_context_wait() */ @@ -106,25 +103,15 @@ int main(int argc, char ** argv) int rank, world, cores = -1, rc; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m; int nb, nt; parsec_tiled_matrix_t *dcA; parsec_taskpool_t *dtd_tp; - parsec = parsec_init( cores, &argc, &argv ); - dtd_tp = parsec_dtd_taskpool_new(); /* Registering the dtd_handle with PARSEC context */ @@ -144,7 +131,7 @@ int main(int argc, char ** argv) parsec_data_collection_t *A = (parsec_data_collection_t *)dcA; parsec_dtd_data_collection_init(A); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); rc = parsec_context_start( parsec ); PARSEC_CHECK_ERROR(rc, "parsec_context_start"); @@ -167,7 +154,7 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\n") ); + SYNC_TIME_PRINT(parsec, rank, ("\n") ); parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( A ); @@ -175,11 +162,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_insert_task_interface.c b/tests/dsl/dtd/dtd_test_insert_task_interface.c index 452284c7f..1deeac952 100644 --- a/tests/dsl/dtd/dtd_test_insert_task_interface.c +++ b/tests/dsl/dtd/dtd_test_insert_task_interface.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee 
and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -12,6 +13,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -19,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -68,33 +66,24 @@ int main(int argc, char ** argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); + if(argv[1] != NULL){ + cores = atoi(argv[1]); } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - if(argv[1] != NULL){ - cores = atoi(argv[1]); + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); } no_of_tasks = world; nb = 1; /* tile_size */ nt = no_of_tasks; /* total no. 
of tiles */ - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); adt = parsec_matrix_adt_new_rect( @@ -160,11 +149,8 @@ int main(int argc, char ** argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_interleave_actions.c b/tests/dsl/dtd/dtd_test_interleave_actions.c index c76925ade..9a2c2d625 100644 --- a/tests/dsl/dtd/dtd_test_interleave_actions.c +++ b/tests/dsl/dtd/dtd_test_interleave_actions.c @@ -2,12 +2,9 @@ * Copyright (c) 2020-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ -#if defined(PARSEC_HAVE_MPI) -#include "mpi.h" -#endif /* defined(PARSEC_HAVE_MPI) */ - #include #include "parsec.h" @@ -15,6 +12,7 @@ #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "tests/tests_data.h" +#include "tests/tests_runtime.h" /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -107,18 +105,6 @@ int main(int argc, char **argv) { pargc = argc - optind; pargv = argv + optind; - #if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - int nb = 1; int nt = 1; @@ -126,7 +112,10 @@ int main(int argc, char **argv) { nt = 1; int ncores = -1; - parsec_context = parsec_init(ncores, &pargc, &pargv); + ret = parsec_tests_context_init(ncores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec_context, &rank, &world); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_init"); if(world == 1) { parsec_warning("*** This test only 
makes sense with at least two nodes"); @@ -219,11 +208,8 @@ int main(int argc, char **argv) { parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec_context); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + ret = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_multiple_handle_wait.c b/tests/dsl/dtd/dtd_test_multiple_handle_wait.c index 117097ab1..2d651db40 100644 --- a/tests/dsl/dtd/dtd_test_multiple_handle_wait.c +++ b/tests/dsl/dtd/dtd_test_multiple_handle_wait.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -38,25 +35,10 @@ task_to_check_generation(parsec_execution_stream_t *es, parsec_task_t *this_task int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; int parsec_argc; char** parsec_argv; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This test requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec_argv = &argv[1]; parsec_argc = argc - 1; if(argv[1] != NULL) { @@ -66,10 +48,10 @@ int main(int argc, char ** argv) } /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init( cores, &parsec_argc, &parsec_argv ); - if( NULL == parsec ) 
{ - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &parsec_argc, &parsec_argv, + &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /****** Checking task generation ******/ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -88,7 +70,7 @@ int main(int argc, char ** argv) PARSEC_CHECK_ERROR(rc, "parsec_context_start"); for( i = 0; i < 6; i++ ) { - SYNC_TIME_START(); + SYNC_TIME_START(parsec); for( j = 0; j < total_tasks; j++ ) { /* This task does not have any data associated with it, so it will be inserted in all mpi processes */ parsec_dtd_insert_task(dtd_tp, task_to_check_generation, 0, PARSEC_DEV_CPU, "sample_task", @@ -97,7 +79,7 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("\n")); + SYNC_TIME_PRINT(parsec, rank, ("\n")); } parsec_taskpool_free( dtd_tp ); @@ -105,11 +87,8 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_new_tile.c b/tests/dsl/dtd/dtd_test_new_tile.c index c0ef693f1..75fc61a82 100644 --- a/tests/dsl/dtd/dtd_test_new_tile.c +++ b/tests/dsl/dtd/dtd_test_new_tile.c @@ -21,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; static int32_t nb_errors = 0; @@ -259,24 +255,11 @@ int main(int argc, char **argv) parsec_device_cuda_module_t **gpu_devices = NULL; #endif -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark 
requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = NB; /* tile_size */ - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); #if defined(PARSEC_PROF_TRACE) parsec_profiling_start(); #endif @@ -520,11 +503,8 @@ int main(int argc, char **argv) parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if(nb_errors > 0) return EXIT_FAILURE; diff --git a/tests/dsl/dtd/dtd_test_null_as_tile.c b/tests/dsl/dtd/dtd_test_null_as_tile.c index 658f602ef..ab40b3c45 100644 --- a/tests/dsl/dtd/dtd_test_null_as_tile.c +++ b/tests/dsl/dtd/dtd_test_null_as_tile.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. 
*/ /* parsec things */ @@ -19,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -42,36 +39,26 @@ call_to_kernel_type( parsec_execution_stream_t *es, int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; if(argv[1] != NULL){ cores = atoi(argv[1]); } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m; int no_of_tasks = 1; - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); /* Registering the dtd_handle with PARSEC context */ rc = parsec_context_add_taskpool( parsec, dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); rc = parsec_context_start( parsec ); PARSEC_CHECK_ERROR(rc, "parsec_context_start"); @@ -89,15 +76,12 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\n")); + SYNC_TIME_PRINT(parsec, rank, ("\n")); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_pingpong.c b/tests/dsl/dtd/dtd_test_pingpong.c index b718410e0..1a189b6a0 100644 --- a/tests/dsl/dtd/dtd_test_pingpong.c +++ b/tests/dsl/dtd/dtd_test_pingpong.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2023 The University of Tennessee and The 
University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -10,6 +11,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "tests/tests_timing.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -18,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -81,35 +79,23 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } + if(argv[1] != NULL){ + cores = atoi(argv[1]); } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( world != 2 ) { - parsec_fatal( "Nope! world is not right, we need exactly two MPI process. " - "Try with \"mpirun -np 2 .....\"\n" ); + parsec_fatal( "Nope! world is not right, we need exactly two processes. " + "Try with a two-process launcher.\n" ); } nb = 1; /* tile_size */ nt = 2; /* total no. 
of tiles */ - if(argv[1] != NULL){ - cores = atoi(argv[1]); - } - - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); adt = parsec_matrix_adt_new_rect( @@ -221,7 +207,7 @@ int main(int argc, char **argv) parsec_data_collection_t *A = (parsec_data_collection_t *)dcA; parsec_dtd_data_collection_init(A); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); for( j = 0; j < repeat_pingpong; j++ ) { parsec_dtd_insert_task(dtd_tp, task_rank_0, 0, PARSEC_DEV_CPU, "task_for_timing_0", PASSED_BY_REF, PARSEC_DTD_TILE_OF_KEY(A, 0), PARSEC_INOUT | TILE_FULL | PARSEC_AFFINITY, @@ -238,7 +224,7 @@ int main(int argc, char **argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\tSize of message : %zu bytes\tTime for each pingpong : %12.5f\n", sizes[i]*sizeof(int), sync_time_elapsed/repeat_pingpong)); + SYNC_TIME_PRINT(parsec, rank, ("\tSize of message : %zu bytes\tTime for each pingpong : %12.5f\n", sizes[i]*sizeof(int), sync_time_elapsed/repeat_pingpong)); parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( A ); @@ -247,11 +233,8 @@ int main(int argc, char **argv) parsec_taskpool_free(dtd_tp); } - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_reduce.c b/tests/dsl/dtd/dtd_test_reduce.c index 91f887928..bb1dd002a 100644 --- a/tests/dsl/dtd/dtd_test_reduce.c +++ b/tests/dsl/dtd/dtd_test_reduce.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -66,26 +64,18 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world; /* total no. of tiles */ if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = world; /* total no. 
of tiles */ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -148,11 +138,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_simple_gemm.c b/tests/dsl/dtd/dtd_test_simple_gemm.c index 0b8ac8d76..8df7ff504 100644 --- a/tests/dsl/dtd/dtd_test_simple_gemm.c +++ b/tests/dsl/dtd/dtd_test_simple_gemm.c @@ -11,6 +11,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/mca/device/device.h" +#include "tests/tests_runtime.h" // The file is not compiled if CUDA is not present or CUBLAS is not found #include "parsec/mca/device/cuda/device_cuda.h" @@ -46,10 +47,6 @@ extern void cblas_dgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, const double beta, double *C, const CBLAS_INDEX ldc); #endif -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - #include #include #include @@ -914,19 +911,9 @@ int main(int argc, char **argv) int M = 16 * mb, N = 16 * nb, K = 16 * kb; double min_perf=0.0; int runs = 5; + int ncores = -1; /* Use all available cores */ int debug=-1; - -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + int show_help = 0; while( 1 ) { int option_index = 0; @@ -1023,54 +1010,66 @@ int main(int argc, char **argv) break; case 'h': case '?': - if( 0 == rank ) { - fprintf(stderr, - "Usage %s [flags] [-- ]\n" - " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" - " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" - "\n" 
- " Compute pdgemm on a process grid of PxQ, using all available GPUs on each\n" - " node (modulo parsec options), using DTD. Compute C += AxB, where A is MxK\n" - " tiled in mb x kb, B is KxN tiled in kb x nb, and C is MxN tiled in mb x nb\n" - " Executes nruns iterations of the GEMM operation.\n" - " flags:\n" - " --M|-M / --K|-K / --N|-N: set M, K and N (resp.)\n" - " --mb|-m / --kb/-k / --nb|-n: set mb, kb and nb (resp.)\n" - " --nruns|-t: set the number of runs to do\n" - " --device|-d: which device to use (CPU or GPU)\n" - " --batch|-b: enable CUDA batch collection and submit\n" - " the collected GEMMs one by one\n" - " --batch-mode|-B: CUDA batching mode: none, one-by-one,\n" - " or cublas (default: %s)\n" - " --batch-size|-S: maximum number of GEMM tasks per CUDA\n" - " batch (default: %d)\n" - " --batch-slots|-L: maximum number of in-flight cuBLAS\n" - " batched submissions per stream (default: %d)\n" - " --verbose|-v: display which GEMM runs on which GPU\n" - " as execution is unfolding\n" - " --help|-h|-?: display this help\n" - " --debug|-D: blocks the process passed as parameter and\n" - " waits for gdb to connect to it\n" - " --Alarm|-A: sets the expected minimum performance for a\n" - " single GPU (kills the process if it takes longer\n" - " than the time corresponding to the expected\n" - " performance to complete the product)\n" - "\n" - " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" - " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" - "\n", - argv[0], gemm_cuda_batch_mode_name(), - cuda_max_batch_size, cuda_max_submitted_batches); - } -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif - exit(0); + show_help = 1; + break; + } + if( show_help ) { + break; } } int pargc = argc - optind; char **pargv = argv + optind; + rc = parsec_tests_context_init(ncores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec_context, &rank, &world); + PARSEC_CHECK_ERROR(rc, 
"parsec_tests_context_init"); + + if( show_help ) { + if( 0 == rank ) { + fprintf(stderr, + "Usage %s [flags] [-- ]\n" + " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" + " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" + "\n" + " Compute pdgemm on a process grid of PxQ, using all available GPUs on each\n" + " node (modulo parsec options), using DTD. Compute C += AxB, where A is MxK\n" + " tiled in mb x kb, B is KxN tiled in kb x nb, and C is MxN tiled in mb x nb\n" + " Executes nruns iterations of the GEMM operation.\n" + " flags:\n" + " --M|-M / --K|-K / --N|-N: set M, K and N (resp.)\n" + " --mb|-m / --kb/-k / --nb|-n: set mb, kb and nb (resp.)\n" + " --nruns|-t: set the number of runs to do\n" + " --device|-d: which device to use (CPU or GPU)\n" + " --batch|-b: enable CUDA batch collection and submit\n" + " the collected GEMMs one by one\n" + " --batch-mode|-B: CUDA batching mode: none, one-by-one,\n" + " or cublas (default: %s)\n" + " --batch-size|-S: maximum number of GEMM tasks per CUDA\n" + " batch (default: %d)\n" + " --batch-slots|-L: maximum number of in-flight cuBLAS\n" + " batched submissions per stream (default: %d)\n" + " --verbose|-v: display which GEMM runs on which GPU\n" + " as execution is unfolding\n" + " --help|-h|-?: display this help\n" + " --debug|-D: blocks the process passed as parameter and\n" + " waits for gdb to connect to it\n" + " --Alarm|-A: sets the expected minimum performance for a\n" + " single GPU (kills the process if it takes longer\n" + " than the time corresponding to the expected\n" + " performance to complete the product)\n" + "\n" + " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" + " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" + "\n", + argv[0], gemm_cuda_batch_mode_name(), + cuda_max_batch_size, cuda_max_submitted_batches); + } + rc = 
parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return 0; + } + if( -1 == P ) P = (int)sqrt(world); if( -1 == Q ) @@ -1088,19 +1087,13 @@ int main(int argc, char **argv) while(loop) { sleep(1); } } - // Number of CPU cores involved - int ncores = -1; // Use all available cores - parsec_context = parsec_init(ncores, &pargc, &pargv); - int *gpu_device_index = NULL; if( PARSEC_DEV_CUDA == device ) { nbgpus = get_nb_gpu_devices(); rc = !(nbgpus >= 1); if( rc != 0 ) { fprintf(stderr, "Rank %d doesn't have CUDA accelerators\n", rank); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, 0); -#endif + parsec_tests_abort(parsec_context, 0); return -1; } gpu_device_index = get_gpu_device_index(); @@ -1119,9 +1112,7 @@ int main(int argc, char **argv) rc = preallocate_cuda_stream_states(); if( PARSEC_SUCCESS != rc ) { fprintf(stderr, "Failed to preallocate CUDA GEMM stream states\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, rc); -#endif + parsec_tests_abort(parsec_context, rc); return rc; } } @@ -1176,11 +1167,8 @@ int main(int argc, char **argv) destroy_matrix(dcB); destroy_matrix(dcC); - parsec_fini(&parsec_context); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/dtd/dtd_test_task_generation.c b/tests/dsl/dtd/dtd_test_task_generation.c index aad36a3d5..d01268863 100644 --- a/tests/dsl/dtd/dtd_test_task_generation.c +++ b/tests/dsl/dtd/dtd_test_task_generation.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -122,32 +119,19 @@ int main(int argc, char ** argv) int nb, nt, rc; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } - if(argv[1] != NULL){ cores = atoi(argv[1]); } /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. 
" + "Try with a single-process launcher.\n" ); + } /****** Checking task generation ******/ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -214,7 +198,7 @@ int main(int argc, char ** argv) rc = parsec_context_add_taskpool( parsec, dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); if( 1 == total_flows[i] ) { for( j = 0; j < total_flows[i] * total_tasks; j += total_flows[i] ) { @@ -295,7 +279,7 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("\tNo of flows : %d \tTime for each task : %lf\n\n", total_flows[i], sync_time_elapsed/total_tasks)); + SYNC_TIME_PRINT(parsec, rank, ("\tNo of flows : %d \tTime for each task : %lf\n\n", total_flows[i], sync_time_elapsed/total_tasks)); parsec_taskpool_free( dtd_tp ); parsec_dtd_data_collection_fini( A ); @@ -307,11 +291,8 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_task_inserting_task.c b/tests/dsl/dtd/dtd_test_task_inserting_task.c index c09e5e1e9..20e591df3 100644 --- a/tests/dsl/dtd/dtd_test_task_inserting_task.c +++ b/tests/dsl/dtd/dtd_test_task_inserting_task.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. 
*/ /* parsec things */ @@ -19,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -68,36 +65,26 @@ task_to_insert_task( parsec_execution_stream_t *es, int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; if(argv[1] != NULL){ cores = atoi(argv[1]); } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m; int no_of_tasks = 1; - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); /* Registering the dtd_handle with PARSEC context */ rc = parsec_context_add_taskpool(parsec, (parsec_taskpool_t *)dtd_tp); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); rc = parsec_context_start(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_start"); @@ -120,15 +107,12 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\n")); + SYNC_TIME_PRINT(parsec, rank, ("\n")); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_task_insertion.c b/tests/dsl/dtd/dtd_test_task_insertion.c index 6b2213f22..89769ed19 100644 --- a/tests/dsl/dtd/dtd_test_task_insertion.c +++ b/tests/dsl/dtd/dtd_test_task_insertion.c @@ -2,6 +2,7 @@ * Copyright (c) 
2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -19,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed = 0.0; double sync_time_elapsed = 0.0; @@ -81,30 +78,21 @@ test_task_generator( parsec_execution_stream_t *es, int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; if(argv[1] != NULL){ cores = atoi(argv[1]); } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m, n; int no_of_tasks = 50000; int amount_of_work[3] = {100, 1000, 10000}; parsec_taskpool_t *dtd_tp; - parsec = parsec_init( cores, &argc, &argv ); cores = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_CORES); dtd_tp = parsec_dtd_taskpool_new(); @@ -217,11 +205,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_task_placement.c b/tests/dsl/dtd/dtd_test_task_placement.c index adfe7fd9d..51bb84997 100644 --- a/tests/dsl/dtd/dtd_test_task_placement.c +++ b/tests/dsl/dtd/dtd_test_task_placement.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -89,28 +87,20 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + nb = 1; /* tile_size */ + + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( world != 2 ) { - parsec_fatal( "Nope! world is not right, we need exactly two MPI process. " - "Try with \"mpirun -np 2 .....\"\n" ); + parsec_fatal( "Nope! world is not right, we need exactly two processes. " + "Try with a two-process launcher.\n" ); } - nb = 1; /* tile_size */ nt = world; /* total no. 
of tiles */ - parsec = parsec_init(cores, &argc, &argv); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); adt = parsec_matrix_adt_new_rect( @@ -189,11 +179,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_template_counter.c b/tests/dsl/dtd/dtd_test_template_counter.c index 399f9ea12..f5b33ef35 100644 --- a/tests/dsl/dtd/dtd_test_template_counter.c +++ b/tests/dsl/dtd/dtd_test_template_counter.c @@ -14,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -21,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -63,26 +60,18 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = (world > 1) ? world : 2; /* total no. of tiles */ if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = (world > 1) ? world : 2; /* total no. 
of tiles */ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -148,11 +137,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c b/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c index 040fb825b..ec8dbb42e 100644 --- a/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c +++ b/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c @@ -2,6 +2,7 @@ * Copyright (c) 2018-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -16,15 +17,12 @@ #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/scheduling.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int task(parsec_execution_stream_t *es, parsec_task_t *this_task) { @@ -94,30 +92,21 @@ int main(int argc, char **argv) { parsec_context_t* parsec; int rc, i; - int rank, world, cores = -1; - -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. 
" - "Try with \"mpirun -np 1 .....\"\n" ); - } + int world, cores = -1; if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, NULL, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); + } parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -159,11 +148,8 @@ int main(int argc, char **argv) parsec_taskpool_free(dtd_tp); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_untie.c b/tests/dsl/dtd/dtd_test_untie.c index ef8dcaed2..ae6c3ebf9 100644 --- a/tests/dsl/dtd/dtd_test_untie.c +++ b/tests/dsl/dtd/dtd_test_untie.c @@ -21,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed = 0.0; double sync_time_elapsed = 0.0; @@ -95,24 +91,13 @@ int main(int argc, char ** argv) if( 0 >= cores ) cores = 8; /* fix it to a sane number */ -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( world != 1 ) { - parsec_fatal( "Nope! 
world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); } int m, n; @@ -126,8 +111,6 @@ int main(int argc, char ** argv) no_of_chain = cores; int tasks_in_each_chain[3] = {1000, 10000, 100000}; - parsec = parsec_init( cores, &argc, &argv ); - dtd_tp = parsec_dtd_taskpool_new(); /* Registering the dtd_taskpool with PARSEC context */ @@ -153,7 +136,7 @@ int main(int argc, char ** argv) for( i = 0; i < 3; i++ ) { - SYNC_TIME_START(); + SYNC_TIME_START(parsec); for( n = 0; n < no_of_chain; n++ ) { for( m = 0; m < tasks_in_each_chain[i]; m++ ) { parsec_dtd_insert_task(dtd_tp, test_task, 0, PARSEC_DEV_CPU, "Test_Task", @@ -167,12 +150,12 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); + SYNC_TIME_PRINT(parsec, rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); } count = 0; for( i = 0; i < 3; i++ ) { - SYNC_TIME_START(); + SYNC_TIME_START(parsec); int step = parsec_dtd_window_size, iteration = 0; for( n = 0; n < no_of_chain; n++ ) { @@ -190,7 +173,7 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); + SYNC_TIME_PRINT(parsec, rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); } rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); @@ 
-201,11 +184,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_war.c b/tests/dsl/dtd/dtd_test_war.c index 91e9fb5d8..837ab67e7 100644 --- a/tests/dsl/dtd/dtd_test_war.c +++ b/tests/dsl/dtd/dtd_test_war.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -12,6 +13,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - static volatile int32_t count_war_error = 0; static volatile int32_t count_raw_error = 0; @@ -72,28 +70,19 @@ int main(int argc, char ** argv) int no_of_tasks, no_of_read_tasks = 5, key; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - if(argv[1] != NULL){ cores = atoi(argv[1]); } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + no_of_tasks = world; nb = 1; /* tile_size */ nt = no_of_tasks; /* total no. 
of tiles */ - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); adt = parsec_matrix_adt_new_rect( @@ -157,11 +146,8 @@ int main(int argc, char ** argv) parsec_dtd_free_arena_datatype(parsec, TILE_FULL); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/CMakeLists.txt b/tests/dsl/ptg/CMakeLists.txt index 25ed08aea..1d2c64bf0 100644 --- a/tests/dsl/ptg/CMakeLists.txt +++ b/tests/dsl/ptg/CMakeLists.txt @@ -1,21 +1,30 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + add_subdirectory(ptgpp) parsec_addtest_executable(C strange) +target_link_libraries(strange PRIVATE tests_runtime_common) target_ptg_sources(strange PRIVATE "strange.jdf") parsec_addtest_executable(C recursive) +target_link_libraries(recursive PRIVATE tests_runtime_common) target_ptg_sources(recursive PRIVATE "recursive.jdf") if(PARSEC_HAVE_RANDOM) parsec_addtest_executable(C startup) + target_link_libraries(startup PRIVATE tests_runtime_common) target_ptg_sources(startup PRIVATE "startup.jdf") endif(PARSEC_HAVE_RANDOM) parsec_addtest_executable(C complex_deps) +target_link_libraries(complex_deps PRIVATE tests_runtime_common) target_ptg_sources(complex_deps PRIVATE "complex_deps.jdf") if(PARSEC_HAVE_DEV_CAPABILITY_BATCH) parsec_addtest_executable(C batch_cpu) + target_link_libraries(batch_cpu PRIVATE tests_runtime_common) target_ptg_sources(batch_cpu PRIVATE "batch_cpu.jdf") endif(PARSEC_HAVE_DEV_CAPABILITY_BATCH) diff --git a/tests/dsl/ptg/batch_cpu.jdf b/tests/dsl/ptg/batch_cpu.jdf index 05db2adb2..d39196a56 100644 --- a/tests/dsl/ptg/batch_cpu.jdf +++ b/tests/dsl/ptg/batch_cpu.jdf @@ -12,6 +12,7 @@ extern "C" %{ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/mca/device/device.h" +#include "tests/tests_runtime.h" #include "batch_cpu.h" 
#define TYPE PARSEC_MATRIX_INTEGER @@ -70,14 +71,6 @@ int main(int argc, char **argv) int rank = 0; int ret = 0; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - } -#endif - if( NULL != argv[1] ) { n = atoi(argv[1]); } @@ -85,8 +78,9 @@ int main(int argc, char **argv) n = 32; } - parsec = parsec_init(-1, &argc, &argv); - assert(NULL != parsec); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init(&descA, TYPE, PARSEC_MATRIX_TILE, rank, @@ -136,11 +130,8 @@ int main(int argc, char **argv) free(descA.mat); PARSEC_OBJ_RELEASE(adt.arena); parsec_matrix_arena_datatype_destruct_free_type(&adt); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/ptg/branching/CMakeLists.txt b/tests/dsl/ptg/branching/CMakeLists.txt index 4ea295fc0..61910cac7 100644 --- a/tests/dsl/ptg/branching/CMakeLists.txt +++ b/tests/dsl/ptg/branching/CMakeLists.txt @@ -2,14 +2,17 @@ include(ParsecCompilePTG) # Default build: probably ht, but not taking a chance of missing an option parsec_addtest_executable(C branching SOURCES main.c branching_wrapper.c branching_data.c) +target_link_libraries(branching PRIVATE tests_runtime_common) target_ptg_sources(branching PRIVATE "branching.jdf") # Force dynamic hash tables test parsec_addtest_executable(C branching_ht SOURCES main.c branching_wrapper.c branching_data.c) +target_link_libraries(branching_ht PRIVATE tests_runtime_common) target_ptg_source_ex(TARGET branching_ht DESTINATION branching_ht MODE PRIVATE SOURCE branching.jdf DEP_MANAGEMENT dynamic-hash-table) add_dependencies(branching_ht branching) # We need to have branching.h generated 
before # Force index array test parsec_addtest_executable(C branching_idxarr SOURCES main.c branching_wrapper.c branching_data.c) +target_link_libraries(branching_idxarr PRIVATE tests_runtime_common) target_ptg_source_ex(TARGET branching_idxarr DESTINATION branching_idxarr MODE PRIVATE SOURCE branching.jdf DEP_MANAGEMENT index-array) add_dependencies(branching_idxarr branching) # We need to have branching.h generated before diff --git a/tests/dsl/ptg/branching/branching_wrapper.c b/tests/dsl/ptg/branching/branching_wrapper.c index ceab9504f..1f1753a61 100644 --- a/tests/dsl/ptg/branching/branching_wrapper.c +++ b/tests/dsl/ptg/branching/branching_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "branching.h" diff --git a/tests/dsl/ptg/branching/main.c b/tests/dsl/ptg/branching/main.c index dd3426e20..035438855 100644 --- a/tests/dsl/ptg/branching/main.c +++ b/tests/dsl/ptg/branching/main.c @@ -2,18 +2,17 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "branching_wrapper.h" #include "branching_data.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ volatile int32_t nb_taskA = 0; volatile int32_t nb_taskB = 0; @@ -27,18 +26,9 @@ int main(int argc, char *argv[]) parsec_data_collection_t *dcA; parsec_taskpool_t *branching; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); size = 256; if(argc != 2) { @@ -66,20 +56,23 @@ int main(int argc, char *argv[]) free_data(dcA); - parsec_fini(&parsec); int gnbA = nb_taskA, gnbB = nb_taskB, gnbC = nb_taskC; -#if defined(PARSEC_HAVE_MPI) - MPI_Allreduce(MPI_IN_PLACE, &gnbA, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &gnbB, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &gnbC, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); -#endif + rc = parsec_tests_allreduce(parsec, NULL, &gnbA, 1, + parsec_datatype_int_t, PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + rc = parsec_tests_allreduce(parsec, NULL, &gnbB, 1, + parsec_datatype_int_t, PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + rc = parsec_tests_allreduce(parsec, NULL, &gnbC, 1, + parsec_datatype_int_t, PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + printf("nb = %d, nb_taskA = %d, nb_taskB = %d, nb_taskC = %d -- %s\n", nb, gnbA, gnbB, gnbC, gnbA == nb && gnbB == 2*nb && gnbC == nb ? 
"SUCCESS" : "FAILURE!"); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( gnbA == nb && gnbB == 2*nb && diff --git a/tests/dsl/ptg/choice/CMakeLists.txt b/tests/dsl/ptg/choice/CMakeLists.txt index 074d8c97e..b78ef2a2e 100644 --- a/tests/dsl/ptg/choice/CMakeLists.txt +++ b/tests/dsl/ptg/choice/CMakeLists.txt @@ -1,4 +1,5 @@ include(ParsecCompilePTG) parsec_addtest_executable(C choice SOURCES main.c choice_wrapper.c choice_data.c) +target_link_libraries(choice PRIVATE tests_runtime_common) target_ptg_sources(choice PRIVATE "choice.jdf") diff --git a/tests/dsl/ptg/choice/choice_wrapper.c b/tests/dsl/ptg/choice/choice_wrapper.c index 167811ba9..1a72a6d01 100644 --- a/tests/dsl/ptg/choice/choice_wrapper.c +++ b/tests/dsl/ptg/choice/choice_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "choice.h" diff --git a/tests/dsl/ptg/choice/main.c b/tests/dsl/ptg/choice/main.c index 1413794c1..f76d52de3 100644 --- a/tests/dsl/ptg/choice/main.c +++ b/tests/dsl/ptg/choice/main.c @@ -2,7 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" @@ -10,12 +10,10 @@ #include "choice_wrapper.h" #include "choice_data.h" #include "parsec/data_distribution.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #include #include @@ -28,18 +26,6 @@ int main(int argc, char *argv[]) int *decision; parsec_taskpool_t *choice; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - size = 256; int pargc = 0; char **pargv = NULL; @@ -62,10 +48,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(cores, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + dcA = create_and_distribute_data(rank, world, size); parsec_data_collection_set_key(dcA, "A"); @@ -83,8 +69,6 @@ int main(int argc, char *argv[]) parsec_taskpool_free((parsec_taskpool_t*)choice); - parsec_fini(&parsec); - for(size = 0; size < world; size++) { if( rank == size ) { printf("On rank %d, the choices were: ", rank); @@ -93,17 +77,17 @@ int main(int argc, char *argv[]) printf("%c%s", c == 0 ? '#' : (c == 1 ? 'A' : 'B'), i == nb ? 
"\n" : ", "); } } -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } } free_data(dcA); free(decision); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/complex_deps.jdf b/tests/dsl/ptg/complex_deps.jdf index 5a2301a2c..7335a73d2 100644 --- a/tests/dsl/ptg/complex_deps.jdf +++ b/tests/dsl/ptg/complex_deps.jdf @@ -3,9 +3,11 @@ extern "C" %{ * Copyright (c) 2013-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #include #include #include @@ -142,18 +144,10 @@ int main( int argc, char** argv ) continue; } } -#ifdef DISTRIBUTED - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ - parsec = parsec_init(cores, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &size); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Build the data and the arena to hold it up. 
@@ -216,7 +210,8 @@ int main( int argc, char** argv ) free(descA.mat); - parsec_fini( &parsec); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/controlgather/CMakeLists.txt b/tests/dsl/ptg/controlgather/CMakeLists.txt index f20629a6d..c5e883422 100644 --- a/tests/dsl/ptg/controlgather/CMakeLists.txt +++ b/tests/dsl/ptg/controlgather/CMakeLists.txt @@ -1,5 +1,9 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C ctlgat SOURCES main.c ctlgat_wrapper.c ctlgat_data.c) +target_link_libraries(ctlgat PRIVATE tests_runtime_common) target_ptg_sources(ctlgat PRIVATE "ctlgat.jdf") - diff --git a/tests/dsl/ptg/controlgather/ctlgat.jdf b/tests/dsl/ptg/controlgather/ctlgat.jdf index 999b917bb..a12d571a5 100644 --- a/tests/dsl/ptg/controlgather/ctlgat.jdf +++ b/tests/dsl/ptg/controlgather/ctlgat.jdf @@ -3,14 +3,10 @@ extern "C" %{ * Copyright (c) 2012-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ -#if defined(PARSEC_HAVE_MPI) -#include -#define MY_RANK(r) int r; MPI_Comm_rank(MPI_COMM_WORLD, &r) -#else -#define MY_RANK(r) int r = 0 -#endif +#define MY_RANK(dc, r) int r = (int)((dc)->myrank) %} %option no_taskpool_instance = true /* can be anything */ @@ -29,7 +25,7 @@ CTL X -> X TC(0) ; 0 BODY - MY_RANK(r); + MY_RANK(A, r); printf("%d: TA(%d)\n", r, k); END @@ -43,7 +39,7 @@ CTL X -> Y TC(0) ; 0 BODY - MY_RANK(r); + MY_RANK(A, r); printf("%d: TB(%d)\n", r, k); END @@ -58,7 +54,7 @@ CTL Y <- X TB(0..NT-1) ; 0 BODY - MY_RANK(r); + MY_RANK(A, r); printf("%d: TC(%d)\n", r, k); END extern "C" %{ diff --git a/tests/dsl/ptg/controlgather/ctlgat_wrapper.c b/tests/dsl/ptg/controlgather/ctlgat_wrapper.c index 963c2bad0..97d802164 100644 --- a/tests/dsl/ptg/controlgather/ctlgat_wrapper.c +++ b/tests/dsl/ptg/controlgather/ctlgat_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "ctlgat.h" @@ -40,13 +38,8 @@ PARSEC_OBJ_CLASS_INSTANCE(parsec_ctlgat_taskpool_t, parsec_taskpool_t, */ parsec_taskpool_t *ctlgat_new(parsec_data_collection_t *A, int size, int nb) { - int worldsize; + int worldsize = (int)A->nodes; parsec_ctlgat_taskpool_t *tp = NULL; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_size(MPI_COMM_WORLD, &worldsize); -#else - worldsize = 1; -#endif if( nb <= 0 || size <= 0 ) { fprintf(stderr, "To work, CTLGAT must do at least one round time trip of at least one byte\n"); diff --git a/tests/dsl/ptg/controlgather/main.c b/tests/dsl/ptg/controlgather/main.c index aa998c061..704e223eb 100644 --- a/tests/dsl/ptg/controlgather/main.c +++ b/tests/dsl/ptg/controlgather/main.c @@ -2,19 +2,17 @@ * Copyright (c) 2009-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "ctlgat_wrapper.h" #include "ctlgat_data.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int main(int argc, char *argv[]) { parsec_context_t* parsec; @@ -23,18 +21,10 @@ int main(int argc, char *argv[]) parsec_data_collection_t *dcA; parsec_taskpool_t *ctlgat; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); size = 256; nb = 4 * world; @@ -54,10 +44,8 @@ int main(int argc, char *argv[]) free_data(dcA); - parsec_fini(&parsec); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/local-indices/CMakeLists.txt b/tests/dsl/ptg/local-indices/CMakeLists.txt index 9645bc0ff..07c79f652 100644 --- a/tests/dsl/ptg/local-indices/CMakeLists.txt +++ b/tests/dsl/ptg/local-indices/CMakeLists.txt @@ -1,3 +1,3 @@ parsec_addtest_executable(C local_indices) target_ptg_sources(local_indices PRIVATE "local_indices.jdf") -target_link_libraries(local_indices PRIVATE m) +target_link_libraries(local_indices PRIVATE m tests_runtime_common) diff --git a/tests/dsl/ptg/local-indices/local_indices.jdf b/tests/dsl/ptg/local-indices/local_indices.jdf index a05e1a870..9b253332b 100644 --- a/tests/dsl/ptg/local-indices/local_indices.jdf +++ b/tests/dsl/ptg/local-indices/local_indices.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2023 
The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include @@ -11,6 +11,7 @@ extern "C" %{ #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test defines sparse execution domains to illustrate the @@ -132,24 +133,16 @@ int main( int argc, char** argv ) srand( getpid() ); -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &ws); - MPI_Comm_rank(MPI_COMM_WORLD, &mr); - for(c = (int)sqrt(ws)+1; c > 0; c--) { - if( (c < ws) && (ws % c) == 0 ) { - p = c; - break; - } - } - } -#endif + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &mr, &ws); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); + for(c = (int)sqrt(ws)+1; c > 0; c--) { + if( (c < ws) && (ws % c) == 0 ) { + p = c; + break; + } } /** @@ -191,11 +184,11 @@ int main( int argc, char** argv ) parsec_context_wait(parsec); -#ifdef PARSEC_HAVE_MPI - MPI_Reduce(local_nb, global_nb, 4, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); -#else - memcpy(global_nb, local_nb, 4*sizeof(int)); -#endif + rc = parsec_tests_allreduce(parsec, local_nb, global_nb, 4, + parsec_datatype_int_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + ret = 0; if( 0 == mr ) { if( global_nb[0] != 25 ) { @@ -221,11 +214,8 @@ int main( int argc, char** argv ) free(descA.mat); parsec_matrix_adt_free( &adt ); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/ptg/multisize_bcast/CMakeLists.txt b/tests/dsl/ptg/multisize_bcast/CMakeLists.txt index 
a5d665949..c48469fd0 100644 --- a/tests/dsl/ptg/multisize_bcast/CMakeLists.txt +++ b/tests/dsl/ptg/multisize_bcast/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C check_multisize_bcast SOURCES main.c check_multisize_bcast_wrapper.c data_gen.c) +target_link_libraries(check_multisize_bcast PRIVATE tests_runtime_common) target_ptg_sources(check_multisize_bcast PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/check_multisize_bcast.jdf") - diff --git a/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c b/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c index 1cd8e8cd8..adf9b6178 100644 --- a/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c +++ b/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "check_multisize_bcast_wrapper.h" diff --git a/tests/dsl/ptg/multisize_bcast/main.c b/tests/dsl/ptg/multisize_bcast/main.c index 277c202e0..1dd58c39c 100644 --- a/tests/dsl/ptg/multisize_bcast/main.c +++ b/tests/dsl/ptg/multisize_bcast/main.c @@ -2,12 +2,14 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "check_multisize_bcast_wrapper.h" #if defined(PARSEC_HAVE_STRING_H) #include @@ -22,14 +24,6 @@ int main(int argc, char *argv[]) parsec_matrix_block_cyclic_t *dcA; parsec_taskpool_t *bcast; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif if( argc > 1 ) { char* endptr; long val = strtol(argv[1], &endptr, 0); @@ -44,10 +38,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(cores, &argc, &argv); - if( NULL == parsec ) { - exit(1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); dcA = create_and_distribute_data(rank, world, nb, nt); parsec_data_collection_set_key((parsec_data_collection_t *)dcA, "A"); @@ -64,11 +58,8 @@ int main(int argc, char *argv[]) parsec_taskpool_free((parsec_taskpool_t*)bcast); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/CMakeLists.txt b/tests/dsl/ptg/ptgpp/CMakeLists.txt index c0f2db207..20da2e27f 100644 --- a/tests/dsl/ptg/ptgpp/CMakeLists.txt +++ b/tests/dsl/ptg/ptgpp/CMakeLists.txt @@ -1,10 +1,17 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + parsec_addtest_executable(C write_check SOURCES vector.c) +target_link_libraries(write_check PRIVATE tests_runtime_common) target_ptg_sources(write_check PRIVATE "write_check.jdf") parsec_addtest_executable(C jdf_forward_RW_NULL) +target_link_libraries(jdf_forward_RW_NULL PRIVATE tests_runtime_common) target_ptg_sources(jdf_forward_RW_NULL PRIVATE "forward_RW_NULL.jdf") parsec_addtest_executable(C jdf_forward_READ_NULL) +target_link_libraries(jdf_forward_READ_NULL PRIVATE tests_runtime_common) target_ptg_sources(jdf_forward_READ_NULL PRIVATE "forward_READ_NULL.jdf") parsec_addtest_executable(C must_fail_too_many_in_deps NODEFAULTBUILD) @@ -37,3 +44,9 @@ target_ptg_sources(must_fail_too_many_local_vars PRIVATE "too_many_local_vars.jd set_target_properties(must_fail_too_many_local_vars PROPERTIES EXCLUDE_FROM_ALL TRUE EXCLUDE_FROM_DEFAULT_BUILD TRUE) + +target_link_libraries(must_fail_too_many_in_deps PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_out_deps PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_read_flows PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_write_flows PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_local_vars PRIVATE tests_runtime_common) diff --git a/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf b/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf index ba7abd83b..ebc60ea44 100644 --- a/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf +++ b/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf @@ -4,6 +4,7 @@ extern "C" %{ * Copyright (c) 2014-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /** @@ -12,6 +13,8 @@ extern "C" %{ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/datatype.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" %} @@ -78,19 +81,10 @@ int main(int argc, char *argv[]) parsec_data_collection_t taskdist; parsec_forward_READ_NULL_taskpool_t *tp; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Let's initialize the task distribution descriptor @@ -128,10 +122,8 @@ int main(int argc, char *argv[]) */ parsec_data_collection_destroy(&taskdist); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf b/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf index 1fc74e070..97494aba0 100644 --- a/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf +++ b/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf @@ -4,6 +4,7 @@ extern "C" %{ * Copyright (c) 2014-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /** @@ -12,6 +13,8 @@ extern "C" %{ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/datatype.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" %} @@ -78,19 +81,10 @@ int main(int argc, char *argv[]) parsec_data_collection_t taskdist; parsec_forward_RW_NULL_taskpool_t *tp; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Let's initialize the task distribution descriptor @@ -128,10 +122,8 @@ int main(int argc, char *argv[]) */ parsec_data_collection_destroy(&taskdist); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf b/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf index 53e9f48dd..63b6379ba 100644 --- a/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_DEP_IN_COUNT > 20 #error MAX_DEP_IN_COUNT is too large for this test. 
@@ -65,17 +67,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -112,9 +106,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf b/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf index b6c2de23b..d399e40dd 100644 --- a/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_LOCAL_COUNT > 10 #error MAX_LOCAL_COUNT is too large for this test. 
@@ -58,17 +60,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -105,9 +99,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf b/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf index 0a5d65648..7c2401415 100644 --- a/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_DEP_OUT_COUNT > 20 #error MAX_DEP_OUT_COUNT is too large for this test. 
@@ -64,17 +66,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -107,9 +101,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf b/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf index 0851422ed..29416ca0d 100644 --- a/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_PARAM_COUNT > 20 #error MAX_PARAM_COUNT is too large for this test. 
@@ -66,17 +68,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -109,9 +103,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf b/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf index ce7631e72..b67872b4e 100644 --- a/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_PARAM_COUNT > 20 #error MAX_PARAM_COUNT is too large for this test. 
@@ -66,17 +68,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -113,9 +107,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/write_check.jdf b/tests/dsl/ptg/ptgpp/write_check.jdf index c17f4b7ad..367c3e65f 100644 --- a/tests/dsl/ptg/ptgpp/write_check.jdf +++ b/tests/dsl/ptg/ptgpp/write_check.jdf @@ -3,8 +3,10 @@ extern "C" %{ * Copyright (c) 2014-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" static int verbose = 0; @@ -80,15 +82,6 @@ int main(int argc, char* argv[]) int i = 0, block = 10, n = 1000, rc; int rank = 0, np = 1; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &np); - } -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -112,10 +105,10 @@ int main(int argc, char* argv[]) } } - parsec = parsec_init(-1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &np); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, rank, @@ -158,14 +151,16 @@ int main(int argc, char* argv[]) PARSEC_OBJ_DESTRUCT(&tp->arenas_datatypes[PARSEC_write_check_DEFAULT_ADT_IDX]); parsec_taskpool_free((parsec_taskpool_t*)tp); - parsec_fini(&parsec); free(descA.mat); int maxloc[2] = {error_found, rank}; -#ifdef PARSEC_HAVE_MPI - MPI_Reduce(0 == rank? MPI_IN_PLACE: &maxloc, &maxloc, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD); - MPI_Finalize(); -#endif + rc = parsec_tests_allreduce(parsec, NULL, maxloc, 1, + parsec_datatype_int_t, + PARSEC_TESTS_REDUCE_MAXLOC_INT); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( 0 == rank) { if( maxloc[0] > 0 ) { diff --git a/tests/dsl/ptg/recursive.jdf b/tests/dsl/ptg/recursive.jdf index a4ea1cf77..48a3491cf 100644 --- a/tests/dsl/ptg/recursive.jdf +++ b/tests/dsl/ptg/recursive.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include @@ -11,6 +12,7 @@ extern "C" %{ #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/data_dist/matrix/subtile.h" +#include "tests/tests_runtime.h" #include "recursive.h" /* generated header */ @@ -101,13 +103,6 @@ int main( int argc, char** argv ) parsec_context_t *parsec; int ni = NN, level = 3, i = 1, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -129,10 +124,9 @@ int main( int argc, char** argv ) } } - parsec = parsec_init(-1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Build the data and the arena to hold it up. @@ -169,11 +163,8 @@ int main( int argc, char** argv ) free(descA.mat); parsec_matrix_adt_free(&adt); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/startup.jdf b/tests/dsl/ptg/startup.jdf index dd271c57e..918f6d7c2 100644 --- a/tests/dsl/ptg/startup.jdf +++ b/tests/dsl/ptg/startup.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,6 +11,7 @@ extern "C" %{ #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test stress the startup mechanism by generating NI*NJ*NK independent @@ -96,13 +98,6 @@ int main( int argc, char** argv ) int ni = NN, nj = NN, nk = NN, verbose = 0, i = 1, rc; long time_elapsed; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -128,10 +123,9 @@ int main( int argc, char** argv ) } } - parsec = parsec_init(-1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Build the data and the arena to hold it up. @@ -204,11 +198,8 @@ int main( int argc, char** argv ) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&descA); parsec_matrix_adt_free(&adt); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/strange.jdf b/tests/dsl/ptg/strange.jdf index d210100c3..93a7bd0d5 100644 --- a/tests/dsl/ptg/strange.jdf +++ b/tests/dsl/ptg/strange.jdf @@ -3,12 +3,14 @@ extern "C" %{ * Copyright (c) 2015-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
* */ #include #include "strange.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" struct prev_next_s { int prev; @@ -148,15 +150,9 @@ int main(int argc, char* argv[] ) print_prev_next("Random array", neworder, n); } -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &pargc, &pargv); - assert( NULL != parsec ); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -191,11 +187,8 @@ int main(int argc, char* argv[] ) parsec_taskpool_free(&tp->super); free(descA.mat); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( val != n ) { printf("Failed execution (%d != %d)\n", val, n); diff --git a/tests/dsl/ptg/user-defined-functions/CMakeLists.txt b/tests/dsl/ptg/user-defined-functions/CMakeLists.txt index 643a1a043..21e945e65 100644 --- a/tests/dsl/ptg/user-defined-functions/CMakeLists.txt +++ b/tests/dsl/ptg/user-defined-functions/CMakeLists.txt @@ -1,9 +1,15 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) parsec_addtest_executable(C udf SOURCES main.c udf_wrapper.c) +target_link_libraries(udf PRIVATE tests_runtime_common) target_include_directories(udf PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(udf PRIVATE "udf.jdf") parsec_addtest_executable(C utt) +target_link_libraries(utt PRIVATE tests_runtime_common) target_include_directories(utt PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(utt PRIVATE "utt.jdf") diff --git a/tests/dsl/ptg/user-defined-functions/main.c b/tests/dsl/ptg/user-defined-functions/main.c index c5d39b1c7..75988f746 100644 --- a/tests/dsl/ptg/user-defined-functions/main.c +++ b/tests/dsl/ptg/user-defined-functions/main.c @@ -2,16 +2,14 @@ * Copyright (c) 2019-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include #include #include "parsec/runtime.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ +#include "tests/tests_runtime.h" #include "udf_wrapper.h" @@ -30,6 +28,7 @@ int main(int argc, char *argv[]) parsec_udf_taskpool_t *udf_tp; int largc; char **largv; + int rc; static struct option long_options[] = { {"P", required_argument, 0, 'P'}, @@ -43,19 +42,7 @@ int main(int argc, char *argv[]) }; int option_index = 0, c; int P = -1, MB = -1, NB = 1, M = -1, N = 1; - -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; - P = 1; -#endif + int show_help = 0; while(1) { option_index = 0; @@ -94,36 +81,38 @@ int main(int argc, char *argv[]) cores = atoi(optarg); break; case 'h': - if( 0 == rank ) { - fprintf(stderr, - "Usage: %s [-M ] [-N ] [-m ] [-n ] [-P

]\n" - " Display how many times a probe function is called to build a basic PTG\n" - " M: number of rows in the matrix (default N)\n" - " N: number of columns in the matrix\n" - " MB: number of rows in a tile (default NB)\n" - " NB: number of columns in a tile\n" - " P: number of rows of processes in the 2D grid (default np, must divide np)\n" - " c: number of computing threads to create per rank (default one per core)\n" - "\n", argv[0]); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, 1); -#endif - exit(1); - } -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); /**< Will let the other ranks wait for the MPI_Abort */ -#endif + show_help = 1; break; /**< To silent warnings */ } + if( show_help ) { + break; + } } largc = argc - optind; largv = argv + optind; - parsec = parsec_init(cores, &largc, &largv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &largc, &largv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + if( show_help ) { + if( 0 == rank ) { + fprintf(stderr, + "Usage: %s [-M ] [-N ] [-m ] [-n ] [-P

]\n" + " Display how many times a probe function is called to build a basic PTG\n" + " M: number of rows in the matrix (default N)\n" + " N: number of columns in the matrix\n" + " MB: number of rows in a tile (default NB)\n" + " NB: number of columns in a tile\n" + " P: number of rows of processes in the 2D grid (default np, must divide np)\n" + " c: number of computing threads to create per rank (default one per core)\n" + "\n", argv[0]); + } + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return 1; + } if( -1 == MB ) MB = NB; @@ -134,13 +123,9 @@ int main(int argc, char *argv[]) if( -1 == N || -1 == NB ) { if( 0 == rank ) { fprintf(stderr, "Incorrect usage, see --help\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, 1); -#endif + parsec_tests_abort(parsec, 1); } -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); /**< Will let the other ranks wait for the MPI_Abort */ -#endif + (void)parsec_tests_barrier(parsec); /**< Will let the other ranks wait for the abort */ exit(1); } @@ -168,11 +153,8 @@ int main(int argc, char *argv[]) parsec_data_free(A.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&A); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/user-defined-functions/utt.jdf b/tests/dsl/ptg/user-defined-functions/utt.jdf index 2b5997034..99247c906 100644 --- a/tests/dsl/ptg/user-defined-functions/utt.jdf +++ b/tests/dsl/ptg/user-defined-functions/utt.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,6 +11,7 @@ extern "C" %{ #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test uses a User-Triggered-Termination (UTT) to detect @@ -110,20 +112,12 @@ int main( int argc, char** argv ) int ret; int nt; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &ws); - MPI_Comm_rank(MPI_COMM_WORLD, &mr); - } -#endif - nt = 2*ws; + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &mr, &ws); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + nt = 2*ws; /** * Build the data and the arena to hold it up. @@ -175,11 +169,8 @@ int main( int argc, char** argv ) free(descA.mat); parsec_matrix_adt_free( & adt ); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/profiling-standalone/CMakeLists.txt b/tests/profiling-standalone/CMakeLists.txt index f99f9f7fc..c350e7462 100644 --- a/tests/profiling-standalone/CMakeLists.txt +++ b/tests/profiling-standalone/CMakeLists.txt @@ -1,3 +1,7 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + set(PARSEC_PROFILING_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/parsec/") set(PARSEC_PROFILING_LIBRARIES "parsec;parsec-base;${CMAKE_THREAD_LIBS_INIT}") @@ -6,8 +10,9 @@ INCLUDE_DIRECTORIES("${PARSEC_PROFILING_INCLUDE_DIR}") if(PARSEC_HAVE_PTHREAD_BARRIER) add_executable(sp-demo sp-demo.c) target_link_libraries (sp-demo "${PARSEC_PROFILING_LIBRARIES}") + target_link_libraries (sp-demo tests_runtime_common) add_executable(sp-perf sp-perf.c) target_link_libraries (sp-perf "${PARSEC_PROFILING_LIBRARIES}") + target_link_libraries (sp-perf tests_runtime_common) endif(PARSEC_HAVE_PTHREAD_BARRIER) - diff --git a/tests/profiling-standalone/sp-demo.c b/tests/profiling-standalone/sp-demo.c index ad04c7238..62fa5563f 100644 --- a/tests/profiling-standalone/sp-demo.c +++ b/tests/profiling-standalone/sp-demo.c @@ -2,6 +2,7 @@ * Copyright (c) 2016-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -30,9 +31,10 @@ #include #include #include +#include #include "parsec/profiling.h" - -#include +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #define NB_THREADS 4 #define EVENTS_PER_THREAD 10 @@ -136,24 +138,43 @@ int main(int argc, char *argv[]) { int i, rc; per_thread_info_t thread_info[NB_THREADS]; - int mpi_rank; + int rank; + parsec_context_t *parsec; + int parsec_argc = 0; + char **parsec_argv = NULL; + + for(i = 1; i < argc; i++) { + if( 0 == strcmp(argv[i], "--") ) { + parsec_argc = argc - i; + parsec_argv = argv + i; + argc = i; + break; + } + } - MPI_Init(&argc, &argv); // MPI is only needed if using OTF2 as a backend. It can be ignored otherwise. 
- MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** First, there is a sequential part (no threads) */ /** We initialize the system */ - parsec_profiling_init(mpi_rank); + parsec_profiling_init(rank); - /** MPI should be initialized before the dbp_start call, if it is a distributed application + /** The test runtime should be initialized before the dbp_start call, if it is a distributed application * first argument sp is the base name for the trace file - * It will be named sp-<%d>.prof-XXXX where <%d> is the MPI rank (0 if no MPI), and XXXXX is a random value + * It will be named sp-<%d>.prof-XXXX where <%d> is the process rank, + * and XXXXX is a random value. * second argument "Demonstration..." is a human readable string to qualify the trace */ rc = parsec_profiling_dbp_start( "sp", "Demonstration of basic PaRSEC profiling system" ); - if( 0 != rc ) + if( 0 != rc ) { + parsec_profiling_fini(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; + } /** Each Event type must be defined before any event is traced * They are defined by being added to a dictionary. 
@@ -200,5 +221,7 @@ int main(int argc, char *argv[]) parsec_profiling_dbp_dump(); parsec_profiling_fini(); - MPI_Finalize(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return 0; } diff --git a/tests/profiling-standalone/sp-perf.c b/tests/profiling-standalone/sp-perf.c index a65781921..cbd19b9bb 100644 --- a/tests/profiling-standalone/sp-perf.c +++ b/tests/profiling-standalone/sp-perf.c @@ -19,6 +19,8 @@ #include #include #include "parsec/profiling.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #if !defined(timersub) #define timersub(a, b, result) do { \ @@ -31,8 +33,6 @@ } while(0) #endif -#include - typedef struct { pthread_t pthread_id; int thread_index; @@ -94,14 +94,28 @@ static void *run_thread(void *_arg) int main(int argc, char *argv[]) { - int i, opt; + int i, opt, rc; per_thread_info_t *thread_info; int nbthreads = 1; char *filename = NULL; - int mpi_rank; + int rank; + parsec_context_t *parsec; + int parsec_argc = 0; + char **parsec_argv = NULL; + + for(i = 1; i < argc; i++) { + if( 0 == strcmp(argv[i], "--") ) { + parsec_argc = argc - i; + parsec_argv = argv + i; + argc = i; + break; + } + } - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); while ((opt = getopt(argc, argv, "f:n:N:h?")) != -1) { switch (opt) { @@ -117,6 +131,8 @@ int main(int argc, char *argv[]) default: /* '?' 
*/ fprintf(stderr, "Usage: %s [-f filename] [-n number of threads] [-N number of tasks per thread]\n", argv[0]); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); exit(EXIT_FAILURE); } } @@ -128,9 +144,13 @@ int main(int argc, char *argv[]) } if( profiling ) { - parsec_profiling_init(mpi_rank); - if( parsec_profiling_dbp_start(filename, "PaRSEC profiling system performance evaluation" ) == -1 ) + parsec_profiling_init(rank); + if( parsec_profiling_dbp_start(filename, "PaRSEC profiling system performance evaluation" ) == -1 ) { + parsec_profiling_fini(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); exit(EXIT_FAILURE); + } parsec_profiling_add_dictionary_keyword("Event", "#FF0000", 0, NULL, &event_startkey, &event_endkey); } @@ -162,7 +182,8 @@ int main(int argc, char *argv[]) } free(thread_info); - MPI_Finalize(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); exit(EXIT_SUCCESS); } diff --git a/tests/profiling/CMakeLists.txt b/tests/profiling/CMakeLists.txt index 0d16ec371..3ad72d9ef 100644 --- a/tests/profiling/CMakeLists.txt +++ b/tests/profiling/CMakeLists.txt @@ -1,7 +1,12 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) if(TARGET parsec-ptgpp) parsec_addtest_executable(C async) + target_link_libraries(async PRIVATE tests_runtime_common) target_ptg_sources(async PRIVATE "async.jdf") endif(TARGET parsec-ptgpp) @@ -10,4 +15,3 @@ if(MPI_Fortran_FOUND AND CMAKE_Fortran_COMPILER_WORKS) parsec_addtest_executable(Fortran generate_f SOURCES generate_f.F90) endif(CMAKE_Fortran_COMPILER_SUPPORTS_F90) endif(MPI_Fortran_FOUND AND CMAKE_Fortran_COMPILER_WORKS) - diff --git a/tests/profiling/async.jdf b/tests/profiling/async.jdf index b313e705e..f8245062d 100644 --- a/tests/profiling/async.jdf +++ b/tests/profiling/async.jdf @@ -3,12 +3,14 @@ extern "C" %{ * Copyright (c) 2020-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include #include #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test creates asynchronous tasks to stress the profiling @@ -168,15 +170,6 @@ int main( int argc, char** argv ) int parsec_argc = argc, arg; char **parsec_argv = NULL; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &ws); - MPI_Comm_rank(MPI_COMM_WORLD, &mr); - } -#endif - #if !defined(PARSEC_PROF_TRACE) fprintf(stderr, "This profiling test has been compiled with profiling disabled...\n"); exit(1); @@ -197,16 +190,18 @@ int main( int argc, char** argv ) argc--; } - parsec = parsec_init(-1, &parsec_argc, &parsec_argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &mr, &ws); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( argc > 1 ) { NB = atoi(argv[1]); } if(NB <= 0) { fprintf(stderr, "Usage: async [-v] NB [--mca profile_filename /path/to/profile --mca mca_pins 
task_profiler\n"); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 1; } @@ -256,11 +251,8 @@ int main( int argc, char** argv ) parsec_taskpool_free( (parsec_taskpool_t*)tp ); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/CMakeLists.txt b/tests/runtime/CMakeLists.txt index 563704f90..1293ef4b9 100644 --- a/tests/runtime/CMakeLists.txt +++ b/tests/runtime/CMakeLists.txt @@ -1,3 +1,7 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + add_subdirectory(scheduling) add_Subdirectory(cuda) @@ -7,6 +11,5 @@ if( MPI_C_FOUND ) endif( MPI_C_FOUND ) parsec_addtest_executable(C dtt_bug_replicator SOURCES dtt_bug_replicator_ex.c) +target_link_libraries(dtt_bug_replicator PRIVATE tests_runtime_common) target_ptg_sources(dtt_bug_replicator PRIVATE "dtt_bug_replicator.jdf") - - diff --git a/tests/runtime/Testings.cmake b/tests/runtime/Testings.cmake index bec60bba4..573bdd3ae 100644 --- a/tests/runtime/Testings.cmake +++ b/tests/runtime/Testings.cmake @@ -1,2 +1,6 @@ include(runtime/scheduling/Testings.cmake) include(runtime/cuda/Testings.cmake) + +if( MPI_C_FOUND ) + parsec_addtest_cmd(runtime/multichain:mp ${MPI_TEST_CMD_LIST} 4 runtime/multichain -l=1 -c=2) +endif( MPI_C_FOUND ) diff --git a/tests/runtime/cuda/CMakeLists.txt b/tests/runtime/cuda/CMakeLists.txt index 6d1479fb1..dca54e287 100644 --- a/tests/runtime/cuda/CMakeLists.txt +++ b/tests/runtime/cuda/CMakeLists.txt @@ -10,16 +10,19 @@ if(PARSEC_HAVE_CUDA) else( NOT TARGET CUDA::cublas ) parsec_addtest_executable(C nvlink SOURCES nvlink_main.c nvlink_wrapper.c) + target_link_libraries(nvlink PRIVATE tests_runtime_common) target_include_directories(nvlink PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(nvlink PRIVATE "nvlink.jdf") target_link_libraries(nvlink PRIVATE 
CUDA::cublas) parsec_addtest_executable(C stress SOURCES stress_main.c stress_wrapper.c) + target_link_libraries(stress PRIVATE tests_runtime_common) target_include_directories(stress PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(stress PRIVATE "stress.jdf") target_link_libraries(stress PRIVATE CUDA::cublas) parsec_addtest_executable(C stage SOURCES stage_main.c) + target_link_libraries(stage PRIVATE tests_runtime_common) target_include_directories(stage PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(stage PRIVATE "stage_custom.jdf") target_link_libraries(stage PRIVATE CUDA::cublas) @@ -28,6 +31,7 @@ if(PARSEC_HAVE_CUDA) # Testing for getting best device parsec_addtest_executable(C testing_get_best_device SOURCES "testing_get_best_device.c") + target_link_libraries(testing_get_best_device PRIVATE tests_runtime_common) target_include_directories(testing_get_best_device PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(testing_get_best_device PRIVATE "get_best_device_check.jdf") diff --git a/tests/runtime/cuda/get_best_device_check.jdf b/tests/runtime/cuda/get_best_device_check.jdf index 3fac93b87..2e4e5b979 100644 --- a/tests/runtime/cuda/get_best_device_check.jdf +++ b/tests/runtime/cuda/get_best_device_check.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2021-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. 
*/ #include "cuda_test_internal.h" @@ -220,7 +220,12 @@ int parsec_get_best_device_check(parsec_context_t *parsec, parsec_taskpool_free(parsec_get_best_device_check); - MPI_Allreduce(&info_tmp[0], &info, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + int rc = parsec_tests_allreduce(parsec, &info_tmp[0], &info, 1, + parsec_datatype_int_t, + PARSEC_TESTS_REDUCE_SUM); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + } return info; } diff --git a/tests/runtime/cuda/nvlink.jdf b/tests/runtime/cuda/nvlink.jdf index e4bfe570c..5658395fe 100644 --- a/tests/runtime/cuda/nvlink.jdf +++ b/tests/runtime/cuda/nvlink.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -16,9 +16,6 @@ extern "C" %{ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) #include "parsec/mca/device/device.h" #include diff --git a/tests/runtime/cuda/nvlink_main.c b/tests/runtime/cuda/nvlink_main.c index 7d822ec7e..ca6f10386 100644 --- a/tests/runtime/cuda/nvlink_main.c +++ b/tests/runtime/cuda/nvlink_main.c @@ -2,50 +2,45 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "nvlink.h" #include "nvlink_wrapper.h" -#if defined(DISTRIBUTED) -#include -#endif - int main(int argc, char *argv[]) { parsec_context_t *parsec = NULL; parsec_taskpool_t *tp; - int size = 1; - int rank = 0; - -#if defined(DISTRIBUTED) - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ + int rc; - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /* can the test run? */ int nb_gpus = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA); assert(nb_gpus >= 0); if(nb_gpus == 0) { parsec_warning("This test can only run if at least one GPU device is present"); - exit(-PARSEC_ERR_DEVICE); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; } int full_peer_access = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_DEVICES_FULL_PEER_ACCESS, PARSEC_DEV_CUDA); assert(full_peer_access >= 0); if(0 == full_peer_access) { parsec_warning("This system does not have a full peer access matrix between all GPU devices"); - exit(-PARSEC_ERR_DEVICE); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; } tp = testing_nvlink_New(parsec, 10, 512); @@ -56,9 +51,7 @@ int main(int argc, char *argv[]) parsec_taskpool_free(tp); } - parsec_fini(&parsec); -#if defined(DISTRIBUTED) - MPI_Finalize(); -#endif /* DISTRIBUTED */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, 
"parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/cuda/stage_custom.jdf b/tests/runtime/cuda/stage_custom.jdf index 71b8fe2ee..e32930b42 100644 --- a/tests/runtime/cuda/stage_custom.jdf +++ b/tests/runtime/cuda/stage_custom.jdf @@ -16,7 +16,6 @@ extern "C" %{ #include #include #include -#include #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) #include "parsec/mca/device/cuda/device_cuda_internal.h" #include diff --git a/tests/runtime/cuda/stage_main.c b/tests/runtime/cuda/stage_main.c index 1309b4f2d..142a17f5b 100644 --- a/tests/runtime/cuda/stage_main.c +++ b/tests/runtime/cuda/stage_main.c @@ -2,48 +2,35 @@ * Copyright (c) 2020-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "stage_custom.h" parsec_taskpool_t* testing_stage_custom_New( parsec_context_t *ctx, int M, int N, int MB, int NB, int P, int *ret); -#if defined(DISTRIBUTED) -#include -#endif - int main(int argc, char *argv[]) { parsec_context_t *parsec = NULL; parsec_taskpool_t *tp; int size = 1; - int rank = 0; int M; int N; int MB; int NB; int P = 1; int ret = 0; - -#if defined(DISTRIBUTED) - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ + int rc; /* Initialize PaRSEC */ - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - /* Failed to correctly initialize. 
In a correct scenario report*/ - /* upstream, but in this particular case bail out.*/ - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, &size); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /* can the test run? */ assert(size == 1); @@ -52,7 +39,9 @@ int main(int argc, char *argv[]) if(nb_gpus == 0) { parsec_warning("This test can only run if at least one GPU device is present"); printf("TEST SKIPPED\n"); - exit(-PARSEC_ERR_DEVICE); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; } /* Test: comparing results when: @@ -105,10 +94,8 @@ int main(int argc, char *argv[]) printf("TEST PASSED\n"); } - parsec_fini(&parsec); -#if defined(DISTRIBUTED) - MPI_Finalize(); -#endif /* DISTRIBUTED */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return (0 == ret)? EXIT_SUCCESS: EXIT_FAILURE; } diff --git a/tests/runtime/cuda/stress.jdf b/tests/runtime/cuda/stress.jdf index 2174c5c13..dedbbe209 100644 --- a/tests/runtime/cuda/stress.jdf +++ b/tests/runtime/cuda/stress.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/parsec_config.h" @@ -16,9 +16,6 @@ extern "C" %{ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) #include "parsec/mca/device/cuda/device_cuda_internal.h" #include diff --git a/tests/runtime/cuda/stress_main.c b/tests/runtime/cuda/stress_main.c index 36008203c..947b8e32d 100644 --- a/tests/runtime/cuda/stress_main.c +++ b/tests/runtime/cuda/stress_main.c @@ -2,19 +2,18 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "stress.h" #include "stress_wrapper.h" -#if defined(DISTRIBUTED) -#include -#endif - #include #include @@ -22,11 +21,10 @@ int main(int argc, char *argv[]) { parsec_context_t *parsec = NULL; parsec_taskpool_t *tp; - int size = 1; - int rank = 0; int tile_size = 1024; int depth = 80; int ch; + int rc; /* Parse -n (tile size) and -d (depth) before parsec_init */ while ((ch = getopt(argc, argv, "n:d:")) != -1) { @@ -46,16 +44,9 @@ int main(int argc, char *argv[]) } argc = argc - optind + 1; -#if defined(DISTRIBUTED) - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ - - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); tp = testing_stress_New(parsec, depth, tile_size); if( NULL != tp ) { @@ -65,9 +56,7 @@ int main(int argc, char *argv[]) parsec_taskpool_free(tp); 
} - parsec_fini(&parsec); -#if defined(DISTRIBUTED) - MPI_Finalize(); -#endif /* DISTRIBUTED */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/cuda/testing_get_best_device.c b/tests/runtime/cuda/testing_get_best_device.c index fcafe2c9d..984555080 100644 --- a/tests/runtime/cuda/testing_get_best_device.c +++ b/tests/runtime/cuda/testing_get_best_device.c @@ -2,6 +2,7 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "cuda_test_internal.h" @@ -40,7 +41,7 @@ static int matrix_init_ops(parsec_execution_stream_t *es, int main(int argc, char *argv[]) { parsec_context_t* parsec; - int rank, nodes, ch; + int rank, nodes, ch, rc; int pargc = 0; char **pargv; @@ -67,7 +68,7 @@ int main(int argc, char *argv[]) case 'g': nb_gpus = atoi(optarg); break; case '?': case 'h': default: fprintf(stderr, - "-m : initialize MPI_THREAD_MULTIPLE (default: 0/no)\n" + "-m : request multiple-thread support from the test runtime (default: 0/no)\n" "-N : column dimension (N) of the matrices (default: 8)\n" "-t : row dimension (MB) of the tiles (default: 4)\n" "-s : rows of tiles in a k-cyclic distribution (default: 1)\n" @@ -81,19 +82,6 @@ int main(int argc, char *argv[]) } } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - int requested = m? 
MPI_THREAD_MULTIPLE: MPI_THREAD_SERIALIZED; - MPI_Init_thread(&argc, &argv, requested, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - pargc = 0; pargv = NULL; for(int i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -106,23 +94,21 @@ int main(int argc, char *argv[]) #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) extern char **environ; char *value; - if( nb_gpus < 1 && 0 == rank ) { - fprintf(stderr, "Warning: if run on GPUs, please set --gpus=value bigger than 0\n"); - } asprintf(&value, "%d", nb_gpus); parsec_setenv_mca_param( "device_cuda_enabled", value, &environ ); free(value); #endif - /* Initialize PaRSEC */ - parsec = parsec_init(cores, &pargc, &pargv); + rc = parsec_tests_context_init(cores, + m ? PARSEC_TEST_THREAD_MULTIPLE : PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - if( NULL == parsec ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ - exit(-1); +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) + if( nb_gpus < 1 && 0 == rank ) { + fprintf(stderr, "Warning: if run on GPUs, please set --gpus=value bigger than 0\n"); } +#endif /* If the number of cores has not been defined as a parameter earlier * update it with the default parameter computed in parsec_init. 
*/ @@ -151,9 +137,9 @@ int main(int argc, char *argv[]) (parsec_tiled_matrix_unary_op_t)matrix_init_ops, NULL); /* Main routines */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); info = parsec_get_best_device_check(parsec, (parsec_tiled_matrix_t *)&dcA); - SYNC_TIME_PRINT(rank, ("Get_best_device" "\tN= %d NB= %d " + SYNC_TIME_PRINT(parsec, rank, ("Get_best_device" "\tN= %d NB= %d " "PxQ= %d %d KPxKQ= %d %d cores= %d nb_gpus= %d\n", N, NB, P, nodes/P, KP, KQ, cores, parsec_nb_devices-2)); @@ -166,11 +152,8 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); /* Clean up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return (0 == info)? EXIT_SUCCESS: EXIT_FAILURE; } diff --git a/tests/runtime/dtt_bug_replicator_ex.c b/tests/runtime/dtt_bug_replicator_ex.c index bda600e25..6a16d5825 100644 --- a/tests/runtime/dtt_bug_replicator_ex.c +++ b/tests/runtime/dtt_bug_replicator_ex.c @@ -2,6 +2,7 @@ * Copyright (c) 2013-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" @@ -9,6 +10,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "dtt_bug_replicator.h" #include "parsec/arena.h" +#include "tests/tests_runtime.h" #include #define N 10 @@ -36,17 +38,10 @@ int main( int argc, char** argv ) int nodes, rank, i, j, rc; (void)argc; (void)argv; -#if defined(PARSEC_HAVE_MPI) - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &nodes); - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - - parsec = parsec_init(1, &argc, &argv); - assert( NULL != parsec ); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); PASTE_CODE_ALLOCATE_MATRIX(dcA, 1, parsec_matrix_block_cyclic, (&dcA, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, @@ -98,9 +93,7 @@ int main( int argc, char** argv ) parsec_taskpool_free(tp); - parsec_fini( &parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/scheduling/CMakeLists.txt b/tests/runtime/scheduling/CMakeLists.txt index 81dcb381c..535a7a17d 100644 --- a/tests/runtime/scheduling/CMakeLists.txt +++ b/tests/runtime/scheduling/CMakeLists.txt @@ -2,5 +2,4 @@ include(ParsecCompilePTG) parsec_addtest_executable(C schedmicro SOURCES main.c ep_wrapper.c schedmicro_data.c) target_ptg_sources(schedmicro PRIVATE "ep.jdf") -target_link_libraries(schedmicro PRIVATE m) - +target_link_libraries(schedmicro PRIVATE m tests_runtime_common) diff --git a/tests/runtime/scheduling/ep_wrapper.c b/tests/runtime/scheduling/ep_wrapper.c index c15d15389..b1cd12436 100644 --- a/tests/runtime/scheduling/ep_wrapper.c +++ b/tests/runtime/scheduling/ep_wrapper.c @@ -2,6 +2,7 @@ * Copyright (c) 2014-2021 The University of Tennessee and The University * of Tennessee Research 
Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -9,6 +10,7 @@ #include "parsec/data_distribution.h" #include "parsec/arena.h" +#include "parsec/datatype.h" #include "ep.h" #include "ep_wrapper.h" @@ -31,21 +33,20 @@ parsec_taskpool_t *ep_new(parsec_data_collection_t *A, int nt, int level) tp = parsec_ep_new(nt, level, A); -#if defined(PARSEC_HAVE_MPI) + /* The datatype is irrelevant as the example does not communicate data, + * but use the PaRSEC datatype API so the test is not tied to MPI. + */ { - MPI_Aint extent; -#if defined(PARSEC_HAVE_MPI_20) - MPI_Aint lb = 0; - MPI_Type_get_extent(MPI_BYTE, &lb, &extent); -#else - MPI_Type_extent(MPI_BYTE, &extent); -#endif /* defined(PARSEC_HAVE_MPI_20) */ - /* The datatype is irrelevant as the example does not do communications between nodes */ + ptrdiff_t lb, extent; + int rc = parsec_type_extent(parsec_datatype_uint8_t, &lb, &extent); + if( PARSEC_SUCCESS != rc ) { + parsec_taskpool_free((parsec_taskpool_t*)tp); + return NULL; + } parsec_arena_datatype_set_type( &tp->arenas_datatypes[PARSEC_ep_DEFAULT_ADT_IDX], - extent, PARSEC_ARENA_ALIGNMENT_SSE, - MPI_BYTE ); + (size_t)extent, PARSEC_ARENA_ALIGNMENT_SSE, + parsec_datatype_uint8_t ); } -#endif return (parsec_taskpool_t*)tp; } diff --git a/tests/runtime/scheduling/main.c b/tests/runtime/scheduling/main.c index 80c0d1bdb..d2c0e74b4 100644 --- a/tests/runtime/scheduling/main.c +++ b/tests/runtime/scheduling/main.c @@ -2,6 +2,7 @@ * Copyright (c) 2013-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,13 +11,11 @@ #include "ep_wrapper.h" #include "schedmicro_data.h" #include "parsec/os-spec-timing.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ static int MAXNT = 16384; static int MAXLEVEL = 1024; @@ -40,17 +39,6 @@ int main(int argc, char *argv[]) int parsec_argc = 0; char **parsec_argv = NULL; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif for(int a = 1; a < argc; a++) { if(strcmp(argv[a], "--") == 0) { parsec_argc = argc - a; @@ -81,10 +69,11 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - parsec = parsec_init(0, &parsec_argc, &parsec_argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(0, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + printf("#All measured values are times. Times are expressed in " TIMER_UNIT "\n"); level = 4 * world; @@ -134,10 +123,8 @@ int main(int argc, char *argv[]) free_data(dcA); - parsec_fini(&parsec); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/tests_runtime.c b/tests/tests_runtime.c new file mode 100644 index 000000000..86143facb --- /dev/null +++ b/tests/tests_runtime.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+ */ + +#include "tests/tests_runtime.h" + +#include +#include +#include + +int +parsec_tests_context_init(int nb_cores, int required_thread, + int *pargc, char ***pargv, + parsec_context_t **parsec, + int *rank, int *world) +{ + int rc; + + if( NULL == parsec ) { + return PARSEC_ERR_BAD_PARAM; + } + + /* + * From this point on, rank and world size come from the PaRSEC context. + * This keeps the tests independent from the selected communication backend: + * MPI builds discover them from MPI, UCX builds discover them from PMIx. + */ + *parsec = parsec_init(nb_cores, pargc, pargv); + if( NULL == *parsec ) { + return PARSEC_ERROR; + } + +#if defined(PARSEC_HAVE_MPI) + { + int mpi_initialized = 0, provided = PARSEC_TEST_THREAD_SINGLE; + + MPI_Initialized(&mpi_initialized); + if( mpi_initialized ) { + MPI_Query_thread(&provided); + if( provided < required_thread ) { + fprintf(stderr, "MPI thread support is insufficient: requested %d, provided %d\n", + required_thread, provided); + (void)parsec_tests_context_fini(parsec); + return PARSEC_ERR_NOT_SUPPORTED; + } + } + } +#else + (void)required_thread; +#endif + + if( NULL != rank ) { + rc = parsec_context_query(*parsec, PARSEC_CONTEXT_QUERY_RANK); + if( rc < 0 ) { + (void)parsec_tests_context_fini(parsec); + return rc; + } + *rank = rc; + } + if( NULL != world ) { + rc = parsec_context_query(*parsec, PARSEC_CONTEXT_QUERY_NODES); + if( rc < 0 ) { + (void)parsec_tests_context_fini(parsec); + return rc; + } + /* + * A build without a communication engine reports 0 nodes to indicate + * that no distributed runtime is active. Tests still expect a usable + * local world size, so expose that case as a single-process run. 
+ */ + if( 0 == rc ) { + rc = 1; + } + *world = rc; + } + + return PARSEC_SUCCESS; +} + +int +parsec_tests_context_fini(parsec_context_t **parsec) +{ + int rc = PARSEC_SUCCESS; + + if( (NULL != parsec) && (NULL != *parsec) ) { + rc = parsec_fini(parsec); + } + + return rc; +} + +int +parsec_tests_barrier(parsec_context_t *parsec) +{ + (void)parsec; + +#if defined(PARSEC_HAVE_MPI) + { + int mpi_initialized = 0; + int rc; + + rc = MPI_Initialized(&mpi_initialized); + if( (MPI_SUCCESS == rc) && mpi_initialized ) { + rc = MPI_Barrier(MPI_COMM_WORLD); + return (MPI_SUCCESS == rc) ? PARSEC_SUCCESS : PARSEC_ERROR; + } + } +#endif + + return PARSEC_ERR_NOT_IMPLEMENTED; +} + +void +parsec_tests_abort(parsec_context_t *parsec, int errorcode) +{ + (void)parsec; + +#if defined(PARSEC_HAVE_MPI) + { + int mpi_initialized = 0; + int rc = MPI_Initialized(&mpi_initialized); + if( (MPI_SUCCESS == rc) && mpi_initialized ) { + MPI_Abort(MPI_COMM_WORLD, errorcode); + } + } +#endif + + exit(errorcode); +} + +int +parsec_tests_allreduce(parsec_context_t *parsec, + const void *sendbuf, + void *recvbuf, + int count, + parsec_datatype_t datatype, + parsec_tests_reduce_op_t op) +{ + if( (NULL == recvbuf) || (count < 0) ) { + return PARSEC_ERR_BAD_PARAM; + } + if( (PARSEC_TESTS_REDUCE_SUM != op) && + (PARSEC_TESTS_REDUCE_BXOR != op) && + (PARSEC_TESTS_REDUCE_MAXLOC_INT != op) ) { + return PARSEC_ERR_BAD_PARAM; + } + if( (PARSEC_TESTS_REDUCE_MAXLOC_INT == op) && + (parsec_datatype_int_t != datatype) ) { + return PARSEC_ERR_BAD_PARAM; + } + +#if defined(PARSEC_HAVE_MPI) + { + MPI_Op mpi_op; + MPI_Datatype mpi_datatype = datatype; + int mpi_initialized = 0; + int rc; + + switch(op) { + case PARSEC_TESTS_REDUCE_SUM: + mpi_op = MPI_SUM; + break; + case PARSEC_TESTS_REDUCE_BXOR: + mpi_op = MPI_BXOR; + break; + case PARSEC_TESTS_REDUCE_MAXLOC_INT: + mpi_op = MPI_MAXLOC; + mpi_datatype = MPI_2INT; + break; + default: + return PARSEC_ERR_BAD_PARAM; + } + + rc = MPI_Initialized(&mpi_initialized); + if( 
(MPI_SUCCESS == rc) && mpi_initialized ) { + rc = MPI_Allreduce((NULL == sendbuf || sendbuf == recvbuf) ? MPI_IN_PLACE : (void *)sendbuf, + recvbuf, count, mpi_datatype, mpi_op, MPI_COMM_WORLD); + return (MPI_SUCCESS == rc) ? PARSEC_SUCCESS : PARSEC_ERROR; + } + } +#endif + + { + int nodes = (NULL == parsec) ? 1 : parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_NODES); + + if( nodes < 0 ) { + return nodes; + } + if( nodes > 1 ) { + return PARSEC_ERR_NOT_IMPLEMENTED; + } + if( (NULL != sendbuf) && (sendbuf != recvbuf) && (0 < count) ) { + if( PARSEC_TESTS_REDUCE_MAXLOC_INT == op ) { + memcpy(recvbuf, sendbuf, 2 * (size_t)count * sizeof(int)); + return PARSEC_SUCCESS; + } + + int size, rc; + + rc = parsec_type_size(datatype, &size); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + memcpy(recvbuf, sendbuf, (size_t)count * (size_t)size); + } + } + + (void)op; + return PARSEC_SUCCESS; +} diff --git a/tests/tests_runtime.h b/tests/tests_runtime.h new file mode 100644 index 000000000..326933c44 --- /dev/null +++ b/tests/tests_runtime.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +#if !defined(_TESTS_RUNTIME_H_) +#define _TESTS_RUNTIME_H_ + +#include "parsec.h" +#include "parsec/datatype.h" + +#if defined(PARSEC_HAVE_MPI) +#include +#define PARSEC_TEST_THREAD_SINGLE MPI_THREAD_SINGLE +#define PARSEC_TEST_THREAD_FUNNELED MPI_THREAD_FUNNELED +#define PARSEC_TEST_THREAD_SERIALIZED MPI_THREAD_SERIALIZED +#define PARSEC_TEST_THREAD_MULTIPLE MPI_THREAD_MULTIPLE +#else +#define PARSEC_TEST_THREAD_SINGLE 0 +#define PARSEC_TEST_THREAD_FUNNELED 1 +#define PARSEC_TEST_THREAD_SERIALIZED 2 +#define PARSEC_TEST_THREAD_MULTIPLE 3 +#endif + +typedef enum parsec_tests_reduce_op_e { + PARSEC_TESTS_REDUCE_SUM, + PARSEC_TESTS_REDUCE_BXOR, + PARSEC_TESTS_REDUCE_MAXLOC_INT +} parsec_tests_reduce_op_t; + +/** + * Initialize the process launcher/runtime pair used by PaRSEC tests. 
+ * + * Tests should call this helper instead of directly initializing MPI or PMIx. + * parsec_init() initializes the selected communication backend as needed, and + * this helper retrieves rank/size from the PaRSEC context afterwards. + * + * @param[in] nb_cores Number of cores to pass to parsec_init(). + * @param[in] required_thread Minimum MPI thread level, using + * PARSEC_TEST_THREAD_*. + * @param[inout] pargc PaRSEC argc, passed to parsec_init(). + * @param[inout] pargv PaRSEC argv, passed to parsec_init(). + * @param[out] parsec Initialized PaRSEC context. + * @param[out] rank Current process rank in the selected communication backend. + * @param[out] world Number of processes in the selected communication backend. + */ +int parsec_tests_context_init(int nb_cores, int required_thread, + int *pargc, char ***pargv, + parsec_context_t **parsec, + int *rank, int *world); + +/** + * Finalize the PaRSEC context and any process launcher initialized by + * parsec_tests_context_init(). + */ +int parsec_tests_context_fini(parsec_context_t **parsec); + +/** + * Synchronize all processes participating in the selected test runtime. + * + * This is intentionally a test helper, not a public runtime API. It accepts + * the PaRSEC context so future communication backends can implement the same + * operation without exposing their transport details to tests. For now, only + * MPI-backed runs have a useful implementation; non-MPI backends return + * PARSEC_ERR_NOT_IMPLEMENTED. + */ +int parsec_tests_barrier(parsec_context_t *parsec); + +/** + * Abort all processes participating in the selected test runtime. + * + * MPI-backed tests call MPI_Abort on MPI_COMM_WORLD. Other backends terminate + * the local process until they grow a distributed abort primitive. + */ +void parsec_tests_abort(parsec_context_t *parsec, int errorcode); + +/** + * Reduce values across all processes participating in the selected test runtime. 
+ * + * A NULL send buffer means in-place reduction into recvbuf. MPI-backed tests + * call MPI_Allreduce. Single-process non-MPI runs copy sendbuf into recvbuf + * and return success; multi-process non-MPI backends return + * PARSEC_ERR_NOT_IMPLEMENTED until their collective support is added. The + * PARSEC_TESTS_REDUCE_MAXLOC_INT operation expects count int pairs laid out as + * {value, rank} and uses MPI_2INT/MPI_MAXLOC when MPI backs the test runtime. + */ +int parsec_tests_allreduce(parsec_context_t *parsec, + const void *sendbuf, + void *recvbuf, + int count, + parsec_datatype_t datatype, + parsec_tests_reduce_op_t op); + +#endif /* _TESTS_RUNTIME_H_ */ diff --git a/tests/tests_timing.h b/tests/tests_timing.h index fa3327e0a..967c6a8d0 100644 --- a/tests/tests_timing.h +++ b/tests/tests_timing.h @@ -2,20 +2,20 @@ * Copyright (c) 2021-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #ifndef TIMING_H #define TIMING_H #include "parsec/runtime.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include #include extern double time_elapsed; extern double sync_time_elapsed; -#if defined( PARSEC_HAVE_MPI) -# define get_cur_time() MPI_Wtime() -#else static inline double get_cur_time(void) { struct timeval tv; @@ -25,7 +25,6 @@ static inline double get_cur_time(void) t = tv.tv_sec + tv.tv_usec / 1e6; return t; } -#endif #if defined(PARSEC_PROF_TRACE) #define PARSEC_PROFILING_START() parsec_profiling_start() @@ -41,37 +40,32 @@ static inline double get_cur_time(void) printf print; \ } while(0) -#ifdef PARSEC_HAVE_MPI -# define SYNC_TIME_START() do { \ - MPI_Barrier(MPI_COMM_WORLD); \ - PARSEC_PROFILING_START(); \ - sync_time_elapsed = get_cur_time(); \ +/* + * Non-MPI communication backends do not expose a test barrier yet. 
Keep the + * timing helpers usable as local timers in that case, but still fail on real + * barrier errors. + */ +#define SYNC_TIME_BARRIER(parsec_context) do { \ + int _parsec_tests_barrier_rc = parsec_tests_barrier(parsec_context); \ + if( PARSEC_ERR_NOT_IMPLEMENTED != _parsec_tests_barrier_rc ) { \ + PARSEC_CHECK_ERROR(_parsec_tests_barrier_rc, "parsec_tests_barrier"); \ + } \ } while(0) -# define SYNC_TIME_STOP() do { \ - MPI_Barrier(MPI_COMM_WORLD); \ - sync_time_elapsed = get_cur_time() - sync_time_elapsed; \ +#define SYNC_TIME_START(parsec_context) do { \ + SYNC_TIME_BARRIER(parsec_context); \ + PARSEC_PROFILING_START(); \ + sync_time_elapsed = get_cur_time(); \ } while(0) -# define SYNC_TIME_PRINT(rank, print) do { \ - SYNC_TIME_STOP(); \ - if(0 == rank) { \ - printf("[****] TIME(s) %12.5f : ", sync_time_elapsed); \ - printf print; \ - } \ - } while(0) - -/* overload exit in MPI mode */ -# define exit(ret) MPI_Abort(MPI_COMM_WORLD, ret) - -#else -# define SYNC_TIME_START() do { sync_time_elapsed = get_cur_time(); } while(0) -# define SYNC_TIME_STOP() do { sync_time_elapsed = get_cur_time() - sync_time_elapsed; } while(0) -# define SYNC_TIME_PRINT(rank, print) do { \ - SYNC_TIME_STOP(); \ - if(0 == rank) { \ - printf("[****] TIME(s) %12.5f : ", sync_time_elapsed); \ - printf print; \ - } \ +#define SYNC_TIME_STOP(parsec_context) do { \ + SYNC_TIME_BARRIER(parsec_context); \ + sync_time_elapsed = get_cur_time() - sync_time_elapsed; \ } while(0) -#endif +#define SYNC_TIME_PRINT(parsec_context, rank, print) do { \ + SYNC_TIME_STOP(parsec_context); \ + if(0 == rank) { \ + printf("[****] TIME(s) %12.5f : ", sync_time_elapsed); \ + printf print; \ + } \ + } while(0) #endif /* TIMING_H */