From 71366dcc07a7d6c776a0e57e242e1e8db3c7ca56 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 19 May 2026 01:21:41 -0400 Subject: [PATCH 1/5] comm: split remote dependencies from MPI transport Introduce a comm MCA framework and move the existing funnelled MPI communication engine under the new comm/mpi component. Select the communication backend through MCA while preserving MPI as the default backend and keeping the existing parsec_comm_engine_t callback interface. Rename the remote dependency protocol implementation from remote_dep_mpi.c to remote_dep_comm.c and remove direct MPI usage from that layer. The remote dependency code now uses the selected communication engine callbacks for active message (AM), pack/unpack, memory registration, progress, GET, and PUT operations. Move MPI-specific startup validation and thread-level capability detection into the MPI comm backend. Advertise backend multithread support through parsec_ce.capabilites.multithreaded, and let the remote dependency layer use that generic capability instead of querying MPI directly. Update remote dependency comments, debug/profiling names, and protocol helper names to avoid MPI-specific wording where the logic is transport-neutral. Keep datatype matching outside of the future datatype engine; parsec_type_match remains a generic compatibility helper. 
Signed-off-by: George Bosilca --- parsec/CMakeLists.txt | 3 +- parsec/mca/comm/CMakeLists.txt | 9 + parsec/mca/comm/comm.c | 58 ++ parsec/mca/comm/comm.h | 98 ++++ parsec/mca/comm/mpi/ValidateModule.CMake | 10 + parsec/mca/comm/mpi/comm_mpi.h | 33 ++ parsec/mca/comm/mpi/comm_mpi_component.c | 60 +++ .../comm/mpi/comm_mpi_funnelled.c} | 38 +- .../comm/mpi/comm_mpi_funnelled.h} | 33 +- parsec/parsec.c | 15 +- parsec/parsec_comm_engine.c | 13 +- parsec/parsec_comm_engine.h | 2 + parsec/remote_dep.h | 12 +- .../{remote_dep_mpi.c => remote_dep_comm.c} | 506 +++++++++--------- 14 files changed, 604 insertions(+), 286 deletions(-) create mode 100644 parsec/mca/comm/CMakeLists.txt create mode 100644 parsec/mca/comm/comm.c create mode 100644 parsec/mca/comm/comm.h create mode 100644 parsec/mca/comm/mpi/ValidateModule.CMake create mode 100644 parsec/mca/comm/mpi/comm_mpi.h create mode 100644 parsec/mca/comm/mpi/comm_mpi_component.c rename parsec/{parsec_mpi_funnelled.c => mca/comm/mpi/comm_mpi_funnelled.c} (97%) rename parsec/{parsec_mpi_funnelled.h => mca/comm/mpi/comm_mpi_funnelled.h} (67%) rename parsec/{remote_dep_mpi.c => remote_dep_comm.c} (84%) diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index df3dfe05c..f34dd76fb 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -132,8 +132,7 @@ if( PARSEC_PROF_TRACE ) endif( PARSEC_PROF_TRACE ) if( PARSEC_HAVE_MPI ) list(APPEND SOURCES - parsec_mpi_funnelled.c - remote_dep_mpi.c) + remote_dep_comm.c) endif( PARSEC_HAVE_MPI ) if( NOT MPI_C_FOUND ) list(APPEND SOURCES datatype/datatype.c) diff --git a/parsec/mca/comm/CMakeLists.txt b/parsec/mca/comm/CMakeLists.txt new file mode 100644 index 000000000..c25717cdd --- /dev/null +++ b/parsec/mca/comm/CMakeLists.txt @@ -0,0 +1,9 @@ +# Sources that belong to the comm framework itself. Backend-specific files are +# discovered from each module directory through its ValidateModule.CMake file. 
+set(MCA_${COMPONENT}_SOURCES mca/comm/comm.c) + +# Install the framework header so in-tree and future out-of-tree comm components +# can build against the same component/module contract. +set_property(TARGET parsec + APPEND PROPERTY + PUBLIC_HEADER_H mca/comm/comm.h) diff --git a/parsec/mca/comm/comm.c b/parsec/mca/comm/comm.c new file mode 100644 index 000000000..e03e0acef --- /dev/null +++ b/parsec/mca/comm/comm.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/mca/comm/comm.h" +#include "parsec/mca/mca_repository.h" +#include "parsec/utils/debug.h" +#include + +static parsec_comm_base_component_t *parsec_comm_selected_component = NULL; + +parsec_comm_engine_t * +parsec_comm_engine_component_init(parsec_context_t *context) +{ + mca_base_component_t **components; + mca_base_module_t *selected_module = NULL; + mca_base_component_t *selected_component = NULL; + parsec_comm_engine_t *ce; + + assert(NULL == parsec_comm_selected_component); + + /* + * Query all compiled and user-enabled comm components, close every component + * that was not selected, and keep the selected component open until + * parsec_comm_engine_component_fini(). 
+ */ + components = mca_components_open_bytype("comm"); + mca_components_query(components, &selected_module, &selected_component); + mca_components_close(components); + + if( NULL == selected_module ) { + parsec_warning("No communication engine component could be selected"); + return NULL; + } + + parsec_comm_selected_component = (parsec_comm_base_component_t *)selected_component; + + parsec_debug_verbose(4, parsec_debug_output, "Installing communication engine %s", + parsec_comm_selected_component->base_version.mca_component_name); + + ce = ((parsec_comm_module_t *)selected_module)->module.init(context); + if( NULL == ce ) { + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + } + return ce; +} + +int +parsec_comm_engine_component_fini(void) +{ + if( NULL != parsec_comm_selected_component ) { + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + } + return PARSEC_SUCCESS; +} diff --git a/parsec/mca/comm/comm.h b/parsec/mca/comm/comm.h new file mode 100644 index 000000000..d61a7d016 --- /dev/null +++ b/parsec/mca/comm/comm.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * Communication engine MCA framework. + * + * The comm framework selects exactly one transport backend for a PaRSEC + * context. The selected backend fills the existing parsec_comm_engine_t + * interface; the rest of the runtime continues to use parsec_ce and the + * function table from parsec_comm_engine.h. + * + * This framework deliberately exposes only selection and teardown entry points. + * Callers should not reach back into the selected MCA module after init; all + * transport operations go through the returned parsec_comm_engine_t. 
+ */ +#ifndef PARSEC_COMM_H_HAS_BEEN_INCLUDED +#define PARSEC_COMM_H_HAS_BEEN_INCLUDED + +#include "parsec/parsec_config.h" +#include "parsec/parsec_comm_engine.h" +#include "parsec/mca/mca.h" + +BEGIN_C_DECLS + +/** + * Common component header for communication engine components. + * + * Component-specific state should live in the component source file or in the + * parsec_comm_engine_t implementation, not in this base type. + */ +struct parsec_comm_base_component_2_0_0 { + mca_base_component_2_0_0_t base_version; + mca_base_component_data_2_0_0_t base_data; +}; + +typedef struct parsec_comm_base_component_2_0_0 parsec_comm_base_component_2_0_0_t; +typedef struct parsec_comm_base_component_2_0_0 parsec_comm_base_component_t; + +/** + * Initialize a communication engine backend. + * + * @param[inout] context PaRSEC context that owns the selected communication + * engine instance. + * + * @return A fully initialized parsec_comm_engine_t on success, or NULL if this + * module cannot initialize for the provided context. + */ +typedef parsec_comm_engine_t *(*parsec_comm_base_module_init_fn_t)(parsec_context_t *context); + +/** + * Communication module contract. + * + * The module has a single responsibility at this layer: build and return the + * concrete parsec_comm_engine_t used by the runtime. Backend operations + * themselves are the function pointers stored in that returned engine. + */ +struct parsec_comm_base_module_1_0_0_t { + parsec_comm_base_module_init_fn_t init; +}; + +typedef struct parsec_comm_base_module_1_0_0_t parsec_comm_base_module_1_0_0_t; +typedef struct parsec_comm_base_module_1_0_0_t parsec_comm_base_module_t; + +typedef struct parsec_comm_module_s { + const parsec_comm_base_component_t *component; + parsec_comm_base_module_t module; +} parsec_comm_module_t; + +/** + * MCA version tuple for the comm framework. 
+ */ +#define PARSEC_COMM_BASE_VERSION_2_0_0 \ + MCA_BASE_VERSION_2_0_0, \ + "comm", 2, 0, 0 + +/** + * Select and initialize the active communication engine component. + * + * This is internal to the runtime wrapper in parsec_comm_engine.c. It opens all + * available comm components, keeps only the selected component open, and calls + * the selected module's init method. + */ +parsec_comm_engine_t *parsec_comm_engine_component_init(parsec_context_t *context); + +/** + * Close the component selected by parsec_comm_engine_component_init(). + * + * The parsec_comm_engine_t itself must have already been finalized through its + * fini function before this call; this function only releases the MCA component + * lifetime. + */ +int parsec_comm_engine_component_fini(void); + +END_C_DECLS + +#endif /* PARSEC_COMM_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/mca/comm/mpi/ValidateModule.CMake b/parsec/mca/comm/mpi/ValidateModule.CMake new file mode 100644 index 000000000..5180b8d38 --- /dev/null +++ b/parsec/mca/comm/mpi/ValidateModule.CMake @@ -0,0 +1,10 @@ +# The MPI backend is the current implementation of the distributed +# communication engine. Build it only when PaRSEC itself was configured with +# MPI support; otherwise the runtime keeps using the existing local-only path. +if(PARSEC_HAVE_MPI) + set(MCA_${COMPONENT}_${MODULE} ON) + file(GLOB MCA_${COMPONENT}_${MODULE}_SOURCES ${MCA_BASE_DIR}/${COMPONENT}/${MODULE}/[^\\.]*.c) + set(MCA_${COMPONENT}_${MODULE}_CONSTRUCTOR "${COMPONENT}_${MODULE}_static_component") +else() + set(MCA_${COMPONENT}_${MODULE} OFF) +endif() diff --git a/parsec/mca/comm/mpi/comm_mpi.h b/parsec/mca/comm/mpi/comm_mpi.h new file mode 100644 index 000000000..fd4aca531 --- /dev/null +++ b/parsec/mca/comm/mpi/comm_mpi.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * MPI communication engine MCA component declaration. 
+ * + * This component keeps the existing funnelled MPI transport behind the new + * comm framework. The transport implementation remains in + * comm_mpi_funnelled.c; this header only exposes the component symbol required + * by the MCA repository. + */ +#ifndef PARSEC_COMM_MPI_H_HAS_BEEN_INCLUDED +#define PARSEC_COMM_MPI_H_HAS_BEEN_INCLUDED + +#include "parsec/mca/comm/comm.h" + +BEGIN_C_DECLS + +/** + * MCA component descriptor for the MPI communication engine. + */ +extern const parsec_comm_base_component_t parsec_comm_mpi_component; + +/** + * Constructor used by the static MCA component table. + */ +mca_base_component_t *comm_mpi_static_component(void); + +END_C_DECLS + +#endif /* PARSEC_COMM_MPI_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/mca/comm/mpi/comm_mpi_component.c b/parsec/mca/comm/mpi/comm_mpi_component.c new file mode 100644 index 000000000..8fb758e01 --- /dev/null +++ b/parsec/mca/comm/mpi/comm_mpi_component.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/mca/comm/mpi/comm_mpi.h" +#include "comm_mpi_funnelled.h" + +static int comm_mpi_component_query(mca_base_module_t **module, int *priority); + +/* + * The MPI component currently wraps the pre-existing funnelled MPI engine. + * The module init function returns the global parsec_ce populated with MPI + * callbacks; no extra selected-component accessor is needed after init. 
+ */ +static parsec_comm_module_t parsec_comm_mpi_module = { + .component = &parsec_comm_mpi_component, + .module = { + .init = mpi_funnelled_init, + }, +}; + +const parsec_comm_base_component_t parsec_comm_mpi_component = { + { + PARSEC_COMM_BASE_VERSION_2_0_0, + + "mpi", + "", + PARSEC_VERSION_MAJOR, + PARSEC_VERSION_MINOR, + + NULL, + NULL, + comm_mpi_component_query, + NULL, + "", + }, + { + MCA_BASE_METADATA_PARAM_NONE, + "", + } +}; + +mca_base_component_t * +comm_mpi_static_component(void) +{ + return (mca_base_component_t *)&parsec_comm_mpi_component; +} + +static int +comm_mpi_component_query(mca_base_module_t **module, int *priority) +{ + /* + * MPI is the only comm component in this first componentization step, so it + * keeps a high fixed priority and remains the default backend. + */ + *priority = 100; + *module = (mca_base_module_t *)&parsec_comm_mpi_module; + return MCA_SUCCESS; +} diff --git a/parsec/parsec_mpi_funnelled.c b/parsec/mca/comm/mpi/comm_mpi_funnelled.c similarity index 97% rename from parsec/parsec_mpi_funnelled.c rename to parsec/mca/comm/mpi/comm_mpi_funnelled.c index 5e3c670ef..11d9f0265 100644 --- a/parsec/parsec_mpi_funnelled.c +++ b/parsec/mca/comm/mpi/comm_mpi_funnelled.c @@ -4,12 +4,22 @@ * reserved. * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ +/** + * @file + * + * Funnelled MPI implementation of the communication engine callbacks. + * + * This file is intentionally still transport-specific after the first comm MCA + * componentization step. The generic remote-dependency protocol continues to + * call through parsec_comm_engine_t, while this backend provides the MPI active + * message, memory-handle, pack/unpack, and progress operations. 
+ */ #include #include #include #include -#include "parsec/parsec_mpi_funnelled.h" +#include "comm_mpi_funnelled.h" #include "parsec/remote_dep.h" #include "parsec/class/parsec_hash_table.h" #include "parsec/class/dequeue.h" @@ -641,7 +651,30 @@ static int mpi_funneled_init_once(parsec_context_t* context) parsec_comm_engine_t * mpi_funnelled_init(parsec_context_t *context) { - int i, rc; + int i, rc, is_mpi_up = 0, thread_level_support; + + MPI_Initialized(&is_mpi_up); + if( 0 == is_mpi_up ) { + /** + * MPI is not up. The MPI backend cannot provide communication or the + * MPI datatype operations used by the current distributed build. + */ + context->nb_nodes = 1; + parsec_communication_engine_up = -1; /* No communications supported */ + parsec_fatal("MPI was not initialized. This version of PaRSEC was compiled with MPI datatype support and needs MPI to execute.\n" + "\t* Please initialize MPI in the application (MPI_Init/MPI_Init_thread) before initializing PaRSEC.\n" + "\t* Alternatively, compile a version of PaRSEC without MPI (-DPARSEC_DIST_WITH_MPI=OFF in ccmake)\n"); + return NULL; + } + + MPI_Query_thread(&thread_level_support); + if( thread_level_support == MPI_THREAD_SINGLE || + thread_level_support == MPI_THREAD_FUNNELED ) { + parsec_warning("MPI was not initialized with the appropriate level of thread support.\n" + "\t* Current level is %s, while MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE is needed\n" + "\t* to guarantee correctness of the PaRSEC runtime.\n", + thread_level_support == MPI_THREAD_SINGLE ? "MPI_THREAD_SINGLE" : "MPI_THREAD_FUNNELED"); + } if( -1 == MAX_MPI_TAG ) if( 0 != (rc = mpi_funneled_init_once(context)) ) { @@ -692,6 +725,7 @@ mpi_funnelled_init(parsec_context_t *context) parsec_ce.parsec_context = context; parsec_ce.capabilites.sided = 2; parsec_ce.capabilites.supports_noncontiguous_datatype = 1; + parsec_ce.capabilites.multithreaded = (thread_level_support >= MPI_THREAD_MULTIPLE); /* Define some sensible values. 
We assume the application will initialize PaRSEC using * the entire MPI_COMM_WORLD, but we need to prepare some decent default values. */ diff --git a/parsec/parsec_mpi_funnelled.h b/parsec/mca/comm/mpi/comm_mpi_funnelled.h similarity index 67% rename from parsec/parsec_mpi_funnelled.h rename to parsec/mca/comm/mpi/comm_mpi_funnelled.h index 9996f89ef..db5daf494 100644 --- a/parsec/parsec_mpi_funnelled.h +++ b/parsec/mca/comm/mpi/comm_mpi_funnelled.h @@ -2,23 +2,39 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * Backend-local interface for the funnelled MPI communication engine. + * + * These entry points populate the transport function table exposed through + * parsec_comm_engine_t. They remain MPI-specific and should not be called + * directly by generic remote-dependency code; generic code should use the + * parsec_comm_engine_t callbacks instead. */ #ifndef __USE_PARSEC_MPI_FUNNELLED_H__ #define __USE_PARSEC_MPI_FUNNELLED_H__ #include "parsec/parsec_comm_engine.h" -/* ------- Funnelled MPI implementation below ------- */ +/** Initialize the funnelled MPI communication engine for a PaRSEC context. */ parsec_comm_engine_t * mpi_funnelled_init(parsec_context_t *parsec_context); + +/** Finalize the funnelled MPI communication engine instance. */ int mpi_funnelled_fini(parsec_comm_engine_t *comm_engine); +/** Register an active-message tag and receive callback in the MPI backend. */ int mpi_no_thread_tag_register(parsec_ce_tag_t tag, parsec_ce_am_callback_t cb, void *cb_data, size_t msg_length); +/** Unregister a previously registered active-message tag. */ int mpi_no_thread_tag_unregister(parsec_ce_tag_t tag); +/** Register a local memory region and return the backend memory handle. 
*/ int mpi_no_thread_mem_register(void *mem, parsec_mem_type_t mem_type, size_t count, parsec_datatype_t datatype, @@ -26,12 +42,16 @@ mpi_no_thread_mem_register(void *mem, parsec_mem_type_t mem_type, parsec_ce_mem_reg_handle_t *lreg, size_t *lreg_size); +/** Release a memory handle returned by mpi_no_thread_mem_register(). */ int mpi_no_thread_mem_unregister(parsec_ce_mem_reg_handle_t *lreg); +/** Return the fixed wire size used for MPI memory-registration handles. */ int mpi_no_thread_get_mem_reg_handle_size(void); +/** Decode a local MPI memory-registration handle. */ int mpi_no_thread_mem_retrieve(parsec_ce_mem_reg_handle_t lreg, void **mem, parsec_datatype_t *datatype, int *count); +/** Start a remote PUT through the funnelled MPI backend. */ int mpi_no_thread_put(parsec_comm_engine_t *comm_engine, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, @@ -42,6 +62,7 @@ int mpi_no_thread_put(parsec_comm_engine_t *comm_engine, parsec_ce_onesided_callback_t l_cb, void *l_cb_data, parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); +/** Start a remote GET through the funnelled MPI backend. */ int mpi_no_thread_get(parsec_comm_engine_t *comm_engine, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, @@ -52,31 +73,41 @@ int mpi_no_thread_get(parsec_comm_engine_t *comm_engine, parsec_ce_onesided_callback_t l_cb, void *l_cb_data, parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); +/** Send an active message to a remote rank. */ int mpi_no_thread_send_active_message(parsec_comm_engine_t *comm_engine, parsec_ce_tag_t tag, int remote, void *addr, size_t size); +/** Make progress on pending MPI communication requests. */ int mpi_no_thread_progress(parsec_comm_engine_t *comm_engine); +/** Enable active-message receives for the MPI backend. */ int mpi_no_thread_enable(parsec_comm_engine_t *comm_engine); + +/** Disable active-message receives for the MPI backend. 
*/ int mpi_no_thread_disable(parsec_comm_engine_t *comm_engine); +/** Pack data using MPI datatype semantics. */ int mpi_no_thread_pack(parsec_comm_engine_t *ce, void *inbuf, int incount, parsec_datatype_t type, void *outbuf, int outsize, int *positionA); +/** Compute the size needed to pack data with MPI datatype semantics. */ int mpi_no_thread_pack_size(parsec_comm_engine_t *ce, int incount, parsec_datatype_t type, int *size); +/** Unpack data using MPI datatype semantics. */ int mpi_no_thread_unpack(parsec_comm_engine_t *ce, void *inbuf, int insize, int *position, void *outbuf, int outcount, parsec_datatype_t type); +/** Synchronize all outstanding MPI communication operations. */ int mpi_no_thread_sync(parsec_comm_engine_t *comm_engine); +/** Report whether the MPI backend can accept more pending work. */ int mpi_no_thread_can_push_more(parsec_comm_engine_t *c_e); diff --git a/parsec/parsec.c b/parsec/parsec.c index 7f4bbf71c..20a11dfde 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -988,6 +988,7 @@ int parsec_version( int* version_major, int* version_minor, int* version_release int parsec_version_ex( size_t len, char* version_string) { int ret; + char *comm_components = mca_components_list_compiled("comm"); char *sched_components = mca_components_list_compiled("sched"); char *device_components = mca_components_list_compiled("device"); char *pins_components = mca_components_list_compiled("pins"); @@ -1058,18 +1059,7 @@ int parsec_version_ex( size_t len, char* version_string) { "no" #endif /*PARSEC_PROF_TRACE*/ , -#if defined(PARSEC_HAVE_MPI) - "mpi" -#if defined(PARSEC_HAVE_MPI_20) - "2" -#endif -#if defined(PARSEC_DIST_THREAD) - "+thread_multiple" -#endif -#else /* defined(PARSEC_HAVE_MPI) */ - "single process only" -#endif - , + comm_components, device_components, sched_components, #if defined(PARSEC_HAVE_HWLOC) @@ -1118,6 +1108,7 @@ int parsec_version_ex( size_t len, char* version_string) { CMAKE_PARSEC_C_COMPILER, CMAKE_PARSEC_C_FLAGS ); + 
free(comm_components); free(device_components); free(sched_components); free(pins_components); diff --git a/parsec/parsec_comm_engine.c b/parsec/parsec_comm_engine.c index 95c88bead..fd212028d 100644 --- a/parsec/parsec_comm_engine.c +++ b/parsec/parsec_comm_engine.c @@ -2,24 +2,25 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include #include "parsec/parsec_config.h" -#include "parsec/parsec_mpi_funnelled.h" +#include "parsec/mca/comm/comm.h" #include "parsec/remote_dep.h" parsec_comm_engine_t parsec_ce; #if defined(PARSEC_HAVE_MPI) -/* This function will be called by the runtime */ +/* Select and initialize the distributed communication backend. */ parsec_comm_engine_t * parsec_comm_engine_init(parsec_context_t *parsec_context) { - /* call the selected module init */ - parsec_comm_engine_t *ce = mpi_funnelled_init(parsec_context); + parsec_comm_engine_t *ce = parsec_comm_engine_component_init(parsec_context); + assert(NULL != ce); assert(ce->capabilites.sided > 0 && ce->capabilites.sided < 3); return ce; } @@ -31,8 +32,9 @@ parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine) { (void) parsec_remote_dep_fini(comm_engine->parsec_context); remote_dep_ce_fini(comm_engine->parsec_context); - /* call the selected module fini */ + /* Finalize the backend engine before releasing the selected MCA component. */ parsec_ce.fini(&parsec_ce); + parsec_comm_engine_component_fini(); return PARSEC_SUCCESS; } @@ -41,6 +43,7 @@ parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine) parsec_comm_engine_t * parsec_comm_engine_init(parsec_context_t *parsec_context) { + /* Local builds keep the in-process engine and do not select a comm component. 
*/ parsec_ce.parsec_context = parsec_context; parsec_ce.capabilites.sided = 0; parsec_ce.capabilites.supports_noncontiguous_datatype = 0; diff --git a/parsec/parsec_comm_engine.h b/parsec/parsec_comm_engine.h index 3ce32aca8..37c31ebc2 100644 --- a/parsec/parsec_comm_engine.h +++ b/parsec/parsec_comm_engine.h @@ -169,7 +169,9 @@ typedef int (*parsec_ce_reshape_fn_t)(parsec_comm_engine_t* ce, struct parsec_comm_engine_capabilites_s { unsigned int sided : 2; /* Valid values are 1 and 2 */ + /** The backend can register and move non-contiguous datatypes directly. */ unsigned int supports_noncontiguous_datatype : 1; + /** The backend can safely be called concurrently by worker threads. */ unsigned int multithreaded : 1; }; diff --git a/parsec/remote_dep.h b/parsec/remote_dep.h index e81e262b9..d2fa4b79b 100644 --- a/parsec/remote_dep.h +++ b/parsec/remote_dep.h @@ -256,7 +256,6 @@ int parsec_remote_dep_propagate(parsec_execution_stream_t* es, #define parsec_remote_dep_progress(ctx) 0 #define parsec_remote_dep_activate(ctx, o, r) -1 #define parsec_remote_dep_new_taskpool(ctx) 0 -#define remote_dep_mpi_initialize_execution_stream(ctx) 0 #endif /* DISTRIBUTED */ /* check if this data description represents a CTL dependency */ @@ -338,12 +337,13 @@ struct dep_cmd_item_s { }; #define dep_cmd_prio (offsetof(dep_cmd_item_t, priority)) -#define dep_mpi_pos_list (offsetof(dep_cmd_item_t, priority) - offsetof(dep_cmd_item_t, pos_list)) +#define dep_cmd_pos_list (offsetof(dep_cmd_item_t, priority) - offsetof(dep_cmd_item_t, pos_list)) #define rdep_prio (offsetof(parsec_remote_deps_t, max_priority)) /** - * These functions will be inherited from the current remote_dep_mpi.c - * and for the time being will remain in there. + * Remote-dependency communication protocol entry points. These functions are + * implemented by the transport-neutral protocol layer and use the selected + * parsec_comm_engine_t backend for network operations. 
*/ void* remote_dep_dequeue_main(parsec_context_t* context); @@ -389,13 +389,13 @@ typedef struct { uint32_t tcid; // 20 int msg_size; // 24 int dep; // 28 -} parsec_profile_remote_dep_mpi_info_t; // 32 bytes +} parsec_profile_remote_dep_comm_info_t; // 32 bytes #ifdef PARSEC_PROF_TRACE #define TAKE_TIME_WITH_INFO(PROF, KEY, I, k, src, dst, rdw, nbdtt, dtt) \ do { \ if( parsec_profile_enabled ) { \ - parsec_profile_remote_dep_mpi_info_t __info; \ + parsec_profile_remote_dep_comm_info_t __info; \ parsec_taskpool_t *__tp = parsec_taskpool_lookup( (rdw).taskpool_id ); \ const parsec_task_class_t *__tc = __tp->task_classes_array[(rdw).task_class_id ]; \ __info.rank_src = (src); \ diff --git a/parsec/remote_dep_mpi.c b/parsec/remote_dep_comm.c similarity index 84% rename from parsec/remote_dep_mpi.c rename to parsec/remote_dep_comm.c index d21071dc7..4f77af480 100644 --- a/parsec/remote_dep_mpi.c +++ b/parsec/remote_dep_comm.c @@ -4,10 +4,20 @@ * reserved. * Copyright (c) 2023-2026 NVIDIA Corporation. All rights reserved. */ +/** + * @file + * + * Transport-neutral remote-dependency protocol. + * + * This file owns the remote-dependency command queues, activation-message + * protocol, eager payload handling, rendezvous GET/PUT sequence, delayed DTD + * activation, and local reshape requests. It intentionally talks to the + * selected communication backend only through parsec_comm_engine_t callbacks. + * Backend-specific bootstrap and transport mechanics belong in mca/comm. + */ #include "parsec/parsec_config.h" -#include #include "profiling.h" #include "parsec/class/list.h" #include "parsec/utils/output.h" @@ -40,7 +50,7 @@ int parsec_comm_puts = 0; * larger the amount spent in ordering the tasks, but greater the potential * benefits of doing things in the right order. 
*/ -static void remote_dep_mpi_params(parsec_context_t* context); +static void remote_dep_comm_params(parsec_context_t* context); static int parsec_param_nb_tasks_extracted = 20; /* For the meaning of aggregate, short and eager, refer to the * param register help text for comm_aggregate, and @@ -84,7 +94,7 @@ remote_dep_cmd_to_string(remote_dep_wire_activate_t* origin, return parsec_task_snprintf(str, len, &task); } -/* TODO: fix heterogeneous restriction by using proper mpi datatypes */ +/* TODO: fix heterogeneous restriction by using transport-neutral datatypes. */ #define dep_dtt parsec_datatype_int8_t #define dep_count sizeof(remote_dep_wire_activate_t) #define dep_extent dep_count @@ -106,13 +116,13 @@ parsec_list_t dep_put_fifo; /* ordered non threaded fifo */ /* help manage the messages in the same category, where a category is either messages * to the same destination, or with the same action key. */ -static dep_cmd_item_t** parsec_mpi_same_pos_items; -static int parsec_mpi_same_pos_items_size = 0; +static dep_cmd_item_t** remote_dep_same_pos_items; +static int remote_dep_same_pos_items_size = 0; -static int mpi_initialized = 0; +static int remote_dep_initialized = 0; #if defined(PARSEC_REMOTE_DEP_USE_THREADS) -static pthread_mutex_t mpi_thread_mutex; -static pthread_cond_t mpi_thread_condition; +static pthread_mutex_t comm_thread_mutex; +static pthread_cond_t comm_thread_condition; #endif parsec_execution_stream_t parsec_comm_es = { @@ -140,11 +150,15 @@ parsec_execution_stream_t parsec_comm_es = { .datarepo_mempools = {0} }; -static void remote_dep_mpi_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item); -static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps); +static void remote_dep_comm_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item); +static void remote_dep_comm_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps); + +static void 
remote_dep_comm_get_end(parsec_execution_stream_t* es, + int idx, + parsec_remote_deps_t* deps); static int -remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, +remote_dep_comm_get_end_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, void *msg, size_t msg_size, @@ -152,7 +166,7 @@ remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, void *cb_data); static int -remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, +remote_dep_comm_put_end_cb(parsec_comm_engine_t *ce, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, parsec_ce_mem_reg_handle_t rreg, @@ -173,12 +187,12 @@ int remote_dep_ce_init(parsec_context_t* context); static int local_dep_nothread_reshape(parsec_execution_stream_t* es, dep_cmd_item_t *item); -static int remote_dep_mpi_progress(parsec_execution_stream_t* es); +static int remote_dep_comm_progress(parsec_execution_stream_t* es); -static void remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, +static void remote_dep_comm_new_taskpool(parsec_execution_stream_t* es, dep_cmd_item_t *dep_cmd_item); -static void remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, +static void remote_dep_comm_release_delayed_deps(parsec_execution_stream_t* es, dep_cmd_item_t *item); /* Perform a memcpy with datatypes by doing a local sendrecv */ @@ -188,13 +202,13 @@ static int remote_dep_nothread_memcpy(parsec_execution_stream_t* es, int remote_dep_ce_reconfigure(parsec_context_t* context); #ifdef PARSEC_PROF_TRACE -static void remote_dep_mpi_profiling_init(void); +static void remote_dep_comm_profiling_init(void); #else -#define remote_dep_mpi_profiling_init() do {} while(0) +#define remote_dep_comm_profiling_init() do {} while(0) #endif // PARSEC_PROF_TRACE -static void remote_dep_mpi_params(parsec_context_t* context) { +static void remote_dep_comm_params(parsec_context_t* context) { (void)context; #if RDEP_MSG_SHORT_LIMIT != 0 parsec_mca_param_reg_sizet_name("runtime", "comm_short_limit", "Controls the maximum size of a short message. 
Short messages contain both the control message notifying the completion of a task and the associated data that fit completely in that buffer length. The maximum size of a short message should be lower than the network MTU.", @@ -212,69 +226,42 @@ int remote_dep_dequeue_init(parsec_context_t* context) { pthread_attr_t thread_attr; - int is_mpi_up = 0; - int thread_level_support; - - assert(mpi_initialized == 0); + assert(remote_dep_initialized == 0); - remote_dep_mpi_params(context); - - MPI_Initialized(&is_mpi_up); - if( 0 == is_mpi_up ) { - /** - * MPI is not up, so we will consider this as a single node run. Fall - * back to the no-MPI case. - */ - context->nb_nodes = 1; - parsec_communication_engine_up = -1; /* No communications supported */ - /*TODO: restore the original behavior when modular datatype engine is - * available */ - parsec_fatal("MPI was not initialized. This version of PaRSEC was compiled with MPI datatype supports and *needs* MPI to execute.\n" - "\t* Please initialized MPI in the application (MPI_Init/MPI_Init_thread) prior to initializing PaRSEC.\n" - "\t* Alternatively, compile a version of PaRSEC without MPI (-DPARSEC_DIST_WITH_MPI=OFF in ccmake)\n"); - return PARSEC_SUCCESS; - } + remote_dep_comm_params(context); parsec_communication_engine_up = 0; /* we have communication capabilities */ - MPI_Query_thread( &thread_level_support ); - if( thread_level_support == MPI_THREAD_SINGLE || - thread_level_support == MPI_THREAD_FUNNELED ) { - parsec_warning("MPI was not initialized with the appropriate level of thread support.\n" - "\t* Current level is %s, while MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE is needed\n" - "\t* to guarantee correctness of the PaRSEC runtime.\n", - thread_level_support == MPI_THREAD_SINGLE ? "MPI_THREAD_SINGLE" : "MPI_THREAD_FUNNELED"); - } - /* Do this first to give a chance to the communication engine to define * who this process is by setting the corresponding info in the * parsec_context. 
*/ if( NULL == parsec_comm_engine_init(context) ) { + parsec_communication_engine_up = -1; parsec_warning("Communication engine failed to start. Additional information might be available in the corresponding error message"); return PARSEC_ERR_NOT_FOUND; } if(parsec_param_comm_thread_multiple) { - if( thread_level_support >= MPI_THREAD_MULTIPLE ) { + if( parsec_ce.capabilites.multithreaded ) { context->flags |= PARSEC_CONTEXT_FLAG_COMM_MT; } else if(parsec_param_comm_thread_multiple != -1) { - parsec_warning("Requested multithreaded access to the communication engine, but MPI is not initialized with MPI_THREAD_MULTIPLE.\n" - "\t* PaRSEC will continue with the funneled thread communication engine model.\n"); + parsec_warning("Requested multithreaded access to the communication engine, but the selected backend does not support it.\n" + "\t* PaRSEC will continue with the funneled communication engine model.\n"); } } PARSEC_OBJ_CONSTRUCT(&dep_cmd_queue, parsec_dequeue_t); PARSEC_OBJ_CONSTRUCT(&dep_cmd_fifo, parsec_list_t); - /* Build the condition used to drive the MPI thread */ - pthread_mutex_init( &mpi_thread_mutex, NULL ); - pthread_cond_init( &mpi_thread_condition, NULL ); + /* Build the condition used to drive the communication thread. */ + pthread_mutex_init( &comm_thread_mutex, NULL ); + pthread_cond_init( &comm_thread_condition, NULL ); pthread_attr_init(&thread_attr); pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM); - remote_dep_mpi_profiling_init(); + remote_dep_comm_profiling_init(); /* From now on the communication capabilities are enabled */ parsec_communication_engine_up = 1; @@ -284,22 +271,23 @@ remote_dep_dequeue_init(parsec_context_t* context) /** * We need to synchronize with the newly spawned thread. We will use the - * condition for this. If we lock the mutex prior to spawning the MPI thread, - * and then go in a condition wait, the MPI thread can lock the mutex, and - * then call condition signal. This insure proper synchronization. 
Similar - * mechanism will be used to turn on and off the MPI thread. + * condition for this. If we lock the mutex prior to spawning the + * communication thread, and then go in a condition wait, the communication + * thread can lock the mutex, and then call condition signal. This ensures + * proper synchronization. A similar mechanism will be used to turn the + * communication thread on and off. */ - pthread_mutex_lock(&mpi_thread_mutex); + pthread_mutex_lock(&comm_thread_mutex); pthread_create(&dep_thread_id, &thread_attr, (void* (*)(void*))remote_dep_dequeue_main, (void*)context); - /* Wait until the MPI thread signals it's awakening */ - pthread_cond_wait( &mpi_thread_condition, &mpi_thread_mutex ); + /* Wait until the communication thread signals it is awake. */ + pthread_cond_wait( &comm_thread_condition, &comm_thread_mutex ); up_and_running: - mpi_initialized = 1; /* up and running */ + remote_dep_initialized = 1; /* up and running */ remote_dep_ce_init(context); return PARSEC_SUCCESS; @@ -308,14 +296,15 @@ remote_dep_dequeue_init(parsec_context_t* context) int remote_dep_dequeue_fini(parsec_context_t* context) { - if( 0 == mpi_initialized ) return 0; + if( 0 == remote_dep_initialized ) return 0; /** * We suppose the disable function was called before. Then we will append a - * shutdown command in the MPI thread queue, and wake the MPI thread. Upon - * processing of the pending command the MPI thread will exit, we will be - * able to catch this by locking the mutex. Once we know the MPI thread is - * gone, cleaning up will be straightforward. + * shutdown command in the communication thread queue, and wake the + * communication thread. Upon processing of the pending command the + * communication thread will exit, we will be able to catch this by locking + * the mutex. Once we know the communication thread is gone, cleaning up + * will be straightforward. 
*/ if( 1 < parsec_communication_engine_up ) { dep_cmd_item_t* item = (dep_cmd_item_t*) calloc(1, sizeof(dep_cmd_item_t)); @@ -323,13 +312,13 @@ remote_dep_dequeue_fini(parsec_context_t* context) void *ret; item->action = DEP_CTL; - item->cmd.ctl.enable = -1; /* turn off and return from the MPI thread */ + item->cmd.ctl.enable = -1; /* turn off and return from the communication thread */ item->priority = 0; parsec_dequeue_push_back(&dep_cmd_queue, (parsec_list_item_t*) item); - /* I am supposed to own the lock. Wake the MPI thread */ - pthread_cond_signal(&mpi_thread_condition); - pthread_mutex_unlock(&mpi_thread_mutex); + /* I am supposed to own the lock. Wake the communication thread. */ + pthread_cond_signal(&comm_thread_condition); + pthread_mutex_unlock(&comm_thread_mutex); pthread_join(dep_thread_id, &ret); assert((parsec_context_t*)ret == context); } @@ -338,7 +327,7 @@ remote_dep_dequeue_fini(parsec_context_t* context) PARSEC_OBJ_DESTRUCT(&dep_cmd_queue); assert(NULL == parsec_dequeue_pop_front(&dep_cmd_fifo)); PARSEC_OBJ_DESTRUCT(&dep_cmd_fifo); - mpi_initialized = 0; + remote_dep_initialized = 0; PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "Process has reshaped %zu tiles.", count_reshaping); (void)context; @@ -356,26 +345,26 @@ remote_dep_dequeue_on(parsec_context_t* context) { /* If we are the only participant in this execution, we should not have to * communicate with any other process. However, we might have to execute all - * local data copies, which requires MPI. + * local data copies, which require the selected communication backend. 
*/ if( 0 >= parsec_communication_engine_up ) return -1; if( context->nb_nodes == 1 ) return 1; - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine signalled ON on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine signalled ON on process %d/%d", context->my_rank, context->nb_nodes); /* At this point I am supposed to own the mutex */ parsec_communication_engine_up = 2; - pthread_cond_signal(&mpi_thread_condition); - pthread_mutex_unlock(&mpi_thread_mutex); + pthread_cond_signal(&comm_thread_condition); + pthread_mutex_unlock(&comm_thread_mutex); /* The waking up of the communication thread happen asynchronously, once the thread - * receives the signal. At that point it acquires the mpi_thread_mutex and set the + * receives the signal. At that point it acquires the comm_thread_mutex and sets the * global variable parsec_communication_engine_up to 3. */ /** - * We need to wait for the communication thread to perform the mpi_setup + * We need to wait for the communication thread to perform the backend setup * as it will fill-up my_rank on the context.
*/ while( 3 != parsec_communication_engine_up ) sched_yield(); @@ -392,16 +381,16 @@ remote_dep_dequeue_off(parsec_context_t* context) dep_cmd_item_t* item = (dep_cmd_item_t*) calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); item->action = DEP_CTL; - item->cmd.ctl.enable = 0; /* turn OFF the MPI thread */ + item->cmd.ctl.enable = 0; /* turn OFF the communication thread */ item->priority = 0; /* wait until the communication thread is up and running */ while( 3 != parsec_communication_engine_up ) sched_yield(); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine signalled OFF on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine signalled OFF on process %d/%d", context->my_rank, context->nb_nodes); parsec_dequeue_push_back(&dep_cmd_queue, (parsec_list_item_t*) item); - /* wait until we own the PaRSEC MPI synchronization mutex */ - pthread_mutex_lock(&mpi_thread_mutex); + /* wait until we own the PaRSEC communication synchronization mutex */ + pthread_mutex_lock(&comm_thread_mutex); assert( 1 == parsec_communication_engine_up ); (void)context; /* silence warning */ @@ -409,7 +398,7 @@ remote_dep_dequeue_off(parsec_context_t* context) } static void -remote_dep_mpi_initialize_execution_stream(parsec_context_t *context) +remote_dep_comm_initialize_execution_stream(parsec_context_t *context) { parsec_comm_es.th_id = 0; /* Pretend to be the master thread */ parsec_comm_es.virtual_process = context->virtual_processes[0]; @@ -428,8 +417,8 @@ void* remote_dep_dequeue_main(parsec_context_t* context) PARSEC_PAPI_SDE_THREAD_INIT(); /* Now synchronize with the main thread */ - pthread_mutex_lock(&mpi_thread_mutex); - pthread_cond_signal(&mpi_thread_condition); + pthread_mutex_lock(&comm_thread_mutex); + pthread_cond_signal(&comm_thread_condition); #ifdef PARSEC_PROF_TRACE parsec_comm_es.es_profile = parsec_profiling_stream_init( 2*1024*1024, "Comm thread"); @@ -437,19 +426,20 @@ void* 
remote_dep_dequeue_main(parsec_context_t* context) #endif // PARSEC_PROF_TRACE /* This is the main loop. Wait until being woken up by the main thread, do - * the MPI stuff until we get the OFF or FINI commands. Then react the them. + * the communication work until we get the OFF or FINI commands. Then + * react to them. * However, the first time do the delayed initialization that could not have * been done before due to the lack of other component initialization. */ while( -1 != whatsup ) { /* Let's wait until we are awaken */ - pthread_cond_wait(&mpi_thread_condition, &mpi_thread_mutex); + pthread_cond_wait(&comm_thread_condition, &comm_thread_mutex); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine ON on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine ON on process %d/%d", context->my_rank, context->nb_nodes); - /* The MPI thread is owning the lock */ + /* The communication thread owns the lock. */ assert( parsec_communication_engine_up == 2 ); parsec_ce.enable(&parsec_ce); @@ -469,7 +459,7 @@ void* remote_dep_dequeue_main(parsec_context_t* context) } whatsup = remote_dep_dequeue_nothread_progress(&parsec_comm_es, -1 /* loop till explicitly asked to return */); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine OFF on process %d/%d", + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "RDEP: comm engine OFF on process %d/%d", context->my_rank, context->nb_nodes); parsec_communication_engine_up = 1; /* went to sleep */ } @@ -482,7 +472,7 @@ void* remote_dep_dequeue_main(parsec_context_t* context) int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp) { - if(!mpi_initialized) return 0; + if(!remote_dep_initialized) return 0; remote_dep_inc_flying_messages(tp); dep_cmd_item_t* item = (dep_cmd_item_t*)calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); @@ -496,7 +486,7 @@ int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp) int
remote_dep_dequeue_delayed_dep_release(parsec_remote_deps_t *deps) { - if(!mpi_initialized) return 0; + if(!remote_dep_initialized) return 0; dep_cmd_item_t* item = (dep_cmd_item_t*)calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); item->action = DEP_DTD_DELAYED_RELEASE; @@ -597,10 +587,10 @@ remote_dep_copy_allocate(parsec_dep_type_description_t* data, int preferred_devi /* don't use preferred_device, it might not be the location where the data copy resides */ parsec_data_start_transfer_ownership_to_copy(dc->original, dc->device_index, PARSEC_FLOW_ACCESS_WRITE); if (dc->device_index != preferred_device) { - PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "MPI:\tFail to allocate tile on device %d and instead allocate on device %d\n", + PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "RDEP:\tFail to allocate tile on device %d and instead allocate on device %d\n", preferred_device, dc->device_index); } - PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "MPI:\tMalloc new temporary tile [dev %d] copy %p size %" PRIu64 " count = %" PRIu64 " displ = %" PRIi64 " %p", + PARSEC_DEBUG_VERBOSE(5, parsec_comm_output_stream, "RDEP:\tMalloc new temporary tile [dev %d] copy %p size %" PRIu64 " count = %" PRIu64 " displ = %" PRIi64 " %p", dc->device_index, dc, data->arena->elem_size, data->dst_count, data->dst_displ, data->arena); return dc; } @@ -626,9 +616,9 @@ reshape_copy_allocate(parsec_dep_type_description_t* data) /** * - * Fulfill a reshape promise by the current thread - * (when MPI_THREAD_MULTIPLE) or delegate the reshaping to the communication - * thread. + * Routine to fulfill a reshape promise by the current thread when the + * selected backend supports multithreaded access, or delegate the reshaping to + * the communication thread otherwise. * Routine set as callback when initializing a future.
* * @param[inout] future future for the reshaping, may be fulfilled by this call or @@ -661,37 +651,40 @@ void parsec_local_reshape_cb(parsec_base_future_t *future, ... ) #endif #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - char type_name_src[MAX_TASK_STRLEN] = "NULL"; - char type_name_dst[MAX_TASK_STRLEN] = "NULL"; - int len; + uintptr_t type_id_src = 0; + uintptr_t type_id_dst = 0; int src_pack_size=0, dst_pack_size=0; if(dt->local->src_datatype != PARSEC_DATATYPE_NULL) { - MPI_Type_get_name(dt->local->src_datatype, type_name_src, &len); - MPI_Pack_size(dt->local->src_count, dt->local->src_datatype, MPI_COMM_WORLD, &src_pack_size); + type_id_src = (uintptr_t)dt->local->src_datatype; + if( NULL != parsec_ce.pack_size ) { + parsec_ce.pack_size(&parsec_ce, dt->local->src_count, dt->local->src_datatype, &src_pack_size); + } } if(dt->local->dst_datatype != PARSEC_DATATYPE_NULL) { - MPI_Type_get_name(dt->local->dst_datatype, type_name_dst, &len); - MPI_Pack_size(dt->local->dst_count, dt->local->dst_datatype, MPI_COMM_WORLD, &dst_pack_size); + type_id_dst = (uintptr_t)dt->local->dst_datatype; + if( NULL != parsec_ce.pack_size ) { + parsec_ce.pack_size(&parsec_ce, dt->local->dst_count, dt->local->dst_datatype, &dst_pack_size); + } } if(src_pack_size != dst_pack_size){ - parsec_warning("parsec_local_reshape: reshape requested between dtt with different packed size fut %p dtt [%p:%s = sz(%d) -> %p:%s= sz(%d)]", - future, - dt->local->src_datatype, type_name_src, src_pack_size, - dt->local->dst_datatype, type_name_dst, dst_pack_size); + parsec_warning("parsec_local_reshape: reshape requested between dtt with different packed size fut %p dtt [0x%" PRIxPTR " = sz(%d) -> 0x%" PRIxPTR " = sz(%d)]", + future, + type_id_src, src_pack_size, + type_id_dst, dst_pack_size); } #endif - /* if MPI is multithreaded do not thread-shift the sendrecv */ + /* If the selected backend is multithreaded, do not thread-shift the reshape. 
*/ if( (es->virtual_process->parsec_context->flags & PARSEC_CONTEXT_FLAG_COMM_MT) || (tp == NULL && task == NULL)/* || I AM COMM THREAD */) { parsec_data_copy_t *reshape_data = reshape_copy_allocate(dt->local); PARSEC_DEBUG_VERBOSE(2, parsec_debug_output, - "th%d RESHAPE_PROMISE COMPLETED COMP-THREAD to [%p:%p:%s -> %p:%p:%s] for %s fut %p", - es->th_id, dt->data, dt->data->dtt, type_name_src, - reshape_data, dt->local->dst_datatype, type_name_dst, task_string, future); + "th%d RESHAPE_PROMISE COMPLETED COMP-THREAD to [%p:0x%" PRIxPTR " -> %p:0x%" PRIxPTR "] for %s fut %p", + es->th_id, dt->data, type_id_src, + reshape_data, type_id_dst, task_string, future); parsec_ce.reshape(&parsec_ce, es, reshape_data, dt->local->dst_displ, dt->local->dst_datatype, dt->local->dst_count, @@ -706,9 +699,9 @@ void parsec_local_reshape_cb(parsec_base_future_t *future, ... ) } PARSEC_DEBUG_VERBOSE(4, parsec_debug_output, - "th%d RESHAPE_PROMISE TRIGGERED to [%p:%p:%s -> ...:%p:%s] for %s fut %p", - es->th_id, dt->data, dt->data->dtt, type_name_src, - dt->local->dst_datatype, type_name_dst, task_string, future); + "th%d RESHAPE_PROMISE TRIGGERED to [%p:0x%" PRIxPTR " -> ...:0x%" PRIxPTR "] for %s fut %p", + es->th_id, dt->data, type_id_src, + type_id_dst, task_string, future); dep_cmd_item_t* item = (dep_cmd_item_t*)calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); @@ -736,8 +729,8 @@ void parsec_local_reshape_cb(parsec_base_future_t *future, ... ) * once a datatype has been successfully retrieved it must cancel the iterator * progress in order to return ASAP the datatype to the communication engine. 
*/ -parsec_ontask_iterate_t -remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, +static parsec_ontask_iterate_t +remote_dep_comm_retrieve_datatype(parsec_execution_stream_t *eu, const parsec_task_t *newcontext, const parsec_task_t *oldcontext, const parsec_dep_t* dep, @@ -810,13 +803,10 @@ remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, */ if(old_dtt != output->data.remote.dst_datatype) { #if defined(PARSEC_DEBUG_NOISIER) - char type_name_src[MAX_TASK_STRLEN] = "NULL"; - char type_name_dst[MAX_TASK_STRLEN] = "NULL"; - int len; - if(old_dtt != PARSEC_DATATYPE_NULL) MPI_Type_get_name(old_dtt, type_name_src, &len); - if(output->data.remote.dst_datatype != PARSEC_DATATYPE_NULL) MPI_Type_get_name(output->data.remote.dst_datatype, type_name_dst, &len); - PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, "MPI: retrieve dtt for %s [dep_datatype_index %x] DTT: old %s new %s (%p) --> PACKED", - newcontext->task_class->name, dep->dep_datatype_index, type_name_src, type_name_dst, output->data.remote.dst_datatype); + PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, + "RDEP: retrieve dtt for %s [dep_datatype_index %x] DTT: old 0x%" PRIxPTR " new 0x%" PRIxPTR " --> PACKED", + newcontext->task_class->name, dep->dep_datatype_index, + (uintptr_t)old_dtt, (uintptr_t)output->data.remote.dst_datatype); #endif int dsize; parsec_ce.pack_size(&parsec_ce, output->data.remote.dst_count, output->data.remote.dst_datatype, &dsize); @@ -827,13 +817,10 @@ remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, } #if defined(PARSEC_DEBUG_NOISIER) { - char type_name_src[MAX_TASK_STRLEN] = "NULL"; - char type_name_dst[MAX_TASK_STRLEN] = "NULL"; - int len; - if(old_dtt!=PARSEC_DATATYPE_NULL) MPI_Type_get_name(old_dtt, type_name_src, &len); - if(output->data.remote.dst_datatype!=PARSEC_DATATYPE_NULL) MPI_Type_get_name(output->data.remote.dst_datatype, type_name_dst, &len); - PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, "MPI: retrieve dtt for %s 
[dep_datatype_index %x] DTT: old %s new %s (%p)--> CONTINUE", - newcontext->task_class->name, dep->dep_datatype_index, type_name_src, type_name_dst, output->data.remote.dst_datatype); + PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, + "RDEP: retrieve dtt for %s [dep_datatype_index %x] DTT: old 0x%" PRIxPTR " new 0x%" PRIxPTR " --> CONTINUE", + newcontext->task_class->name, dep->dep_datatype_index, + (uintptr_t)old_dtt, (uintptr_t)output->data.remote.dst_datatype); } #endif /* Predict where the incoming temporary should be located, by using the data_affinity. @@ -924,12 +911,12 @@ remote_dep_get_datatypes(parsec_execution_stream_t* es, if(return_defer) { return -2; } - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\tRetrieve datatype with mask 0x%x (remote_dep_get_datatypes)", (1U<msg.task_class_id = dtd_task->super.task_class->task_class_id; origin->output[k].data.remote.src_datatype = origin->output[k].data.remote.dst_datatype = PARSEC_DATATYPE_NULL; dtd_task->super.task_class->iterate_successors(es, (parsec_task_t *)dtd_task, (1U<output[k].data.remote.src_count = (idx < data_sizes[0]) ? 
data_sizes[idx+1] : 0; PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, - "MPI:\tRetrieve datatype with mask 0x%x (remote_dep_get_datatypes) remote size %u", + "RDEP:\tRetrieve datatype with mask 0x%x (remote_dep_get_datatypes) remote size %u", local_mask, origin->output[k].data.remote.src_count); incoming_mask = origin->incoming_mask; task.task_class->iterate_successors(es, &task, local_mask, - remote_dep_mpi_retrieve_datatype, + remote_dep_comm_retrieve_datatype, origin); if( (origin->incoming_mask & ~incoming_mask & (1U<output[k].data.remote.dst_count) ) { @@ -1054,7 +1041,7 @@ remote_dep_release_incoming(parsec_execution_stream_t* es, target = task.task_class->out[++pidx]; assert(NULL != target); } - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tDATA %p(%s) released from %p[%d] flow idx %d", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\tDATA %p(%s) released from %p[%d] flow idx %d", origin->output[i].data.data, target->name, origin, i, target->flow_index); task.data[target->flow_index].source_repo = NULL; task.data[target->flow_index].source_repo_entry = NULL; @@ -1090,7 +1077,7 @@ remote_dep_release_incoming(parsec_execution_stream_t* es, } else { assert(0); } - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tTranslate mask from 0x%lx to 0x%x (remote_dep_release_incoming)", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\tTranslate mask from 0x%lx to 0x%x (remote_dep_release_incoming)", complete_mask, action_mask); (void)task.task_class->release_deps(es, &task, action_mask | PARSEC_ACTION_RELEASE_LOCAL_DEPS | PARSEC_ACTION_RESHAPE_REMOTE_ON_RELEASE, @@ -1171,10 +1158,10 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, position = (DEP_ACTIVATE == item->action) ? 
item->cmd.activate.peer : (context->nb_nodes + item->action); parsec_list_item_singleton(&item->pos_list); - same_pos = parsec_mpi_same_pos_items[position]; + same_pos = remote_dep_same_pos_items[position]; if((NULL != same_pos) && (same_pos->priority >= item->priority)) { /* insert the item in the peer list */ - parsec_list_item_ring_push_sorted(&same_pos->pos_list, &item->pos_list, dep_mpi_pos_list); + parsec_list_item_ring_push_sorted(&same_pos->pos_list, &item->pos_list, dep_cmd_pos_list); } else { if(NULL != same_pos) { /* this is the new head of the list. */ @@ -1190,7 +1177,7 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, #endif parsec_list_item_singleton((parsec_list_item_t*)same_pos); } - parsec_mpi_same_pos_items[position] = item; + remote_dep_same_pos_items[position] = item; /* And add ourselves in the temp list */ parsec_list_nolock_push_front(&temp_list, (parsec_list_item_t*)item); } @@ -1209,7 +1196,7 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, if(NULL == (item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_cmd_fifo)) ) { /* only progress MPI if necessary */ if (context->nb_nodes > 1) { - ret = remote_dep_mpi_progress(es); + ret = remote_dep_comm_progress(es); if( 0 == ret && ((comm_yield == 2) || (comm_yield == 1 /* communication list is full, we need to forcefully drain the network */ @@ -1234,10 +1221,10 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, free(item); return ret; /* FINI or OFF */ case DEP_NEW_TASKPOOL: - remote_dep_mpi_new_taskpool(es, item); + remote_dep_comm_new_taskpool(es, item); break; case DEP_DTD_DELAYED_RELEASE: - remote_dep_mpi_release_delayed_deps(es, item); + remote_dep_comm_release_delayed_deps(es, item); break; case DEP_ACTIVATE: remote_dep_nothread_send(es, &item); @@ -1265,25 +1252,24 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, /* if we still have pending messages of the same type, stay here for an extra loop */ if( 
cycles >= 0 ) cycles++; } - parsec_mpi_same_pos_items[position] = same_pos; + remote_dep_same_pos_items[position] = same_pos; goto check_pending_queues; } #ifdef PARSEC_PROF_TRACE -static int MPI_Activate_sk, MPI_Activate_ek; -static int MPI_Data_ctl_sk, MPI_Data_ctl_ek; -static int MPI_Data_plds_sk, MPI_Data_plds_ek; -static int MPI_Data_pldr_sk, MPI_Data_pldr_ek; +static int RDEP_Activate_sk, RDEP_Activate_ek; +static int RDEP_Data_ctl_sk, RDEP_Data_ctl_ek; +static int RDEP_Data_plds_sk, RDEP_Data_plds_ek; +static int RDEP_Data_pldr_sk, RDEP_Data_pldr_ek; /** - * The structure describe the MPI events saves into the profiling stream. The following - * string represent it's description so that an external package can decrypt the - * binary format of the stream. + * Description of the remote-dependency events saved into the profiling stream. + * The following string lets external tools decode the binary event payload. */ -static const char *parsec_profile_remote_dep_mpi_info_to_string = "src{int32_t};" +static const char *parsec_profile_remote_dep_comm_info_to_string = "src{int32_t};" "dst{int32_t};" "tid{uint64_t};" "tpid{uint32_t};" @@ -1291,34 +1277,34 @@ static const char *parsec_profile_remote_dep_mpi_info_to_string = "src{int32_t}; "msg_size{int32_t};" "dep{int32_t}"; -static void remote_dep_mpi_profiling_init(void) +static void remote_dep_comm_profiling_init(void) { - parsec_profiling_add_dictionary_keyword( "MPI_ACTIVATE", "fill:#FF0000", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - &MPI_Activate_sk, &MPI_Activate_ek); - parsec_profiling_add_dictionary_keyword( "MPI_DATA_CTL", "fill:#000077", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - &MPI_Data_ctl_sk, &MPI_Data_ctl_ek); - parsec_profiling_add_dictionary_keyword( "MPI_DATA_PLD_SND", "fill:#B08080", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - 
&MPI_Data_plds_sk, &MPI_Data_plds_ek); - parsec_profiling_add_dictionary_keyword( "MPI_DATA_PLD_RCV", "fill:#80B080", - sizeof(parsec_profile_remote_dep_mpi_info_t), - parsec_profile_remote_dep_mpi_info_to_string, - &MPI_Data_pldr_sk, &MPI_Data_pldr_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_ACTIVATE", "fill:#FF0000", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Activate_sk, &RDEP_Activate_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_DATA_CTL", "fill:#000077", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Data_ctl_sk, &RDEP_Data_ctl_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_DATA_PLD_SND", "fill:#B08080", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Data_plds_sk, &RDEP_Data_plds_ek); + parsec_profiling_add_dictionary_keyword( "RDEP_DATA_PLD_RCV", "fill:#80B080", + sizeof(parsec_profile_remote_dep_comm_info_t), + parsec_profile_remote_dep_comm_info_to_string, + &RDEP_Data_pldr_sk, &RDEP_Data_pldr_ek); } -static void remote_dep_mpi_profiling_fini(void) +static void remote_dep_comm_profiling_fini(void) { /* Nothing to do, the thread_profiling structures will be automatically * released when the master profiling system is shut down. 
*/ } -static inline uint64_t remote_dep_mpi_profiling_event_id(void) +static inline uint64_t remote_dep_comm_profiling_event_id(void) { static uint64_t event_id = 0; /* we only need distinct event ids for events triggered by the comm thread, @@ -1328,8 +1314,8 @@ static inline uint64_t remote_dep_mpi_profiling_event_id(void) } #else -#define remote_dep_mpi_profiling_fini() do {} while(0) -#define remote_dep_mpi_profiling_event_id() (0UL) +#define remote_dep_comm_profiling_fini() do {} while(0) +#define remote_dep_comm_profiling_event_id() (0UL) #endif /* PARSEC_PROF_TRACE */ @@ -1344,7 +1330,7 @@ static inline uint64_t remote_dep_mpi_profiling_event_id(void) * @returns 1 if the message can't be packed due to lack of space, or 0 * otherwise. */ -static int remote_dep_mpi_pack_dep(int peer, +static int remote_dep_comm_pack_dep(int peer, dep_cmd_item_t* item, char* packed_buffer, int length, @@ -1450,7 +1436,7 @@ static int remote_dep_mpi_pack_dep(int peer, (void)parsec_atomic_fetch_add_int32(&deps->pending_ack, expected); /* Keep track of the inflight data */ #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - parsec_debug_verbose(6, parsec_comm_output_stream, "MPI:\tTO\t%d\tActivate\t% -8s\n" + parsec_debug_verbose(6, parsec_comm_output_stream, "RDEP:\tTO\t%d\tActivate\t% -8s\n" " \t\t\twith datakey %lx\tmask %lx short mask %lu length %d", peer, tmp, msg->deps, msg->output_mask, msg->output_mask ^ item->cmd.activate.task.output_mask, msg->length); @@ -1516,7 +1502,7 @@ static int local_dep_nothread_reshape(parsec_execution_stream_t* es, PARSEC_DATA_COPY_RETAIN(cmd->memcpy.source); int rc = remote_dep_nothread_memcpy(es, item); - assert(MPI_SUCCESS == rc); + assert(PARSEC_SUCCESS == rc); parsec_future_set(item->cmd.memcpy_reshape.future, cmd->memcpy.destination); @@ -1528,7 +1514,7 @@ static int local_dep_nothread_reshape(parsec_execution_stream_t* es, #endif (void)es; - return (MPI_SUCCESS == rc ? 0 : -1); + return (PARSEC_SUCCESS == rc ? 
0 : -1); } /** @@ -1555,7 +1541,7 @@ static int remote_dep_nothread_send(parsec_execution_stream_t* es, deps = (parsec_remote_deps_t*)item->cmd.activate.task.source_deps; parsec_list_item_singleton((parsec_list_item_t*)item); - if( 0 == remote_dep_mpi_pack_dep(peer, item, packed_buffer, + if( 0 == remote_dep_comm_pack_dep(peer, item, packed_buffer, DEP_SHORT_BUFFER_SIZE, &position) ) { /* space left on the buffer. Move to the next item with the same destination */ dep_cmd_item_t* next = (dep_cmd_item_t*)parsec_list_item_ring_chop(&item->pos_list); @@ -1572,11 +1558,11 @@ static int remote_dep_nothread_send(parsec_execution_stream_t* es, assert(NULL != ring); /* dep index is meaningless in this context, set to -1 */ - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Activate_sk, 0, -1, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Activate_sk, 0, -1, es->virtual_process->parsec_context->my_rank, peer, deps->msg, position, PARSEC_DATATYPE_PACKED); parsec_ce.send_am(&parsec_ce, PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG, peer, packed_buffer, position); - TAKE_TIME(es->es_profile, MPI_Activate_ek, 0); + TAKE_TIME(es->es_profile, RDEP_Activate_ek, 0); DEBUG_MARK_CTL_MSG_ACTIVATE_SENT(peer, (void*)&deps->msg, &deps->msg); do { @@ -1597,7 +1583,7 @@ static int remote_dep_nothread_send(parsec_execution_stream_t* es, * target) before draining the network and pushing out the highest priority * actions. 
*/ -static int remote_dep_mpi_progress(parsec_execution_stream_t* es) +static int remote_dep_comm_progress(parsec_execution_stream_t* es) { int ret = 0; @@ -1607,12 +1593,12 @@ static int remote_dep_mpi_progress(parsec_execution_stream_t* es) if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { parsec_remote_deps_t* deps = (parsec_remote_deps_t*)parsec_list_nolock_pop_front(&dep_activates_fifo); - remote_dep_mpi_get_start(es, deps); + remote_dep_comm_get_start(es, deps); ret++; } if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_put_fifo)) { dep_cmd_item_t* item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_put_fifo); - remote_dep_mpi_put_start(es, item); + remote_dep_comm_put_start(es, item); ret++; } @@ -1620,7 +1606,7 @@ static int remote_dep_mpi_progress(parsec_execution_stream_t* es) } static int -remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, +remote_dep_comm_save_put_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, void *msg, size_t msg_size, @@ -1660,7 +1646,7 @@ remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, assert(0 != deps->outgoing_mask); item->priority = deps->max_priority; - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI: Put cb_received for %s from %d tag %u which 0x%x (deps %p)", + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP: Put cb_received for %s from %d tag %u which 0x%x (deps %p)", remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), item->cmd.activate.peer, -1, task->output_mask, (void*)deps); @@ -1668,9 +1654,9 @@ remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, parsec_list_nolock_push_sorted(&dep_put_fifo, (parsec_list_item_t*)item, dep_cmd_prio); if( parsec_ce.can_serve(&parsec_ce) ) { item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_put_fifo); - remote_dep_mpi_put_start(es, item); + remote_dep_comm_put_start(es, item); } else { - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI: Put DELAYED for %s from %d tag %u which 0x%x (deps 
%p)", + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP: Put DELAYED for %s from %d tag %u which 0x%x (deps %p)", remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), item->cmd.activate.peer, -1, task->output_mask, (void*)deps); } @@ -1678,7 +1664,7 @@ remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, } static void -remote_dep_mpi_put_start(parsec_execution_stream_t* es, +remote_dep_comm_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item) { remote_dep_wire_get_t* task = &(item->cmd.activate.task); @@ -1686,19 +1672,15 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps = (parsec_remote_deps_t*) (uintptr_t) task->source_deps; int k, nbdtt; void* dataptr; - MPI_Datatype dtt; + parsec_datatype_t dtt; #endif /* !defined(PARSEC_PROF_DRY_DEP) */ -#if defined(PARSEC_DEBUG_NOISIER) - char type_name[MPI_MAX_OBJECT_NAME]; - int len; -#endif (void)es; DEBUG_MARK_CTL_MSG_GET_RECV(item->cmd.activate.peer, (void*)task, task); #if !defined(PARSEC_PROF_DRY_DEP) assert(task->output_mask); - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI:\tPUT mask=%lx deps 0x%lx", task->output_mask, task->source_deps); + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP:\tPUT mask=%lx deps 0x%lx", task->output_mask, task->source_deps); #ifdef PARSEC_RESHAPE_BEFORE_SEND_TO_REMOTE int all_completed = 1; @@ -1738,7 +1720,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, } } if( ! all_completed ) { - PARSEC_DEBUG_VERBOSE(4, parsec_comm_output_stream, "MPI:\tReshaping promises not yet completed for deps 0x%lx. Reschedule.", deps); + PARSEC_DEBUG_VERBOSE(4, parsec_comm_output_stream, "RDEP:\tReshaping promises not yet completed for deps 0x%lx. 
Reschedule.", deps); parsec_list_nolock_push_front(&dep_put_fifo, (parsec_list_item_t*)item); return; } @@ -1748,7 +1730,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, assert(k < MAX_PARAM_COUNT); if(!((1U<<k) & task->output_mask)) continue; - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\t[idx %d mask(0x%x / 0x%x)] %p, %p", k, (1U<<k), task->output_mask, + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\t[idx %d mask(0x%x / 0x%x)] %p, %p", k, (1U<<k), task->output_mask, deps->output[k].data.data, PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data)); dataptr = PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data); dtt = deps->output[k].data.remote.src_datatype; @@ -1780,11 +1762,10 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, parsec_ce_mem_reg_handle_t remote_memory_handle = item->cmd.activate.remote_memory_handle; -#if defined(PARSEC_DEBUG_NOISIER) - MPI_Type_get_name(dtt, type_name, &len); - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tTO\t%d\tPut START\tunknown \tk=%d\twith deps 0x%lx at %p type %s (%p)\t(src_mem_handle = %p, dst_mem_handle = %p)", - item->cmd.activate.peer, k, task->source_deps, dataptr, type_name, dtt, source_memory_handle, remote_memory_handle); -#endif + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, + "RDEP:\tTO\t%d\tPut START\tunknown \tk=%d\twith deps 0x%lx at %p type 0x%" PRIxPTR "\t(src_mem_handle = %p, dst_mem_handle = %p)", + item->cmd.activate.peer, k, task->source_deps, dataptr, + (uintptr_t)dtt, source_memory_handle, remote_memory_handle); remote_dep_cb_data_t *cb_data = (remote_dep_cb_data_t *) parsec_thread_mempool_allocate (parsec_remote_dep_cb_data_mempool->thread_mempools); @@ -1792,10 +1773,10 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, cb_data->k = k; #if defined(PARSEC_PROF_TRACE) - uint64_t event_id = remote_dep_mpi_profiling_event_id(); + uint64_t event_id = remote_dep_comm_profiling_event_id(); cb_data->event_id = event_id; #endif /* PARSEC_PROF_TRACE */ - 
TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_plds_sk, event_id, k, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Data_plds_sk, event_id, k, es->virtual_process->parsec_context->my_rank, item->cmd.activate.peer, deps->msg, nbdtt, dtt); @@ -1803,7 +1784,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, parsec_ce.put(&parsec_ce, source_memory_handle, 0, remote_memory_handle, 0, 0, item->cmd.activate.peer, - remote_dep_mpi_put_end_cb, cb_data, + remote_dep_comm_put_end_cb, cb_data, (parsec_ce_tag_t)task->callback_fn, &task->remote_callback_data, sizeof(uintptr_t)); parsec_comm_puts++; @@ -1819,7 +1800,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, } static int -remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, +remote_dep_comm_put_end_cb(parsec_comm_engine_t *ce, parsec_ce_mem_reg_handle_t lreg, ptrdiff_t ldispl, parsec_ce_mem_reg_handle_t rreg, @@ -1832,11 +1813,11 @@ remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, /* Retrieve deps from callback_data */ parsec_remote_deps_t* deps = ((remote_dep_cb_data_t *)cb_data)->deps; - PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI:\tTO\tna\tPut END \tunknown \tk=%d\twith deps %p\tparams bla\t(src_mem_handle = %p, dst_mem_handle=%p", + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "RDEP:\tTO\tna\tPut END \tunknown \tk=%d\twith deps %p\tparams bla\t(src_mem_handle = %p, dst_mem_handle=%p", ((remote_dep_cb_data_t *)cb_data)->k, deps, lreg, rreg); #if defined(PARSEC_PROF_TRACE) - TAKE_TIME(parsec_comm_es.es_profile, MPI_Data_plds_ek, + TAKE_TIME(parsec_comm_es.es_profile, RDEP_Data_plds_ek, ((remote_dep_cb_data_t *)cb_data)->event_id); #endif /* PARSEC_PROF_TRACE */ @@ -1857,7 +1838,7 @@ remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, * the buffer, post all the control messages to initiate RGET, and all other local * cleanups. 
*/ -static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, +static void remote_dep_comm_recv_activate(parsec_execution_stream_t* es, parsec_remote_deps_t* deps, char* packed_buffer, int length, @@ -1874,7 +1855,7 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, #endif #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - parsec_debug_verbose(6, parsec_comm_output_stream, "MPI:\tFROM\t%d\tActivate\t% -8s\n" + parsec_debug_verbose(6, parsec_comm_output_stream, "RDEP:\tFROM\t%d\tActivate\t% -8s\n" "\twith datakey %lx\tparams %lx length %d (pack buf %d/%d) prio %d", deps->from, tmp, deps->msg.deps, deps->incoming_mask, deps->msg.length, *position, length, deps->max_priority); @@ -1894,7 +1875,7 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, parsec_dep_type_description_t *type_desc = &data_desc->remote; /* Check for CTL and data that do not carry payload */ if( parsec_is_CTL_dep(data_desc) ) { - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tHERE\t%d\tGet NONE\t% -8s\tk=%d\twith datakey %lx at type CONTROL", + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "RDEP:\tHERE\t%d\tGet NONE\t% -8s\tk=%d\twith datakey %lx at type CONTROL", deps->from, tmp, k, deps->msg.deps); /* deps->output[k].data.data = NULL; This is unnecessary*/ complete_mask |= (1U<from, tmp, k, deps->msg.deps); } @@ -1959,7 +1940,7 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, #if defined(PARSEC_DEBUG_NOISIER) for(int k = 0; complete_mask>>k; k++) if((1U<from, tmp, k, deps->msg.deps, deps->output[k].data.data); #endif /* If this is the only call then force the remote deps propagation */ @@ -1976,12 +1957,12 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, /* Check if we have any pending GET orders */ if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { deps = 
(parsec_remote_deps_t*)parsec_list_nolock_pop_front(&dep_activates_fifo); - remote_dep_mpi_get_start(es, deps); + remote_dep_comm_get_start(es, deps); } } static int -remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, +remote_dep_comm_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, void *msg, size_t msg_size, int src, void *cb_data) { @@ -2010,7 +1991,7 @@ remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, if( -1 == rc ) { /* the corresponding tp doesn't exist, yet. Put it in unexpected */ char* packed_buffer; - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "MPI:\tFROM\t%d\tActivate NoTPool\t% -8s\tk=%d\twith datakey %lx\tparams %lx", + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "RDEP:\tFROM\t%d\tActivate NoTPool\t% -8s\tk=%d\twith datakey %lx\tparams %lx", deps->from, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), 0, deps->msg.deps, deps->msg.output_mask); /* Copy the eager data to some temp storage */ @@ -2028,11 +2009,11 @@ remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, } } - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tFROM\t%d\tActivate\t% -8s\tk=%d\twith datakey %lx\tparams %lx", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "RDEP:\tFROM\t%d\tActivate\t% -8s\tk=%d\twith datakey %lx\tparams %lx", src, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), 0, deps->msg.deps, deps->msg.output_mask); /* Import the activation message and prepare for the reception */ - remote_dep_mpi_recv_activate(es, deps, msg, + remote_dep_comm_recv_activate(es, deps, msg, position + deps->msg.length, &position); assert( parsec_param_enable_aggregate || (position == length)); deps->eager_msg = NULL; /* this buffer will now be reused, not safe to store here */ @@ -2043,7 +2024,7 @@ remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, } void -remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, 
+remote_dep_comm_new_taskpool(parsec_execution_stream_t* es, dep_cmd_item_t *dep_cmd_item) { parsec_taskpool_t* obj = dep_cmd_item->cmd.new_taskpool.tp; @@ -2051,7 +2032,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; #endif - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "OPAQUE_MPI: ThreadID %"PRIxPTR"\tNew taskpool %d registered", + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "OPAQUE_RDEP: ThreadID %"PRIxPTR"\tNew taskpool %d registered", (intptr_t)pthread_self(), obj->taskpool_id); for(item = PARSEC_LIST_ITERATOR_FIRST(&dep_activates_noobj_fifo); item != PARSEC_LIST_ITERATOR_END(&dep_activates_noobj_fifo); @@ -2064,7 +2045,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, deps->eager_msg = buffer; /* provide get_datatype with access to the remote sizes */ rc = remote_dep_get_datatypes(es, deps, PARSEC_DTD_SKIP_SAVING, &position); assert( -1 != rc ); assert(deps->taskpool != NULL); - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tFROM\t%d\tActivate NEWOBJ\t% -8s\twith datakey %lx\tparams %lx", + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "RDEP:\tFROM\t%d\tActivate NEWOBJ\t% -8s\twith datakey %lx\tparams %lx", deps->from, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), deps->msg.deps, deps->msg.output_mask); @@ -2080,7 +2061,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, continue; } - remote_dep_mpi_recv_activate(es, deps, buffer, deps->msg.length, &position); + remote_dep_comm_recv_activate(es, deps, buffer, deps->msg.length, &position); deps->eager_msg = NULL; /* back to NULL */ free(buffer); (void)rc; @@ -2097,7 +2078,7 @@ remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, * the remote task. 
*/ static void -remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, +remote_dep_comm_release_delayed_deps(parsec_execution_stream_t* es, dep_cmd_item_t *item) { PARSEC_PINS(es, ACTIVATE_CB_BEGIN, NULL); @@ -2112,21 +2093,20 @@ remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, (void)rc; assert(deps != NULL); - remote_dep_mpi_recv_activate(es, deps, buffer, deps->msg.length, &position); + remote_dep_comm_recv_activate(es, deps, buffer, deps->msg.length, &position); free(buffer); PARSEC_PINS(es, ACTIVATE_CB_END, NULL); } -static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, +static void remote_dep_comm_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps) { remote_dep_wire_activate_t* task = &(deps->msg); int from = deps->from, k, count, nbdtt; remote_dep_wire_get_t msg; - MPI_Datatype dtt; + parsec_datatype_t dtt; #if defined(PARSEC_DEBUG_NOISIER) - char tmp[MAX_TASK_STRLEN], type_name[MPI_MAX_OBJECT_NAME]; - int len; + char tmp[MAX_TASK_STRLEN]; remote_dep_cmd_to_string(task, tmp, MAX_TASK_STRLEN); #endif for(k = count = 0; deps->incoming_mask >> k; k++) @@ -2136,7 +2116,7 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, DEBUG_MARK_CTL_MSG_ACTIVATE_RECV(from, (void*)task, task); msg.source_deps = task->deps; /* the deps copied from activate message from source */ - msg.callback_fn = (uintptr_t)remote_dep_mpi_get_end_cb; /* We let the source know to call this + msg.callback_fn = (uintptr_t)remote_dep_comm_get_end_cb; /* We let the source know to call this * function when the PUT is over, in a true * one sided case the (integer) value of this * function pointer will be registered as the @@ -2192,14 +2172,16 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, } -# if defined(PARSEC_DEBUG_NOISIER) - MPI_Type_get_name(dtt, type_name, &len); +#if defined(PARSEC_DEBUG_NOISIER) int _size; - MPI_Type_size(dtt, &_size); - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, 
"MPI:\tTO\t%d\tGet START\t% -8s\tk=%d\twith datakey %lx at %p type %s count %d displ %ld \t(k=%d, dst_mem_handle=%p)", - from, tmp, k, task->deps, PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data), type_name, nbdtt, - deps->output[k].data.remote.dst_displ, k, receiver_memory_handle); -# endif + parsec_type_size(dtt, &_size); + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "RDEP:\tTO\t%d\tGet START\t% -8s\tk=%d\twith datakey %lx at %p type 0x%" PRIxPTR " size %d count %d displ %ld \t(k=%d, dst_mem_handle=%p)", + from, tmp, k, task->deps, + PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data), + (uintptr_t)dtt, _size, nbdtt, + deps->output[k].data.remote.dst_displ, k, receiver_memory_handle); +#endif callback_data->memory_handle = receiver_memory_handle; @@ -2221,19 +2203,19 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, receiver_memory_handle_size ); #if defined(PARSEC_PROF_TRACE) - uint64_t event_id = remote_dep_mpi_profiling_event_id(); + uint64_t event_id = remote_dep_comm_profiling_event_id(); callback_data->event_id = event_id; #endif /* PARSEC_PROF_TRACE */ /* Send AM */ - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_pldr_sk, event_id, k, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Data_pldr_sk, event_id, k, from, es->virtual_process->parsec_context->my_rank, *task, nbdtt, dtt); - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_ctl_sk, event_id, k, + TAKE_TIME_WITH_INFO(es->es_profile, RDEP_Data_ctl_sk, event_id, k, from, es->virtual_process->parsec_context->my_rank, *task, nbdtt, dtt); parsec_ce.send_am(&parsec_ce, PARSEC_CE_REMOTE_DEP_GET_DATA_TAG, from, buf, buf_size); - TAKE_TIME(es->es_profile, MPI_Data_ctl_ek, event_id); + TAKE_TIME(es->es_profile, RDEP_Data_ctl_ek, event_id); free(buf); @@ -2241,8 +2223,16 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, } } +static void remote_dep_comm_get_end(parsec_execution_stream_t* es, + int idx, + parsec_remote_deps_t* deps) +{ + /* The ref on the data will be released 
below */ + remote_dep_release_incoming(es, deps, (1U<msg, tmp, MAX_TASK_STRLEN), callback_data->k, deps->incoming_mask, src); #if defined(PARSEC_PROF_TRACE) - TAKE_TIME(es->es_profile, MPI_Data_pldr_ek, callback_data->event_id); + TAKE_TIME(es->es_profile, RDEP_Data_pldr_ek, callback_data->event_id); #endif /* PARSEC_PROF_TRACE */ - remote_dep_release_incoming(es, deps, (1U << callback_data->k)); + remote_dep_comm_get_end(es, callback_data->k, deps); parsec_ce.mem_unregister(&callback_data->memory_handle); parsec_thread_mempool_free(parsec_remote_dep_cb_data_mempool->thread_mempools, callback_data); @@ -2292,9 +2282,9 @@ remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, */ int remote_dep_ce_reconfigure(parsec_context_t* context) { - if( NULL != parsec_mpi_same_pos_items ) { - free(parsec_mpi_same_pos_items); parsec_mpi_same_pos_items = NULL; - parsec_mpi_same_pos_items_size = 0; + if( NULL != remote_dep_same_pos_items ) { + free(remote_dep_same_pos_items); remote_dep_same_pos_items = NULL; + remote_dep_same_pos_items_size = 0; } /** * Finalize the initialization of the upper level structures @@ -2303,9 +2293,9 @@ int remote_dep_ce_reconfigure(parsec_context_t* context) */ remote_deps_allocation_init(context->nb_nodes, MAX_PARAM_COUNT); - parsec_mpi_same_pos_items_size = context->nb_nodes + (int)DEP_LAST; - assert( NULL == parsec_mpi_same_pos_items ); - parsec_mpi_same_pos_items = (dep_cmd_item_t**)calloc(parsec_mpi_same_pos_items_size, + remote_dep_same_pos_items_size = context->nb_nodes + (int)DEP_LAST; + assert( NULL == remote_dep_same_pos_items ); + remote_dep_same_pos_items = (dep_cmd_item_t**)calloc(remote_dep_same_pos_items_size, sizeof(dep_cmd_item_t*)); if(1 < context->nb_nodes) { @@ -2329,14 +2319,14 @@ remote_dep_ce_init(parsec_context_t* context) PARSEC_OBJ_CONSTRUCT(&dep_put_fifo, parsec_list_t); /* Register Persistent requests */ - rc = parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG, remote_dep_mpi_save_activate_cb, context, + rc = 
parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG, remote_dep_comm_save_activate_cb, context, DEP_SHORT_BUFFER_SIZE * sizeof(char)); if( PARSEC_SUCCESS != rc ) { parsec_warning("[CE] Failed to register communication tag PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG (error %d)\n", rc); parsec_comm_engine_fini(&parsec_ce); return rc; } - rc = parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_GET_DATA_TAG, remote_dep_mpi_save_put_cb, context, + rc = parsec_ce.tag_register(PARSEC_CE_REMOTE_DEP_GET_DATA_TAG, remote_dep_comm_save_put_cb, context, 4096); if( PARSEC_SUCCESS != rc ) { parsec_warning("[CE] Failed to register communication tag PARSEC_CE_REMOTE_DEP_GET_DATA_TAG (error %d)\n", rc); @@ -2351,14 +2341,14 @@ remote_dep_ce_init(parsec_context_t* context) offsetof(remote_dep_cb_data_t, mempool_owner), 1); /* Lazy or delayed initializations */ - remote_dep_mpi_initialize_execution_stream(context); + remote_dep_comm_initialize_execution_stream(context); return PARSEC_SUCCESS; } int remote_dep_ce_fini(parsec_context_t* context) { (void)context; - remote_dep_mpi_profiling_fini(); + remote_dep_comm_profiling_fini(); // Unregister tags parsec_ce.tag_unregister(PARSEC_CE_REMOTE_DEP_ACTIVATE_TAG); @@ -2369,9 +2359,9 @@ int remote_dep_ce_fini(parsec_context_t* context) parsec_mempool_destruct(parsec_remote_dep_cb_data_mempool); free(parsec_remote_dep_cb_data_mempool); parsec_remote_dep_cb_data_mempool = NULL; } - if( NULL != parsec_mpi_same_pos_items ) { - free(parsec_mpi_same_pos_items); parsec_mpi_same_pos_items = NULL; - parsec_mpi_same_pos_items_size = 0; + if( NULL != remote_dep_same_pos_items ) { + free(remote_dep_same_pos_items); remote_dep_same_pos_items = NULL; + remote_dep_same_pos_items_size = 0; } PARSEC_OBJ_DESTRUCT(&dep_activates_fifo); From 74224feffb2bf70c71c781131df4cd8009ca9412 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 19 May 2026 01:37:33 -0400 Subject: [PATCH 2/5] Move datatype support behind comm backend modules Introduce a datatype module interface 
used by the public parsec_type_* API and install the selected implementation during communication engine initialization. Move the MPI datatype implementation under the MPI comm component, and keep the basic no-MPI implementation as a fallback datatype module. Leave parsec_type_match() as a generic helper outside the datatype backend, since it only checks compatibility/equality and does not require transport-specific layout handling. Update build wiring and datatype documentation accordingly. Signed-off-by: George Bosilca --- parsec/CMakeLists.txt | 8 +- parsec/data.c | 2 +- parsec/datatype.h | 13 +- parsec/datatype/datatype.c | 124 ++++++----- parsec/datatype/datatype_module.c | 197 ++++++++++++++++++ parsec/datatype_module.h | 92 ++++++++ parsec/mca/comm/comm.c | 37 +++- parsec/mca/comm/comm.h | 21 +- parsec/mca/comm/mpi/comm_mpi.h | 10 +- parsec/mca/comm/mpi/comm_mpi_component.c | 1 + .../comm/mpi/comm_mpi_datatype.c} | 117 ++++++----- 11 files changed, 502 insertions(+), 120 deletions(-) create mode 100644 parsec/datatype/datatype_module.c create mode 100644 parsec/datatype_module.h rename parsec/{datatype/datatype_mpi.c => mca/comm/mpi/comm_mpi_datatype.c} (54%) diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index f34dd76fb..9c8d5616c 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -120,6 +120,7 @@ set(SOURCES private_mempool.c remote_dep.c parsec_comm_engine.c + datatype/datatype_module.c scheduling.c compound.c vpmap.c @@ -134,11 +135,9 @@ if( PARSEC_HAVE_MPI ) list(APPEND SOURCES remote_dep_comm.c) endif( PARSEC_HAVE_MPI ) -if( NOT MPI_C_FOUND ) +if( NOT PARSEC_HAVE_MPI ) list(APPEND SOURCES datatype/datatype.c) -else( NOT MPI_C_FOUND ) - list(APPEND SOURCES datatype/datatype_mpi.c) -endif( NOT MPI_C_FOUND ) +endif( NOT PARSEC_HAVE_MPI ) list(APPEND SOURCES parsec_hwloc.c) if( PARSEC_PROF_GRAPHER ) @@ -309,6 +308,7 @@ if( BUILD_PARSEC ) ${CMAKE_CURRENT_SOURCE_DIR}/include/parsec/parsec_config_bottom.h 
${CMAKE_CURRENT_SOURCE_DIR}/include/parsec/data_distribution.h ${CMAKE_CURRENT_SOURCE_DIR}/datatype.h + ${CMAKE_CURRENT_SOURCE_DIR}/datatype_module.h ${CMAKE_CURRENT_SOURCE_DIR}/profiling.h ${CMAKE_CURRENT_SOURCE_DIR}/dictionary.h ${CMAKE_CURRENT_SOURCE_DIR}/data.h diff --git a/parsec/data.c b/parsec/data.c index 306a6c507..29770d593 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -635,7 +635,7 @@ static void parsec_arena_datatype_construct(parsec_object_t *obj) { adt->ht_item.next_item = NULL; /* keep Coverity happy */ adt->ht_item.hash64 = 0; /* keep Coverity happy */ adt->ht_item.key = 0; /* keep Coverity happy */ - adt->opaque_dtt = NULL; + adt->opaque_dtt = PARSEC_DATATYPE_NULL; } static void parsec_arena_datatype_destruct(parsec_object_t *obj) { diff --git a/parsec/datatype.h b/parsec/datatype.h index 3efdc31a6..dedd233d9 100644 --- a/parsec/datatype.h +++ b/parsec/datatype.h @@ -2,6 +2,7 @@ * Copyright (c) 2015-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #ifndef PARSEC_DATATYPE_H_HAS_BEEN_INCLUDED @@ -66,9 +67,12 @@ typedef intptr_t parsec_datatype_t; BEGIN_C_DECLS /** - * Map the datatype creation to the well designed and well known MPI datatype - * API. The datatype support remains extremely basic, providing API only for - * basic datatypes and functions to mix them together. + * Datatype portability API used by communication and data-movement engines. + * + * The public parsec_type_* functions are stable entry points. Their + * implementation is selected by the active communication backend, so MPI builds + * can keep MPI datatypes while other transports can provide another + * representation. */ int parsec_type_size(parsec_datatype_t type, int *size); @@ -120,7 +124,8 @@ int parsec_type_match(parsec_datatype_t dtt1, /** * Routine to check if a datatype is contiguous. 
* @param[in] parsec_datatype_t datatype - * @return PARSEC_SUCCESS if it was created with MPI_Type_contiguous, PARSEC_ERROR otherwise. + * @return PARSEC_SUCCESS if the selected backend recognizes it as contiguous, + * PARSEC_ERROR otherwise. */ int parsec_type_contiguous(parsec_datatype_t dtt); END_C_DECLS diff --git a/parsec/datatype/datatype.c b/parsec/datatype/datatype.c index 022d30307..bbed42a9b 100644 --- a/parsec/datatype/datatype.c +++ b/parsec/datatype/datatype.c @@ -2,20 +2,18 @@ * Copyright (c) 2015-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" -#include "parsec/datatype.h" +#include "parsec/datatype_module.h" /** - * Map the datatype creation to the well designed and well known MPI datatype - * manipulation. However, right now we only provide the most basic types and - * functions to mix them together. - * - * However, this file contains only the support functions needed when MPI is not - * available. + * Minimal datatype backend used when no communication component provides a + * richer datatype implementation. It recognizes PaRSEC's predefined scalar + * datatypes and treats all constructors as no-op placeholders. 
*/ -int parsec_type_size( parsec_datatype_t type, - int *size ) +static int +parsec_datatype_basic_size(parsec_datatype_t type, int *size) { *size = 0; switch( type ) { @@ -53,100 +51,124 @@ int parsec_type_size( parsec_datatype_t type, return PARSEC_SUCCESS; } -int parsec_type_extent(parsec_datatype_t type, ptrdiff_t* lb, ptrdiff_t* extent) { +static int +parsec_datatype_basic_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) +{ int size, rc; - rc = parsec_type_size(type, &size); + + rc = parsec_datatype_basic_size(type, &size); + if( NULL != lb ) { + *lb = 0; + } *extent = size; return rc; } -int parsec_type_free(parsec_datatype_t* type) { +static int +parsec_datatype_basic_free(parsec_datatype_t *type) +{ *type = PARSEC_DATATYPE_NULL; return PARSEC_SUCCESS; } -int parsec_type_create_contiguous( int count, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +parsec_datatype_basic_create_contiguous(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { *newtype = PARSEC_DATATYPE_NULL; (void)count; (void)oldtype; return PARSEC_SUCCESS; } -int parsec_type_create_vector( int count, - int blocklength, - int stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +parsec_datatype_basic_create_vector(int count, + int blocklength, + int stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { *newtype = PARSEC_DATATYPE_NULL; (void)count; (void)blocklength; (void)stride; (void)oldtype; return PARSEC_SUCCESS; } -int parsec_type_create_hvector( int count, - int blocklength, - ptrdiff_t stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +parsec_datatype_basic_create_hvector(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { *newtype = PARSEC_DATATYPE_NULL; (void)count; (void)blocklength; (void)stride; (void)oldtype; return PARSEC_SUCCESS; } -int parsec_type_create_indexed(int count, - const int 
array_of_blocklengths[], - const int array_of_displacements[], - parsec_datatype_t oldtype, - parsec_datatype_t *newtype) +static int +parsec_datatype_basic_create_indexed(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { *newtype = PARSEC_DATATYPE_NULL; (void)count; (void)array_of_blocklengths; (void)array_of_displacements; (void)oldtype; return PARSEC_SUCCESS; } -int parsec_type_create_indexed_block(int count, - int blocklength, - const int array_of_displacements[], - parsec_datatype_t oldtype, - parsec_datatype_t *newtype) +static int +parsec_datatype_basic_create_indexed_block(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { *newtype = PARSEC_DATATYPE_NULL; (void)count; (void)blocklength; (void)array_of_displacements; (void)oldtype; return PARSEC_SUCCESS; } -int parsec_type_create_struct(int count, - const int array_of_blocklengths[], - const ptrdiff_t array_of_displacements[], - const parsec_datatype_t array_of_types[], - parsec_datatype_t *newtype) +static int +parsec_datatype_basic_create_struct(int count, + const int array_of_blocklengths[], + const ptrdiff_t array_of_displacements[], + const parsec_datatype_t array_of_types[], + parsec_datatype_t *newtype) { *newtype = PARSEC_DATATYPE_NULL; (void)count; (void)array_of_blocklengths; (void)array_of_displacements; (void)array_of_types; return PARSEC_SUCCESS; } -int parsec_type_create_resized(parsec_datatype_t oldtype, - ptrdiff_t lb, - ptrdiff_t extent, - parsec_datatype_t *newtype) +static int +parsec_datatype_basic_create_resized(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype) { *newtype = PARSEC_DATATYPE_NULL; (void)lb; (void)extent; (void)oldtype; return PARSEC_SUCCESS; } -int parsec_type_match(parsec_datatype_t dtt1, - parsec_datatype_t dtt2){ - (void)dtt1; (void)dtt2; - return 
PARSEC_SUCCESS; -} - -int parsec_type_contiguous(parsec_datatype_t dtt) +static int +parsec_datatype_basic_contiguous(parsec_datatype_t dtt) { (void)dtt; return PARSEC_SUCCESS; } + +const parsec_datatype_module_t parsec_datatype_basic_module = { + .size = parsec_datatype_basic_size, + .extent = parsec_datatype_basic_extent, + .free = parsec_datatype_basic_free, + .create_contiguous = parsec_datatype_basic_create_contiguous, + .create_vector = parsec_datatype_basic_create_vector, + .create_hvector = parsec_datatype_basic_create_hvector, + .create_indexed = parsec_datatype_basic_create_indexed, + .create_indexed_block = parsec_datatype_basic_create_indexed_block, + .create_struct = parsec_datatype_basic_create_struct, + .create_resized = parsec_datatype_basic_create_resized, + .contiguous = parsec_datatype_basic_contiguous, +}; diff --git a/parsec/datatype/datatype_module.c b/parsec/datatype/datatype_module.c new file mode 100644 index 000000000..99ad6fddf --- /dev/null +++ b/parsec/datatype/datatype_module.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/datatype_module.h" +#include "parsec/utils/debug.h" + +#if !defined(PARSEC_HAVE_MPI) +extern const parsec_datatype_module_t parsec_datatype_basic_module; +static const parsec_datatype_module_t *parsec_datatype_selected_module = &parsec_datatype_basic_module; +#else +/* + * MPI-enabled builds start without a datatype backend on purpose: the selected + * communication component owns the datatype representation and installs the + * matching module during parsec_comm_engine_init(). 
+ */ +static const parsec_datatype_module_t *parsec_datatype_selected_module = NULL; +#endif + +static int +parsec_datatype_module_ready(void) +{ + if( NULL != parsec_datatype_selected_module ) { + return 1; + } + + parsec_warning("No datatype backend has been installed"); + return 0; +} + +int +parsec_datatype_module_install(const parsec_datatype_module_t *module) +{ + if( NULL == module ) { + return PARSEC_ERR_BAD_PARAM; + } + if( (NULL == module->size) || + (NULL == module->extent) || + (NULL == module->free) || + (NULL == module->create_contiguous) || + (NULL == module->create_vector) || + (NULL == module->create_hvector) || + (NULL == module->create_indexed) || + (NULL == module->create_indexed_block) || + (NULL == module->create_struct) || + (NULL == module->create_resized) || + (NULL == module->contiguous) ) { + return PARSEC_ERR_BAD_PARAM; + } + + parsec_datatype_selected_module = module; + return PARSEC_SUCCESS; +} + +int +parsec_type_size(parsec_datatype_t type, int *size) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->size(type, size); +} + +int +parsec_type_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->extent(type, lb, extent); +} + +int +parsec_type_free(parsec_datatype_t *type) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->free(type); +} + +int +parsec_type_create_contiguous(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_contiguous(count, oldtype, newtype); +} + +int +parsec_type_create_vector(int count, + int blocklength, + int stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( 
!parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_vector(count, blocklength, stride, + oldtype, newtype); +} + +int +parsec_type_create_hvector(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_hvector(count, blocklength, stride, + oldtype, newtype); +} + +int +parsec_type_create_indexed(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_indexed(count, array_of_blocklengths, + array_of_displacements, + oldtype, newtype); +} + +int +parsec_type_create_indexed_block(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_indexed_block(count, blocklength, + array_of_displacements, + oldtype, newtype); +} + +int +parsec_type_create_struct(int count, + const int array_of_blocklengths[], + const ptrdiff_t array_of_displacements[], + const parsec_datatype_t array_of_types[], + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_struct(count, array_of_blocklengths, + array_of_displacements, + array_of_types, newtype); +} + +int +parsec_type_create_resized(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->create_resized(oldtype, lb, extent, 
newtype); +} + +int +parsec_type_match(parsec_datatype_t dtt1, parsec_datatype_t dtt2) +{ +#if defined(PARSEC_HAVE_MPI) + return (dtt1 == dtt2 ? PARSEC_SUCCESS : PARSEC_ERROR); +#else + (void)dtt1; (void)dtt2; + return PARSEC_SUCCESS; +#endif +} + +int +parsec_type_contiguous(parsec_datatype_t dtt) +{ + if( !parsec_datatype_module_ready() ) { + return PARSEC_ERR_NOT_FOUND; + } + return parsec_datatype_selected_module->contiguous(dtt); +} diff --git a/parsec/datatype_module.h b/parsec/datatype_module.h new file mode 100644 index 000000000..c0b068c62 --- /dev/null +++ b/parsec/datatype_module.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * Backend datatype module interface. + * + * PaRSEC keeps the public parsec_type_* API stable, but the implementation of + * those routines is provided by the communication backend selected for the + * process. This lets an MPI backend keep using MPI datatypes while another + * backend can provide a different representation. + * + * Datatype matching is intentionally not part of this module. The current + * parsec_type_match() API is a lightweight compatibility helper and does not + * require backend-specific layout comparison. 
+ */ +#ifndef PARSEC_DATATYPE_MODULE_H_HAS_BEEN_INCLUDED +#define PARSEC_DATATYPE_MODULE_H_HAS_BEEN_INCLUDED + +#include "parsec/parsec_config.h" +#include "parsec/datatype.h" + +BEGIN_C_DECLS + +typedef int (*parsec_datatype_module_size_fn_t)(parsec_datatype_t type, + int *size); +typedef int (*parsec_datatype_module_extent_fn_t)(parsec_datatype_t type, + ptrdiff_t *lb, + ptrdiff_t *extent); +typedef int (*parsec_datatype_module_free_fn_t)(parsec_datatype_t *type); +typedef int (*parsec_datatype_module_create_contiguous_fn_t)(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_vector_fn_t)(int count, + int blocklength, + int stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_hvector_fn_t)(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_indexed_fn_t)(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_indexed_block_fn_t)(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_struct_fn_t)(int count, + const int array_of_blocklengths[], + const ptrdiff_t array_of_displacements[], + const parsec_datatype_t array_of_types[], + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_create_resized_fn_t)(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype); +typedef int (*parsec_datatype_module_contiguous_fn_t)(parsec_datatype_t type); + +typedef struct parsec_datatype_module_s { + parsec_datatype_module_size_fn_t size; + parsec_datatype_module_extent_fn_t extent; + parsec_datatype_module_free_fn_t free; + 
parsec_datatype_module_create_contiguous_fn_t create_contiguous; + parsec_datatype_module_create_vector_fn_t create_vector; + parsec_datatype_module_create_hvector_fn_t create_hvector; + parsec_datatype_module_create_indexed_fn_t create_indexed; + parsec_datatype_module_create_indexed_block_fn_t create_indexed_block; + parsec_datatype_module_create_struct_fn_t create_struct; + parsec_datatype_module_create_resized_fn_t create_resized; + parsec_datatype_module_contiguous_fn_t contiguous; +} parsec_datatype_module_t; + +/** + * Install the datatype backend used by the public parsec_type_* API. + * + * The selected communication component calls this during initialization. The + * installed module must remain valid for the rest of the process lifetime, + * because datatype objects can be freed during runtime teardown after the + * communication engine itself has been finalized. + */ +int parsec_datatype_module_install(const parsec_datatype_module_t *module); + +END_C_DECLS + +#endif /* PARSEC_DATATYPE_MODULE_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/mca/comm/comm.c b/parsec/mca/comm/comm.c index e03e0acef..3bd2657a0 100644 --- a/parsec/mca/comm/comm.c +++ b/parsec/mca/comm/comm.c @@ -3,6 +3,7 @@ */ #include "parsec/parsec_config.h" +#include "parsec/datatype_module.h" #include "parsec/mca/comm/comm.h" #include "parsec/mca/mca_repository.h" #include "parsec/utils/debug.h" @@ -16,6 +17,7 @@ parsec_comm_engine_component_init(parsec_context_t *context) mca_base_component_t **components; mca_base_module_t *selected_module = NULL; mca_base_component_t *selected_component = NULL; + parsec_comm_module_t *comm_module; parsec_comm_engine_t *ce; assert(NULL == parsec_comm_selected_component); @@ -34,15 +36,48 @@ parsec_comm_engine_component_init(parsec_context_t *context) return NULL; } + comm_module = (parsec_comm_module_t *)selected_module; parsec_comm_selected_component = (parsec_comm_base_component_t *)selected_component; parsec_debug_verbose(4, parsec_debug_output, 
"Installing communication engine %s", parsec_comm_selected_component->base_version.mca_component_name); - ce = ((parsec_comm_module_t *)selected_module)->module.init(context); + if( NULL == comm_module->module.init ) { + parsec_warning("Communication engine %s did not provide an init function", + parsec_comm_selected_component->base_version.mca_component_name); + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + return NULL; + } + if( NULL == comm_module->datatype ) { + parsec_warning("Communication engine %s did not provide datatype support", + parsec_comm_selected_component->base_version.mca_component_name); + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + return NULL; + } + + ce = comm_module->module.init(context); if( NULL == ce ) { mca_component_close((mca_base_component_t *)parsec_comm_selected_component); parsec_comm_selected_component = NULL; + return NULL; + } + + /* + * Datatype handling follows the selected transport. MPI-backed runs keep + * using MPI datatypes; future non-MPI communication engines can install + * their own representation without changing the public parsec_type_* API. + */ + if( PARSEC_SUCCESS != parsec_datatype_module_install(comm_module->datatype) ) { + parsec_warning("Communication engine %s did not provide valid datatype support", + parsec_comm_selected_component->base_version.mca_component_name); + if( NULL != ce->fini ) { + ce->fini(ce); + } + mca_component_close((mca_base_component_t *)parsec_comm_selected_component); + parsec_comm_selected_component = NULL; + return NULL; } return ce; } diff --git a/parsec/mca/comm/comm.h b/parsec/mca/comm/comm.h index d61a7d016..54bfe981d 100644 --- a/parsec/mca/comm/comm.h +++ b/parsec/mca/comm/comm.h @@ -24,6 +24,8 @@ BEGIN_C_DECLS +struct parsec_datatype_module_s; + /** * Common component header for communication engine components. 
* @@ -52,9 +54,10 @@ typedef parsec_comm_engine_t *(*parsec_comm_base_module_init_fn_t)(parsec_contex /** * Communication module contract. * - * The module has a single responsibility at this layer: build and return the - * concrete parsec_comm_engine_t used by the runtime. Backend operations - * themselves are the function pointers stored in that returned engine. + * The module builds the concrete parsec_comm_engine_t used by the runtime and + * publishes the datatype operations that match this transport. Backend + * communication operations themselves are the function pointers stored in the + * returned parsec_comm_engine_t. */ struct parsec_comm_base_module_1_0_0_t { parsec_comm_base_module_init_fn_t init; @@ -64,8 +67,20 @@ typedef struct parsec_comm_base_module_1_0_0_t parsec_comm_base_module_1_0_0_t; typedef struct parsec_comm_base_module_1_0_0_t parsec_comm_base_module_t; typedef struct parsec_comm_module_s { + /* + * Keep the component pointer in the module, following the existing MCA + * framework convention in PaRSEC. The generic MCA query API returns an + * opaque mca_base_module_t pointer, and the comm framework casts it back to + * this complete module type after selection. + */ const parsec_comm_base_component_t *component; parsec_comm_base_module_t module; + /* + * Datatype operations used by the public parsec_type_* API while this + * transport backend is selected. The module storage must outlive runtime + * finalization because datatype objects can be released late in teardown. 
+ */ + const struct parsec_datatype_module_s *datatype; } parsec_comm_module_t; /** diff --git a/parsec/mca/comm/mpi/comm_mpi.h b/parsec/mca/comm/mpi/comm_mpi.h index fd4aca531..9bd6239a2 100644 --- a/parsec/mca/comm/mpi/comm_mpi.h +++ b/parsec/mca/comm/mpi/comm_mpi.h @@ -15,18 +15,24 @@ #define PARSEC_COMM_MPI_H_HAS_BEEN_INCLUDED #include "parsec/mca/comm/comm.h" +#include "parsec/datatype_module.h" BEGIN_C_DECLS /** * MCA component descriptor for the MPI communication engine. */ -extern const parsec_comm_base_component_t parsec_comm_mpi_component; +PARSEC_DECLSPEC extern const parsec_comm_base_component_t parsec_comm_mpi_component; + +/** + * MPI datatype backend installed together with the MPI communication engine. + */ +PARSEC_DECLSPEC extern const parsec_datatype_module_t parsec_comm_mpi_datatype_module; /** * Constructor used by the static MCA component table. */ -mca_base_component_t *comm_mpi_static_component(void); +PARSEC_DECLSPEC mca_base_component_t *comm_mpi_static_component(void); END_C_DECLS diff --git a/parsec/mca/comm/mpi/comm_mpi_component.c b/parsec/mca/comm/mpi/comm_mpi_component.c index 8fb758e01..ee0b2c124 100644 --- a/parsec/mca/comm/mpi/comm_mpi_component.c +++ b/parsec/mca/comm/mpi/comm_mpi_component.c @@ -18,6 +18,7 @@ static parsec_comm_module_t parsec_comm_mpi_module = { .module = { .init = mpi_funnelled_init, }, + .datatype = &parsec_comm_mpi_datatype_module, }; const parsec_comm_base_component_t parsec_comm_mpi_component = { diff --git a/parsec/datatype/datatype_mpi.c b/parsec/mca/comm/mpi/comm_mpi_datatype.c similarity index 54% rename from parsec/datatype/datatype_mpi.c rename to parsec/mca/comm/mpi/comm_mpi_datatype.c index c28476d01..111a32590 100644 --- a/parsec/datatype/datatype_mpi.c +++ b/parsec/mca/comm/mpi/comm_mpi_datatype.c @@ -2,24 +2,25 @@ * Copyright (c) 2015-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. 
All rights reserved. */ #include "parsec/parsec_config.h" -#include "parsec/datatype.h" +#include "parsec/datatype_module.h" #if !defined(PARSEC_HAVE_MPI) #error __FILE__ should only be used when MPI support is enabled. #endif /* !defined(PARSEC_HAVE_MPI) */ -int -parsec_type_size( parsec_datatype_t type, int *size ) +static int +comm_mpi_datatype_size(parsec_datatype_t type, int *size) { int rc = MPI_Type_size( type, size ); return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_extent( parsec_datatype_t type, ptrdiff_t* lb, ptrdiff_t* extent) +static int +comm_mpi_datatype_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) { int rc; MPI_Aint mpi_extent, mpi_lb; @@ -33,17 +34,17 @@ parsec_type_extent( parsec_datatype_t type, ptrdiff_t* lb, ptrdiff_t* extent) return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_free( parsec_datatype_t* type ) +static int +comm_mpi_datatype_free(parsec_datatype_t *type) { int rc = MPI_Type_free(type); return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_contiguous( int count, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +comm_mpi_datatype_create_contiguous(int count, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_contiguous( count, oldtype, newtype ); if( MPI_SUCCESS != rc ) return PARSEC_ERROR; @@ -51,12 +52,12 @@ parsec_type_create_contiguous( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_vector( int count, - int blocklength, - int stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +comm_mpi_datatype_create_vector(int count, + int blocklength, + int stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_vector( count, blocklength, stride, oldtype, newtype ); @@ -65,12 +66,12 @@ parsec_type_create_vector( int count, return (MPI_SUCCESS == rc ? 
PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_hvector( int count, - int blocklength, - ptrdiff_t stride, - parsec_datatype_t oldtype, - parsec_datatype_t* newtype ) +static int +comm_mpi_datatype_create_hvector(int count, + int blocklength, + ptrdiff_t stride, + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_create_hvector( count, blocklength, stride, oldtype, newtype ); @@ -79,12 +80,12 @@ parsec_type_create_hvector( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_indexed( int count, - const int array_of_blocklengths[], - const int array_of_displacements[], - parsec_datatype_t oldtype, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_indexed(int count, + const int array_of_blocklengths[], + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_indexed( count, array_of_blocklengths, @@ -95,12 +96,12 @@ parsec_type_create_indexed( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_indexed_block( int count, - int blocklength, - const int array_of_displacements[], - parsec_datatype_t oldtype, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_indexed_block(int count, + int blocklength, + const int array_of_displacements[], + parsec_datatype_t oldtype, + parsec_datatype_t *newtype) { int rc = MPI_Type_create_indexed_block( count, blocklength, array_of_displacements, @@ -110,12 +111,12 @@ parsec_type_create_indexed_block( int count, return (MPI_SUCCESS == rc ? 
PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_struct( int count, - const int *array_of_blocklengths, - const ptrdiff_t *array_of_displacements, - const parsec_datatype_t *array_of_types, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_struct(int count, + const int *array_of_blocklengths, + const ptrdiff_t *array_of_displacements, + const parsec_datatype_t *array_of_types, + parsec_datatype_t *newtype) { int rc = MPI_Type_create_struct( count, array_of_blocklengths, @@ -126,11 +127,11 @@ parsec_type_create_struct( int count, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int -parsec_type_create_resized( parsec_datatype_t oldtype, - ptrdiff_t lb, - ptrdiff_t extent, - parsec_datatype_t *newtype ) +static int +comm_mpi_datatype_create_resized(parsec_datatype_t oldtype, + ptrdiff_t lb, + ptrdiff_t extent, + parsec_datatype_t *newtype) { int rc; #if defined(PARSEC_HAVE_MPI_20) @@ -146,14 +147,8 @@ parsec_type_create_resized( parsec_datatype_t oldtype, return (MPI_SUCCESS == rc ? PARSEC_SUCCESS : PARSEC_ERROR); } -int parsec_type_match(parsec_datatype_t dtt1, - parsec_datatype_t dtt2) -{ - (void)dtt1; (void)dtt2; - return ( dtt1 == dtt2 ? 
PARSEC_SUCCESS : PARSEC_ERROR); -} - -int parsec_type_contiguous(parsec_datatype_t dtt) +static int +comm_mpi_datatype_contiguous(parsec_datatype_t dtt) { int rc; int num_integers, num_addresses, num_datatypes, combiner; @@ -165,3 +160,17 @@ int parsec_type_contiguous(parsec_datatype_t dtt) } return PARSEC_ERROR; } + +const parsec_datatype_module_t parsec_comm_mpi_datatype_module = { + .size = comm_mpi_datatype_size, + .extent = comm_mpi_datatype_extent, + .free = comm_mpi_datatype_free, + .create_contiguous = comm_mpi_datatype_create_contiguous, + .create_vector = comm_mpi_datatype_create_vector, + .create_hvector = comm_mpi_datatype_create_hvector, + .create_indexed = comm_mpi_datatype_create_indexed, + .create_indexed_block = comm_mpi_datatype_create_indexed_block, + .create_struct = comm_mpi_datatype_create_struct, + .create_resized = comm_mpi_datatype_create_resized, + .contiguous = comm_mpi_datatype_contiguous, +}; From 0a2291f81894af4d89a6fae184e7c4d9d2007329 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 19 May 2026 15:11:13 -0400 Subject: [PATCH 3/5] Add initial UCX communication backend Add an optional UCX communication engine component bootstrapped through PMIx. The UCX backend is currently enabled only for non-MPI distributed builds, because MPI builds still expose MPI_Datatype through the public datatype ABI. The new backend provides the first CPU-contiguous transport path: PMIx rank and worker-address exchange, UCX endpoint creation, active messages, CPU memory registration, rkey exchange, and PUT/GET support. Non-contiguous datatype movement, reshape, taskpool-id synchronization, and device-memory support remain explicitly unsupported for now. Move taskpool-id synchronization behind the communication-engine vtable. The MPI backend keeps the current MPI_Allreduce implementation, while the UCX backend returns PARSEC_ERR_NOT_IMPLEMENTED as a placeholder for a future UCX/PMIx implementation. 
Add a UCX set_ctx path that accepts an application-owned UCX context and worker. PaRSEC does not take ownership of those handles, but performs its late setup on top of them: worker-address publication, endpoint creation, and active-message handler registration. Also extend the basic non-MPI datatype backend so it records simple derived datatype size, extent, and contiguity information, which gives the UCX backend a minimal datatype representation for CPU-contiguous paths. Signed-off-by: George Bosilca --- CMakeLists.txt | 17 +- parsec/CMakeLists.txt | 4 +- parsec/datatype/datatype.c | 281 +++- parsec/include/parsec/parsec_config_bottom.h | 3 +- parsec/include/parsec/parsec_options.h.in | 2 + parsec/mca/comm/mpi/comm_mpi_funnelled.c | 27 + parsec/mca/comm/mpi/comm_mpi_funnelled.h | 6 + parsec/mca/comm/ucx/ValidateModule.CMake | 16 + parsec/mca/comm/ucx/comm_ucx.c | 1217 ++++++++++++++++++ parsec/mca/comm/ucx/comm_ucx.h | 44 + parsec/mca/comm/ucx/comm_ucx_component.c | 55 + parsec/parsec.c | 28 +- parsec/parsec_comm_engine.c | 5 +- parsec/parsec_comm_engine.h | 9 + parsec/runtime.h | 8 +- 15 files changed, 1656 insertions(+), 66 deletions(-) create mode 100644 parsec/mca/comm/ucx/ValidateModule.CMake create mode 100644 parsec/mca/comm/ucx/comm_ucx.c create mode 100644 parsec/mca/comm/ucx/comm_ucx.h create mode 100644 parsec/mca/comm/ucx/comm_ucx_component.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f5a03f51..e96c7c43c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,8 +168,10 @@ mark_as_advanced(PARSEC_SCHED_DEPS_MASK) mark_as_advanced(PARSEC_DIST_THREAD PARSEC_DIST_PRIORITIES) option(PARSEC_DIST_WITH_MPI "Build PaRSEC for distributed memory with MPI backend (conflicts all other backends)" ON) -if(PARSEC_DIST_WITH_MPI AND 0) - message(FATAL_ERROR "PARSEC_DIST_WITH_MPI and PARSEC_DIST_WITH_OTHER are mutually exclusive, please select only one") +option(PARSEC_DIST_WITH_UCX + "Build PaRSEC for distributed memory with UCX backend bootstrapped by PMIx" 
OFF) +if(PARSEC_DIST_WITH_MPI AND PARSEC_DIST_WITH_UCX) + message(FATAL_ERROR "The UCX backend currently requires PARSEC_DIST_WITH_MPI=OFF because the MPI build still exposes MPI_Datatype in the public datatype ABI") endif() option(PARSEC_MPI_IS_GPU_AWARE "Build PaRSEC assuming the MPI library is GPU-aware, aka. can move data directly to and from GPU memory.\ @@ -647,6 +649,17 @@ if( BUILD_PARSEC ) endif (NOT MPI_C_FOUND) list(APPEND EXTRA_LIBS ${MPI_C_LIBRARIES}) endif (PARSEC_DIST_WITH_MPI) + if (PARSEC_DIST_WITH_UCX) + find_package(PkgConfig REQUIRED) + pkg_check_modules(PARSEC_UCX REQUIRED ucx) + pkg_check_modules(PARSEC_PMIX REQUIRED pmix) + set(PARSEC_HAVE_UCX TRUE) + set(PARSEC_HAVE_PMIX TRUE) + include_directories(BEFORE ${PARSEC_UCX_INCLUDE_DIRS} ${PARSEC_PMIX_INCLUDE_DIRS}) + link_directories(${PARSEC_UCX_LIBRARY_DIRS} ${PARSEC_PMIX_LIBRARY_DIRS}) + list(APPEND EXTRA_INCLUDES ${PARSEC_UCX_INCLUDE_DIRS} ${PARSEC_PMIX_INCLUDE_DIRS}) + list(APPEND EXTRA_LIBS ${PARSEC_UCX_LIBRARIES} ${PARSEC_PMIX_LIBRARIES}) + endif (PARSEC_DIST_WITH_UCX) # # Check to see if # support for MPI 2.0 is available diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index 9c8d5616c..2b0fb2abe 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -131,10 +131,10 @@ set(SOURCES if( PARSEC_PROF_TRACE ) list(APPEND SOURCES dictionary.c) endif( PARSEC_PROF_TRACE ) -if( PARSEC_HAVE_MPI ) +if( PARSEC_HAVE_MPI OR PARSEC_HAVE_UCX ) list(APPEND SOURCES remote_dep_comm.c) -endif( PARSEC_HAVE_MPI ) +endif( PARSEC_HAVE_MPI OR PARSEC_HAVE_UCX ) if( NOT PARSEC_HAVE_MPI ) list(APPEND SOURCES datatype/datatype.c) endif( NOT PARSEC_HAVE_MPI ) diff --git a/parsec/datatype/datatype.c b/parsec/datatype/datatype.c index bbed42a9b..117ed8403 100644 --- a/parsec/datatype/datatype.c +++ b/parsec/datatype/datatype.c @@ -6,67 +6,170 @@ */ #include "parsec/runtime.h" #include "parsec/datatype_module.h" +#include <stdlib.h> /** * Minimal datatype backend used when no communication component provides a 
* richer datatype implementation. It recognizes PaRSEC's predefined scalar - * datatypes and treats all constructors as no-op placeholders. + * datatypes and records the size and extent of simple derived datatypes. This + * is enough for communication backends that only need contiguous byte ranges, + * while still failing through the public API if a caller asks for information + * about an unknown datatype. */ + +typedef struct parsec_datatype_basic_desc_s { + uint64_t magic; + int size; + ptrdiff_t lb; + ptrdiff_t extent; + int contiguous; +} parsec_datatype_basic_desc_t; + +#define PARSEC_DATATYPE_BASIC_MAGIC 0x7061727365636474ULL + static int -parsec_datatype_basic_size(parsec_datatype_t type, int *size) +parsec_datatype_basic_is_predefined(parsec_datatype_t type) +{ + return (type >= parsec_datatype_int_t) && + (type <= parsec_datatype_double_complex_t); +} + +static parsec_datatype_basic_desc_t * +parsec_datatype_basic_get_desc(parsec_datatype_t type) +{ + parsec_datatype_basic_desc_t *desc; + uintptr_t handle = (uintptr_t)type; + + if( parsec_datatype_basic_is_predefined(type) || + (PARSEC_DATATYPE_NULL == type) ) { + return NULL; + } + if( handle < 4096 || 0 != (handle % sizeof(void *)) ) { + return NULL; + } + desc = (parsec_datatype_basic_desc_t *)(intptr_t)type; + if( PARSEC_DATATYPE_BASIC_MAGIC != desc->magic ) { + return NULL; + } + return desc; +} + +static int +parsec_datatype_basic_get_info(parsec_datatype_t type, + int *size, + ptrdiff_t *lb, + ptrdiff_t *extent, + int *contiguous) { - *size = 0; + parsec_datatype_basic_desc_t *desc; + int predefined_size; + switch( type ) { case parsec_datatype_int_t: - *size = sizeof( int ); break; + predefined_size = sizeof( int ); + break; case parsec_datatype_int8_t: - *size = sizeof( int8_t ); break; + predefined_size = sizeof( int8_t ); + break; case parsec_datatype_int16_t: - *size = sizeof( int16_t ); break; + predefined_size = sizeof( int16_t ); + break; case parsec_datatype_int32_t: - *size = sizeof( 
int32_t ); break; + predefined_size = sizeof( int32_t ); + break; case parsec_datatype_int64_t: - *size = sizeof( int64_t ); break; + predefined_size = sizeof( int64_t ); + break; case parsec_datatype_uint8_t: - *size = sizeof( uint8_t ); break; + predefined_size = sizeof( uint8_t ); + break; case parsec_datatype_uint16_t: - *size = sizeof( uint16_t ); break; + predefined_size = sizeof( uint16_t ); + break; case parsec_datatype_uint32_t: - *size = sizeof( uint32_t ); break; + predefined_size = sizeof( uint32_t ); + break; case parsec_datatype_uint64_t: - *size = sizeof( uint64_t ); break; + predefined_size = sizeof( uint64_t ); + break; case parsec_datatype_float_t: - *size = sizeof( float ); break; + predefined_size = sizeof( float ); + break; case parsec_datatype_double_t: - *size = sizeof( double ); break; + predefined_size = sizeof( double ); + break; case parsec_datatype_long_double_t: - *size = sizeof( long double ); break; + predefined_size = sizeof( long double ); + break; case parsec_datatype_complex_t: - *size = 2 * sizeof( float ); break; + predefined_size = 2 * sizeof( float ); + break; case parsec_datatype_double_complex_t: - *size = 2 * sizeof( double ); break; + predefined_size = 2 * sizeof( double ); + break; default: - return PARSEC_ERR_NOT_SUPPORTED; + desc = parsec_datatype_basic_get_desc(type); + if( NULL == desc ) { + return PARSEC_ERR_NOT_SUPPORTED; + } + if( NULL != size ) *size = desc->size; + if( NULL != lb ) *lb = desc->lb; + if( NULL != extent ) *extent = desc->extent; + if( NULL != contiguous ) *contiguous = desc->contiguous; + return PARSEC_SUCCESS; } + + if( NULL != size ) *size = predefined_size; + if( NULL != lb ) *lb = 0; + if( NULL != extent ) *extent = predefined_size; + if( NULL != contiguous ) *contiguous = 1; return PARSEC_SUCCESS; } static int -parsec_datatype_basic_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) +parsec_datatype_basic_desc_create(int size, + ptrdiff_t lb, + ptrdiff_t extent, + int contiguous, 
+ parsec_datatype_t *newtype) { - int size, rc; + parsec_datatype_basic_desc_t *desc; - rc = parsec_datatype_basic_size(type, &size); - if( NULL != lb ) { - *lb = 0; + desc = (parsec_datatype_basic_desc_t *)calloc(1, sizeof(*desc)); + if( NULL == desc ) { + return PARSEC_ERR_OUT_OF_RESOURCE; } - *extent = size; - return rc; + desc->magic = PARSEC_DATATYPE_BASIC_MAGIC; + desc->size = size; + desc->lb = lb; + desc->extent = extent; + desc->contiguous = contiguous; + *newtype = (parsec_datatype_t)(intptr_t)desc; + return PARSEC_SUCCESS; +} + +static int +parsec_datatype_basic_size(parsec_datatype_t type, int *size) +{ + return parsec_datatype_basic_get_info(type, size, NULL, NULL, NULL); +} + +static int +parsec_datatype_basic_extent(parsec_datatype_t type, ptrdiff_t *lb, ptrdiff_t *extent) +{ + return parsec_datatype_basic_get_info(type, NULL, lb, extent, NULL); } static int parsec_datatype_basic_free(parsec_datatype_t *type) { + parsec_datatype_basic_desc_t *desc; + + desc = parsec_datatype_basic_get_desc(*type); + if( NULL != desc ) { + desc->magic = 0; + free(desc); + } *type = PARSEC_DATATYPE_NULL; return PARSEC_SUCCESS; } @@ -76,9 +179,17 @@ parsec_datatype_basic_create_contiguous(int count, parsec_datatype_t oldtype, parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous; + ptrdiff_t oldlb, oldextent; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + return parsec_datatype_basic_desc_create(count * oldsize, oldlb, + count * oldextent, + contiguous, newtype); } static int @@ -88,9 +199,19 @@ parsec_datatype_basic_create_vector(int count, parsec_datatype_t oldtype, parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)blocklength; (void)stride; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous; + ptrdiff_t oldlb, oldextent, 
extent; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + extent = ((ptrdiff_t)(count - 1) * stride + blocklength) * oldextent; + return parsec_datatype_basic_desc_create(count * blocklength * oldsize, + oldlb, extent, + contiguous && (extent == (count * blocklength * oldsize)), + newtype); } static int @@ -100,9 +221,19 @@ parsec_datatype_basic_create_hvector(int count, parsec_datatype_t oldtype, parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)blocklength; (void)stride; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous; + ptrdiff_t oldlb, oldextent, extent; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + extent = ((ptrdiff_t)(count - 1) * stride) + blocklength * oldextent; + return parsec_datatype_basic_desc_create(count * blocklength * oldsize, + oldlb, extent, + contiguous && (extent == (count * blocklength * oldsize)), + newtype); } static int @@ -112,9 +243,23 @@ parsec_datatype_basic_create_indexed(int count, parsec_datatype_t oldtype, parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)array_of_blocklengths; (void)array_of_displacements; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous, size = 0; + ptrdiff_t oldlb, oldextent, min_disp = 0, max_disp = 0; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + for(int i = 0; i < count; i++) { + ptrdiff_t begin = (ptrdiff_t)array_of_displacements[i] * oldextent; + ptrdiff_t end = begin + array_of_blocklengths[i] * oldextent; + if( (0 == i) || (begin < min_disp) ) min_disp = begin; + if( (0 == i) || (end > max_disp) ) max_disp = end; + size += array_of_blocklengths[i] * oldsize; + } + return 
parsec_datatype_basic_desc_create(size, oldlb + min_disp, + max_disp - min_disp, 0, newtype); } static int @@ -124,9 +269,23 @@ parsec_datatype_basic_create_indexed_block(int count, parsec_datatype_t oldtype, parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)blocklength; (void)array_of_displacements; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous, size; + ptrdiff_t oldlb, oldextent, min_disp = 0, max_disp = 0; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, &oldlb, + &oldextent, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + for(int i = 0; i < count; i++) { + ptrdiff_t begin = (ptrdiff_t)array_of_displacements[i] * oldextent; + ptrdiff_t end = begin + blocklength * oldextent; + if( (0 == i) || (begin < min_disp) ) min_disp = begin; + if( (0 == i) || (end > max_disp) ) max_disp = end; + } + size = count * blocklength * oldsize; + return parsec_datatype_basic_desc_create(size, oldlb + min_disp, + max_disp - min_disp, 0, newtype); } static int @@ -136,9 +295,23 @@ parsec_datatype_basic_create_struct(int count, const parsec_datatype_t array_of_types[], parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)count; (void)array_of_blocklengths; (void)array_of_displacements; (void)array_of_types; - return PARSEC_SUCCESS; + int rc, oldsize, size = 0; + ptrdiff_t oldlb, oldextent, min_disp = 0, max_disp = 0; + + for(int i = 0; i < count; i++) { + rc = parsec_datatype_basic_get_info(array_of_types[i], &oldsize, + &oldlb, &oldextent, NULL); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + ptrdiff_t begin = array_of_displacements[i] + oldlb; + ptrdiff_t end = begin + array_of_blocklengths[i] * oldextent; + if( (0 == i) || (begin < min_disp) ) min_disp = begin; + if( (0 == i) || (end > max_disp) ) max_disp = end; + size += array_of_blocklengths[i] * oldsize; + } + return parsec_datatype_basic_desc_create(size, min_disp, + max_disp - min_disp, 0, newtype); } static int @@ 
-147,16 +320,28 @@ parsec_datatype_basic_create_resized(parsec_datatype_t oldtype, ptrdiff_t extent, parsec_datatype_t *newtype) { - *newtype = PARSEC_DATATYPE_NULL; - (void)lb; (void)extent; (void)oldtype; - return PARSEC_SUCCESS; + int oldsize, rc, contiguous; + + rc = parsec_datatype_basic_get_info(oldtype, &oldsize, NULL, NULL, + &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + return parsec_datatype_basic_desc_create(oldsize, lb, extent, + contiguous && (extent == oldsize), + newtype); } static int parsec_datatype_basic_contiguous(parsec_datatype_t dtt) { - (void)dtt; - return PARSEC_SUCCESS; + int contiguous, rc; + + rc = parsec_datatype_basic_get_info(dtt, NULL, NULL, NULL, &contiguous); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + return contiguous ? PARSEC_SUCCESS : PARSEC_ERROR; } const parsec_datatype_module_t parsec_datatype_basic_module = { diff --git a/parsec/include/parsec/parsec_config_bottom.h b/parsec/include/parsec/parsec_config_bottom.h index 88a9d1045..04286b96c 100644 --- a/parsec/include/parsec/parsec_config_bottom.h +++ b/parsec/include/parsec/parsec_config_bottom.h @@ -132,7 +132,7 @@ #define __STDC_FORMAT_MACROS #include <inttypes.h> -#if defined(PARSEC_HAVE_MPI) +#if defined(PARSEC_HAVE_MPI) || defined(PARSEC_HAVE_UCX) # define DISTRIBUTED #else # undef DISTRIBUTED @@ -201,4 +201,3 @@ typedef int32_t parsec_dependency_t; #endif #endif /* PARSEC_CONFIG_BOTTOM_H_HAS_BEEN_INCLUDED */ - diff --git a/parsec/include/parsec/parsec_options.h.in b/parsec/include/parsec/parsec_options.h.in index d0936ca4c..4d375afe9 100644 --- a/parsec/include/parsec/parsec_options.h.in +++ b/parsec/include/parsec/parsec_options.h.in @@ -141,6 +141,8 @@ #cmakedefine PARSEC_HAVE_MPI_20 #cmakedefine PARSEC_HAVE_MPI_30 #cmakedefine PARSEC_HAVE_MPI_OVERTAKE +#cmakedefine PARSEC_HAVE_UCX +#cmakedefine PARSEC_HAVE_PMIX #cmakedefine PARSEC_HAVE_AYUDAME #cmakedefine PARSEC_HAVE_DEV_CPU_SUPPORT diff --git a/parsec/mca/comm/mpi/comm_mpi_funnelled.c 
b/parsec/mca/comm/mpi/comm_mpi_funnelled.c index 11d9f0265..c447ab428 100644 --- a/parsec/mca/comm/mpi/comm_mpi_funnelled.c +++ b/parsec/mca/comm/mpi/comm_mpi_funnelled.c @@ -721,6 +721,7 @@ mpi_funnelled_init(parsec_context_t *context) parsec_ce.reshape = NULL; parsec_ce.can_serve = NULL; parsec_ce.send_am = NULL; + parsec_ce.taskpool_sync_ids = mpi_no_thread_taskpool_sync_ids; parsec_ce.parsec_context = context; parsec_ce.capabilites.sided = 2; @@ -1532,6 +1533,7 @@ mpi_no_thread_enable(parsec_comm_engine_t *ce) parsec_ce.reshape = parsec_mpi_sendrecv; parsec_ce.can_serve = mpi_no_thread_can_push_more; parsec_ce.send_am = mpi_no_thread_send_active_message; + parsec_ce.taskpool_sync_ids = mpi_no_thread_taskpool_sync_ids; /* Initialize the arrays */ array_of_callbacks = (mpi_funnelled_callback_t *) calloc(parsec_param_comm_mpi_dynamic_requests, @@ -1649,3 +1651,28 @@ mpi_no_thread_can_push_more(parsec_comm_engine_t *ce) /* Do we have room to post more requests? */ return mpi_funnelled_last_active_req < current_size_of_total_reqs; } + +int +mpi_no_thread_taskpool_sync_ids(parsec_comm_engine_t *ce, + intptr_t comm_ctx, + uint32_t *next_taskpool_id) +{ + MPI_Comm comm = (MPI_Comm)comm_ctx; + int mpi_is_on, idx; + + if( (NULL == next_taskpool_id) || + (MPI_SUCCESS != MPI_Initialized(&mpi_is_on)) || + !mpi_is_on ) { + return PARSEC_ERR_NOT_IMPLEMENTED; + } + if( MPI_COMM_NULL == comm ) { + comm = (NULL != ce) ? 
(MPI_Comm)ce->parsec_context->comm_ctx : MPI_COMM_WORLD; + } + + idx = (int)*next_taskpool_id; + if( MPI_SUCCESS != MPI_Allreduce(MPI_IN_PLACE, &idx, 1, MPI_INT, MPI_MAX, comm) ) { + return PARSEC_ERROR; + } + *next_taskpool_id = (uint32_t)idx; + return PARSEC_SUCCESS; +} diff --git a/parsec/mca/comm/mpi/comm_mpi_funnelled.h b/parsec/mca/comm/mpi/comm_mpi_funnelled.h index db5daf494..da8297787 100644 --- a/parsec/mca/comm/mpi/comm_mpi_funnelled.h +++ b/parsec/mca/comm/mpi/comm_mpi_funnelled.h @@ -111,4 +111,10 @@ int mpi_no_thread_sync(parsec_comm_engine_t *comm_engine); int mpi_no_thread_can_push_more(parsec_comm_engine_t *c_e); +/** Synchronize the next taskpool id across the selected MPI communicator. */ +int +mpi_no_thread_taskpool_sync_ids(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + uint32_t *next_taskpool_id); + #endif /* __USE_PARSEC_MPI_FUNNELLED_H__ */ diff --git a/parsec/mca/comm/ucx/ValidateModule.CMake b/parsec/mca/comm/ucx/ValidateModule.CMake new file mode 100644 index 000000000..7234a3b82 --- /dev/null +++ b/parsec/mca/comm/ucx/ValidateModule.CMake @@ -0,0 +1,16 @@ +# The UCX backend is optional and is bootstrapped through PMIx. The first +# implementation is intentionally restricted to builds where PaRSEC owns the +# datatype representation, because MPI-enabled builds still expose MPI_Datatype +# as parsec_datatype_t. 
+if(PARSEC_HAVE_UCX AND PARSEC_HAVE_PMIX AND NOT PARSEC_HAVE_MPI) + set(MCA_${COMPONENT}_${MODULE} ON) + file(GLOB MCA_${COMPONENT}_${MODULE}_SOURCES ${MCA_BASE_DIR}/${COMPONENT}/${MODULE}/[^\\.]*.c) + set(MCA_${COMPONENT}_${MODULE}_CONSTRUCTOR "${COMPONENT}_${MODULE}_static_component") +else() + if(PARSEC_DIST_WITH_UCX AND PARSEC_HAVE_MPI) + message(STATUS "Module ${MODULE} not selectable: UCX currently requires PARSEC_DIST_WITH_MPI=OFF") + elseif(PARSEC_DIST_WITH_UCX) + message(STATUS "Module ${MODULE} not selectable: UCX and PMIx were not both found") + endif() + set(MCA_${COMPONENT}_${MODULE} OFF) +endif() diff --git a/parsec/mca/comm/ucx/comm_ucx.c b/parsec/mca/comm/ucx/comm_ucx.c new file mode 100644 index 000000000..2bd03958c --- /dev/null +++ b/parsec/mca/comm/ucx/comm_ucx.c @@ -0,0 +1,1217 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * UCX communication engine backend. + * + * This first UCX backend uses PMIx for bootstrap and UCX active messages plus + * CPU-memory RMA. It intentionally advertises only contiguous CPU datatype + * support; callers that need sparse datatype movement must pack before handing + * memory to this backend. 
+ */ + +#include "parsec/parsec_config.h" +#include "parsec/mca/comm/ucx/comm_ucx.h" +#include "parsec/parsec_comm_engine.h" +#include "parsec/remote_dep.h" +#include "parsec/utils/debug.h" + +#include +#include +#include +#include +#include + +#include +#include + +#define PARSEC_UCX_WORKER_ADDRESS_KEY "parsec.ucx.worker.address" +#define PARSEC_UCX_MAX_RKEY_SIZE 512 + +typedef struct parsec_ucx_am_header_s { + int32_t source; +} parsec_ucx_am_header_t; + +typedef struct parsec_ucx_callback_am_header_s { + int32_t source; + uintptr_t callback; +} parsec_ucx_callback_am_header_t; + +typedef struct parsec_ucx_mem_handle_wire_s { + uint64_t remote_addr; + uint64_t mem_size; + uint32_t rkey_size; + unsigned char rkey[PARSEC_UCX_MAX_RKEY_SIZE]; +} parsec_ucx_mem_handle_wire_t; + +typedef struct parsec_ucx_mem_handle_s { + parsec_ucx_mem_handle_wire_t wire; + void *mem; + size_t mem_size; + parsec_datatype_t datatype; + int count; + ucp_mem_h memh; +} parsec_ucx_mem_handle_t; + +typedef struct parsec_ucx_am_registration_s { + parsec_ce_tag_t tag; + parsec_ce_am_callback_t callback; + void *cb_data; + size_t max_msg_length; +} parsec_ucx_am_registration_t; + +typedef struct parsec_ucx_state_s { + pmix_proc_t pmix_proc; + int pmix_initialized; + int rank; + int size; + + ucp_context_h context; + ucp_worker_h worker; + ucp_address_t *worker_address; + size_t worker_address_length; + ucp_ep_h *eps; + int owns_context; + int owns_worker; + + parsec_ucx_am_registration_t tags[PARSEC_MAX_REGISTERED_TAGS]; +} parsec_ucx_state_t; + +static parsec_ucx_state_t parsec_ucx_state; + +static int comm_ucx_enable(parsec_comm_engine_t *comm_engine); +static int comm_ucx_disable(parsec_comm_engine_t *comm_engine); +static int comm_ucx_set_ctx(parsec_comm_engine_t *comm_engine, intptr_t ctx); +static int comm_ucx_fini(parsec_comm_engine_t *comm_engine); +static int comm_ucx_tag_register(parsec_ce_tag_t tag, + parsec_ce_am_callback_t cb, + void *cb_data, + size_t msg_length); +static int 
comm_ucx_tag_unregister(parsec_ce_tag_t tag); +static int comm_ucx_mem_register(void *mem, + parsec_mem_type_t mem_type, + size_t count, + parsec_datatype_t datatype, + size_t mem_size, + parsec_ce_mem_reg_handle_t *lreg, + size_t *lreg_size); +static int comm_ucx_mem_unregister(parsec_ce_mem_reg_handle_t *lreg); +static int comm_ucx_get_mem_reg_handle_size(void); +static int comm_ucx_mem_retrieve(parsec_ce_mem_reg_handle_t lreg, + void **mem, + parsec_datatype_t *datatype, + int *count); +static int comm_ucx_put(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, + void *l_cb_data, + parsec_ce_tag_t r_tag, + void *r_cb_data, + size_t r_cb_data_size); +static int comm_ucx_get(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, + void *l_cb_data, + parsec_ce_tag_t r_tag, + void *r_cb_data, + size_t r_cb_data_size); +static int comm_ucx_send_am(parsec_comm_engine_t *comm_engine, + parsec_ce_tag_t tag, + int remote, + void *addr, + size_t size); +static int comm_ucx_progress(parsec_comm_engine_t *comm_engine); +static int comm_ucx_pack(parsec_comm_engine_t *ce, + void *inbuf, + int incount, + parsec_datatype_t type, + void *outbuf, + int outsize, + int *position); +static int comm_ucx_pack_size(parsec_comm_engine_t *ce, + int incount, + parsec_datatype_t type, + int *size); +static int comm_ucx_unpack(parsec_comm_engine_t *ce, + void *inbuf, + int insize, + int *position, + void *outbuf, + int outcount, + parsec_datatype_t type); +static int comm_ucx_sync(parsec_comm_engine_t *comm_engine); +static int comm_ucx_can_serve(parsec_comm_engine_t *comm_engine); +static int comm_ucx_taskpool_sync_ids(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + 
uint32_t *next_taskpool_id); +static int comm_ucx_reshape(parsec_comm_engine_t *ce, + parsec_execution_stream_t *es, + parsec_data_copy_t *dst, + int64_t displ_dst, + parsec_datatype_t layout_dst, + uint64_t count_dst, + parsec_data_copy_t *src, + int64_t displ_src, + parsec_datatype_t layout_src, + uint64_t count_src); +static int comm_ucx_install_callback_am_handler(parsec_ucx_state_t *state); +static int comm_ucx_send_callback_am(parsec_comm_engine_t *comm_engine, + int remote, + parsec_ce_tag_t callback, + void *cb_data, + size_t cb_data_size); +static int comm_ucx_late_init(parsec_context_t *context, + parsec_ucx_state_t *state); + +static int +comm_ucx_status_to_parsec(ucs_status_t status, const char *what) +{ + if( UCS_OK == status ) { + return PARSEC_SUCCESS; + } + parsec_warning("UCX %s failed: %s", what, ucs_status_string(status)); + return PARSEC_ERROR; +} + +static int +comm_ucx_wait_request(parsec_ucx_state_t *state, void *request, const char *what) +{ + ucs_status_t status; + + if( NULL == request ) { + return PARSEC_SUCCESS; + } + if( UCS_PTR_IS_ERR(request) ) { + return comm_ucx_status_to_parsec(UCS_PTR_STATUS(request), what); + } + + do { + status = ucp_request_check_status(request); + if( UCS_INPROGRESS == status ) { + ucp_worker_progress(state->worker); + } + } while( UCS_INPROGRESS == status ); + + ucp_request_free(request); + return comm_ucx_status_to_parsec(status, what); +} + +static int +comm_ucx_direct_am(parsec_comm_engine_t *ce, + parsec_ucx_am_registration_t *registration, + void *addr, + size_t size, + int source) +{ + void *buffer = NULL; + int rc; + + if( NULL == registration->callback ) { + return PARSEC_ERR_NOT_FOUND; + } + if( registration->max_msg_length < size ) { + return PARSEC_ERR_BAD_PARAM; + } + + if( 0 != size ) { + buffer = malloc(size); + if( NULL == buffer ) { + return PARSEC_ERR_OUT_OF_RESOURCE; + } + memcpy(buffer, addr, size); + } + rc = registration->callback(ce, registration->tag, buffer, size, + source, 
registration->cb_data); + free(buffer); + return rc; +} + +static ucs_status_t +comm_ucx_am_callback(void *arg, + const void *header, + size_t header_length, + void *data, + size_t length, + const ucp_am_recv_param_t *param) +{ + parsec_ucx_am_registration_t *registration = (parsec_ucx_am_registration_t *)arg; + parsec_ucx_am_header_t am_header; + void *buffer = NULL; + + if( sizeof(am_header) != header_length ) { + return UCS_ERR_INVALID_PARAM; + } + memcpy(&am_header, header, sizeof(am_header)); + + if( NULL == registration->callback ) { + return UCS_OK; + } + if( param->recv_attr & UCP_AM_RECV_ATTR_FLAG_RNDV ) { + return UCS_ERR_UNSUPPORTED; + } + if( registration->max_msg_length < length ) { + return UCS_ERR_MESSAGE_TRUNCATED; + } + + if( 0 != length ) { + buffer = malloc(length); + if( NULL == buffer ) { + return UCS_ERR_NO_MEMORY; + } + memcpy(buffer, data, length); + } + + registration->callback(&parsec_ce, registration->tag, buffer, length, + am_header.source, registration->cb_data); + free(buffer); + return UCS_OK; +} + +static ucs_status_t +comm_ucx_callback_am_callback(void *arg, + const void *header, + size_t header_length, + void *data, + size_t length, + const ucp_am_recv_param_t *param) +{ + parsec_ucx_callback_am_header_t callback_header; + parsec_ce_am_callback_t callback; + void *buffer = NULL; + + (void)arg; + if( sizeof(callback_header) != header_length ) { + return UCS_ERR_INVALID_PARAM; + } + memcpy(&callback_header, header, sizeof(callback_header)); + callback = (parsec_ce_am_callback_t)callback_header.callback; + if( NULL == callback ) { + return UCS_OK; + } + if( param->recv_attr & UCP_AM_RECV_ATTR_FLAG_RNDV ) { + return UCS_ERR_UNSUPPORTED; + } + + if( 0 != length ) { + buffer = malloc(length); + if( NULL == buffer ) { + return UCS_ERR_NO_MEMORY; + } + memcpy(buffer, data, length); + } + + callback(&parsec_ce, PARSEC_CE_REMOTE_DEP_PUT_END_TAG, buffer, length, + callback_header.source, NULL); + free(buffer); + return UCS_OK; +} + +static 
int
+comm_ucx_install_am_handler(parsec_ucx_state_t *state, parsec_ce_tag_t tag)
+{
+    ucp_am_handler_param_t params;
+    ucs_status_t status;
+
+    if( tag >= PARSEC_MAX_REGISTERED_TAGS ) {
+        return PARSEC_ERR_VALUE_OUT_OF_BOUNDS;
+    }
+
+    /* The handler argument is the per-tag registration slot, so the UCX
+     * callback can find the user callback without a lookup. */
+    memset(&params, 0, sizeof(params));
+    params.field_mask = UCP_AM_HANDLER_PARAM_FIELD_ID |
+                        UCP_AM_HANDLER_PARAM_FIELD_CB |
+                        UCP_AM_HANDLER_PARAM_FIELD_ARG |
+                        UCP_AM_HANDLER_PARAM_FIELD_FLAGS;
+    params.id = (uint16_t)tag;
+    params.cb = comm_ucx_am_callback;
+    params.arg = &state->tags[tag];
+    params.flags = UCP_AM_FLAG_WHOLE_MSG;
+    status = ucp_worker_set_am_recv_handler(state->worker, &params);
+    return comm_ucx_status_to_parsec(status, "AM handler registration");
+}
+
+/**
+ * Install the handler for the reserved PARSEC_CE_REMOTE_DEP_PUT_END_TAG AM id,
+ * which carries one-sided completion callbacks in its header.
+ */
+static int
+comm_ucx_install_callback_am_handler(parsec_ucx_state_t *state)
+{
+    ucp_am_handler_param_t params;
+    ucs_status_t status;
+
+    memset(&params, 0, sizeof(params));
+    params.field_mask = UCP_AM_HANDLER_PARAM_FIELD_ID |
+                        UCP_AM_HANDLER_PARAM_FIELD_CB |
+                        UCP_AM_HANDLER_PARAM_FIELD_ARG |
+                        UCP_AM_HANDLER_PARAM_FIELD_FLAGS;
+    params.id = PARSEC_CE_REMOTE_DEP_PUT_END_TAG;
+    params.cb = comm_ucx_callback_am_callback;
+    params.arg = state;
+    params.flags = UCP_AM_FLAG_WHOLE_MSG;
+    status = ucp_worker_set_am_recv_handler(state->worker, &params);
+    return comm_ucx_status_to_parsec(status, "callback AM handler registration");
+}
+
+/**
+ * Query PMIX_JOB_SIZE and store it in state->size.
+ *
+ * The value is range-checked before being narrowed to int: state->size is
+ * only written when the reported size lies in [1, INT_MAX].
+ *
+ * @return PARSEC_SUCCESS when a usable size was stored, PARSEC_ERROR otherwise.
+ */
+static int
+comm_ucx_pmix_get_job_size(parsec_ucx_state_t *state)
+{
+    pmix_value_t *value = NULL;
+    pmix_status_t prc;
+    uint64_t job_size = 0;
+
+    prc = PMIx_Get(&state->pmix_proc, PMIX_JOB_SIZE, NULL, 0, &value);
+    if( PMIX_SUCCESS != prc ) {
+        parsec_warning("PMIx failed to retrieve %s: %d", PMIX_JOB_SIZE, prc);
+        return PARSEC_ERROR;
+    }
+
+    switch(value->type) {
+    case PMIX_UINT32:
+        job_size = (uint64_t)value->data.uint32;
+        break;
+    case PMIX_UINT64:
+        job_size = value->data.uint64;
+        break;
+    case PMIX_SIZE:
+        job_size = (uint64_t)value->data.size;
+        break;
+    case PMIX_INT:
+        /* A non-positive count is reported as 0 and rejected below. */
+        job_size = (value->data.integer > 0) ? (uint64_t)value->data.integer : 0;
+        break;
+    default:
+        PMIX_VALUE_RELEASE(value);
+        parsec_warning("PMIx returned unsupported %s type", PMIX_JOB_SIZE);
+        return PARSEC_ERROR;
+    }
+    PMIX_VALUE_RELEASE(value);
+
+    /* Refuse values that would not survive the conversion to int. */
+    if( (0 == job_size) || (job_size > (uint64_t)INT_MAX) ) {
+        return PARSEC_ERROR;
+    }
+    state->size = (int)job_size;
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Initialize PMIx, record rank and job size, and publish the local UCX worker
+ * address under PARSEC_UCX_WORKER_ADDRESS_KEY, followed by a commit and fence.
+ *
+ * state->worker_address must already be populated by the caller.
+ */
+static int
+comm_ucx_pmix_bootstrap(parsec_ucx_state_t *state)
+{
+    pmix_value_t value;
+    pmix_status_t prc;
+
+    prc = PMIx_Init(&state->pmix_proc, NULL, 0);
+    if( PMIX_SUCCESS != prc ) {
+        parsec_warning("PMIx_Init failed: %d", prc);
+        return PARSEC_ERROR;
+    }
+    state->pmix_initialized = 1;
+    state->rank = (int)state->pmix_proc.rank;
+    if( PARSEC_SUCCESS != comm_ucx_pmix_get_job_size(state) ) {
+        return PARSEC_ERROR;
+    }
+
+    memset(&value, 0, sizeof(value));
+    value.type = PMIX_BYTE_OBJECT;
+    value.data.bo.bytes = (char *)state->worker_address;
+    value.data.bo.size = state->worker_address_length;
+
+    prc = PMIx_Put(PMIX_GLOBAL, PARSEC_UCX_WORKER_ADDRESS_KEY, &value);
+    if( PMIX_SUCCESS != prc ) {
+        parsec_warning("PMIx_Put failed while publishing UCX worker address: %d", prc);
+        return PARSEC_ERROR;
+    }
+    prc = PMIx_Commit();
+    if( PMIX_SUCCESS != prc ) {
+        parsec_warning("PMIx_Commit failed while publishing UCX worker address: %d", prc);
+        return PARSEC_ERROR;
+    }
+    prc = PMIx_Fence(NULL, 0, NULL, 0);
+    if( PMIX_SUCCESS != prc ) {
+        parsec_warning("PMIx_Fence failed during UCX bootstrap: %d", prc);
+        return PARSEC_ERROR;
+    }
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Create one UCX endpoint per remote rank from the worker addresses published
+ * through PMIx. The slot of the local rank is left untouched (NULL after the
+ * calloc done by the caller).
+ */
+static int
+comm_ucx_connect_endpoints(parsec_ucx_state_t *state)
+{
+    for(int peer_rank = 0; peer_rank < state->size; peer_rank++) {
+        pmix_proc_t peer;
+        pmix_value_t *value = NULL;
+        pmix_status_t prc;
+        ucp_ep_params_t ep_params;
+        ucs_status_t status;
+
+        if( peer_rank == state->rank ) {
+            continue;
+        }
+
+        PMIX_LOAD_PROCID(&peer, state->pmix_proc.nspace, peer_rank);
+        prc = PMIx_Get(&peer, PARSEC_UCX_WORKER_ADDRESS_KEY, NULL, 0, &value);
+        if( PMIX_SUCCESS != prc ) {
+            parsec_warning("PMIx_Get failed for UCX worker address of rank %d: %d",
+                           peer_rank, prc);
+            return PARSEC_ERROR;
+        }
+        if( (PMIX_BYTE_OBJECT != value->type) ||
+            (NULL == value->data.bo.bytes) ||
+            (0 == value->data.bo.size) ) {
+            PMIX_VALUE_RELEASE(value);
+            parsec_warning("PMIx returned an invalid UCX worker address for rank %d",
+                           peer_rank);
+            return PARSEC_ERROR;
+        }
+
+        memset(&ep_params, 0, sizeof(ep_params));
+        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS |
+                               UCP_EP_PARAM_FIELD_ERR_MODE;
+        ep_params.address = (ucp_address_t *)value->data.bo.bytes;
+        ep_params.err_mode = UCP_ERR_HANDLING_MODE_NONE;
+        status = ucp_ep_create(state->worker, &ep_params, &state->eps[peer_rank]);
+        PMIX_VALUE_RELEASE(value);
+        if( UCS_OK != status ) {
+            return comm_ucx_status_to_parsec(status, "endpoint creation");
+        }
+    }
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Create a PaRSEC-owned UCX context (AM + RMA features) and a single-threaded
+ * worker, then retrieve the worker address. Ownership flags are set as soon as
+ * each object exists so comm_ucx_fini() can release a partial setup.
+ */
+static int
+comm_ucx_init_context(parsec_ucx_state_t *state)
+{
+    ucp_config_t *config;
+    ucp_params_t params;
+    ucp_worker_params_t worker_params;
+    ucs_status_t status;
+
+    status = ucp_config_read(NULL, NULL, &config);
+    if( UCS_OK != status ) {
+        return comm_ucx_status_to_parsec(status, "config read");
+    }
+
+    memset(&params, 0, sizeof(params));
+    params.field_mask = UCP_PARAM_FIELD_FEATURES;
+    params.features = UCP_FEATURE_AM | UCP_FEATURE_RMA;
+    status = ucp_init(&params, config, &state->context);
+    ucp_config_release(config);
+    if( UCS_OK != status ) {
+        return comm_ucx_status_to_parsec(status, "context initialization");
+    }
+    state->owns_context = 1;
+
+    memset(&worker_params, 0, sizeof(worker_params));
+    worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
+    worker_params.thread_mode = UCS_THREAD_MODE_SINGLE;
+    status = ucp_worker_create(state->context, &worker_params, &state->worker);
+    if( UCS_OK != status ) {
+        return comm_ucx_status_to_parsec(status, "worker creation");
+    }
+    state->owns_worker = 1;
+
+    status = ucp_worker_get_address(state->worker,
+                                    &state->worker_address,
+                                    &state->worker_address_length);
+    return comm_ucx_status_to_parsec(status, "worker address retrieval");
+}
+
+/**
+ * Adopt an application-provided UCX context and worker. Both are recorded as
+ * not owned, so comm_ucx_fini() will not destroy them.
+ */
+static int
+comm_ucx_attach_external_worker(parsec_ucx_state_t *state,
+                                const parsec_comm_ucx_external_worker_t *external)
+{
+    ucs_status_t status;
+
+    if( (NULL == external) ||
+        (NULL == external->context) ||
+        (NULL == external->worker) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+
+    state->context = external->context;
+    state->worker = external->worker;
+    state->owns_context = 0;
+    state->owns_worker = 0;
+
+    status = ucp_worker_get_address(state->worker,
+                                    &state->worker_address,
+                                    &state->worker_address_length);
+    return comm_ucx_status_to_parsec(status, "external worker address retrieval");
+}
+
+/** Reset every AM registration slot to "no callback". */
+static void
+comm_ucx_init_tags(parsec_ucx_state_t *state)
+{
+    for(parsec_ce_tag_t tag = 0; tag < PARSEC_MAX_REGISTERED_TAGS; tag++) {
+        state->tags[tag].tag = tag;
+        state->tags[tag].callback = NULL;
+        state->tags[tag].cb_data = NULL;
+        state->tags[tag].max_msg_length = 0;
+    }
+}
+
+/**
+ * Fill the global parsec_ce callback table with the UCX implementations and
+ * publish rank, job size and the backend state in the PaRSEC context.
+ */
+static void
+comm_ucx_install_engine(parsec_context_t *context, parsec_ucx_state_t *state)
+{
+    parsec_ce.parsec_context = context;
+    parsec_ce.capabilites.sided = 2;
+    parsec_ce.capabilites.supports_noncontiguous_datatype = 0;
+    parsec_ce.capabilites.multithreaded = 0;
+    parsec_ce.enable = comm_ucx_enable;
+    parsec_ce.disable = comm_ucx_disable;
+    parsec_ce.set_ctx = comm_ucx_set_ctx;
+    parsec_ce.fini = comm_ucx_fini;
+    parsec_ce.tag_register = comm_ucx_tag_register;
+    parsec_ce.tag_unregister = comm_ucx_tag_unregister;
+    parsec_ce.mem_register = comm_ucx_mem_register;
+    parsec_ce.mem_unregister = comm_ucx_mem_unregister;
+    parsec_ce.get_mem_handle_size = comm_ucx_get_mem_reg_handle_size;
+    parsec_ce.mem_retrieve = comm_ucx_mem_retrieve;
+    parsec_ce.put = comm_ucx_put;
+    parsec_ce.get = comm_ucx_get;
+    parsec_ce.progress = comm_ucx_progress;
+    parsec_ce.pack = comm_ucx_pack;
+    parsec_ce.pack_size = comm_ucx_pack_size;
+    parsec_ce.unpack = comm_ucx_unpack;
+    parsec_ce.reshape = comm_ucx_reshape;
+    parsec_ce.sync = comm_ucx_sync;
+    parsec_ce.can_serve = comm_ucx_can_serve;
+    parsec_ce.send_am = comm_ucx_send_am;
+    parsec_ce.taskpool_sync_ids = comm_ucx_taskpool_sync_ids;
+
+    context->my_rank = state->rank;
+    context->nb_nodes = state->size;
+    context->comm_ctx = (intptr_t)state;
+}
+
+/**
+ * Second initialization stage shared by comm_ucx_init() and comm_ucx_set_ctx():
+ * PMIx bootstrap, endpoint creation, reserved AM handler, tag table reset and
+ * engine installation. On failure the caller is expected to run
+ * comm_ucx_fini() to release whatever was set up.
+ */
+static int
+comm_ucx_late_init(parsec_context_t *context, parsec_ucx_state_t *state)
+{
+    if( PARSEC_SUCCESS != comm_ucx_pmix_bootstrap(state) ) {
+        return PARSEC_ERROR;
+    }
+
+    state->eps = (ucp_ep_h *)calloc((size_t)state->size, sizeof(*state->eps));
+    if( NULL == state->eps ) {
+        return PARSEC_ERR_OUT_OF_RESOURCE;
+    }
+    if( PARSEC_SUCCESS != comm_ucx_connect_endpoints(state) ) {
+        return PARSEC_ERROR;
+    }
+    if( PARSEC_SUCCESS != comm_ucx_install_callback_am_handler(state) ) {
+        return PARSEC_ERROR;
+    }
+
+    comm_ucx_init_tags(state);
+    comm_ucx_install_engine(context, state);
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Entry point of the UCX backend.
+ *
+ * A context->comm_ctx different from -1 is interpreted as a pointer to a
+ * parsec_comm_ucx_external_worker_t; otherwise PaRSEC creates its own UCX
+ * context and worker.
+ *
+ * @return &parsec_ce on success, NULL after tearing down a failed setup.
+ */
+parsec_comm_engine_t *
+comm_ucx_init(parsec_context_t *context)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    intptr_t external_ctx = context->comm_ctx;
+
+    memset(state, 0, sizeof(*state));
+    state->rank = -1;
+    state->size = -1;
+
+    if( -1 != external_ctx ) {
+        if( PARSEC_SUCCESS != comm_ucx_attach_external_worker(state,
+                (const parsec_comm_ucx_external_worker_t *)external_ctx) ) {
+            comm_ucx_fini(&parsec_ce);
+            return NULL;
+        }
+    } else if( PARSEC_SUCCESS != comm_ucx_init_context(state) ) {
+        comm_ucx_fini(&parsec_ce);
+        return NULL;
+    }
+
+    if( PARSEC_SUCCESS != comm_ucx_late_init(context, state) ) {
+        comm_ucx_fini(&parsec_ce);
+        return NULL;
+    }
+
+    parsec_debug_verbose(4, parsec_debug_output,
+                         "UCX communication engine initialized rank %d/%d",
+                         context->my_rank, context->nb_nodes);
+    return &parsec_ce;
+}
+
+/** No-op: the UCX backend has nothing to switch on at enable time. */
+static int
+comm_ucx_enable(parsec_comm_engine_t *comm_engine)
+{
+    (void)comm_engine;
+    return PARSEC_SUCCESS;
+}
+
+/** No-op counterpart of comm_ucx_enable(). */
+static int
+comm_ucx_disable(parsec_comm_engine_t *comm_engine)
+{
+    (void)comm_engine;
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Replace the UCX worker used by PaRSEC with an application-owned one.
+ *
+ * The current backend state is torn down and rebuilt around the new worker.
+ * The tag table is reset in the process, so registrations made before this
+ * call are not carried over.
+ */
+static int
+comm_ucx_set_ctx(parsec_comm_engine_t *comm_engine, intptr_t ctx)
+{
+    parsec_context_t *context = comm_engine->parsec_context;
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    int rc;
+
+    if( 1 < parsec_communication_engine_up ) {
+        parsec_warning("Cannot change PaRSEC's UCX worker while the communication engine is running [ignored]");
+        return PARSEC_ERROR;
+    }
+    if( -1 == ctx ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+
+    /*
+     * set_ctx hands PaRSEC an application-owned UCX worker. PaRSEC releases
+     * only the resources it creates around that worker: worker address,
+     * endpoints, AM handlers, and PMIx publication.
+     */
+    comm_ucx_fini(comm_engine);
+    memset(state, 0, sizeof(*state));
+    state->rank = -1;
+    state->size = -1;
+
+    rc = comm_ucx_attach_external_worker(state,
+                                         (const parsec_comm_ucx_external_worker_t *)ctx);
+    if( PARSEC_SUCCESS != rc ) {
+        return rc;
+    }
+    rc = comm_ucx_late_init(context, state);
+    if( PARSEC_SUCCESS != rc ) {
+        comm_ucx_fini(comm_engine);
+    }
+    return rc;
+}
+
+/**
+ * Release everything the backend created: endpoints, worker address, owned
+ * worker and context, and the PMIx session. Safe to call on a partially
+ * initialized state; resets context->comm_ctx to -1 when a context is attached.
+ */
+static int
+comm_ucx_fini(parsec_comm_engine_t *comm_engine)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    ucp_request_param_t close_params;
+
+    memset(&close_params, 0, sizeof(close_params));
+
+    if( NULL != state->eps ) {
+        for(int peer_rank = 0; peer_rank < state->size; peer_rank++) {
+            if( NULL != state->eps[peer_rank] ) {
+                void *request = ucp_ep_close_nbx(state->eps[peer_rank], &close_params);
+                (void)comm_ucx_wait_request(state, request, "endpoint close");
+                state->eps[peer_rank] = NULL;
+            }
+        }
+        free(state->eps);
+        state->eps = NULL;
+    }
+    if( NULL != state->worker_address ) {
+        ucp_worker_release_address(state->worker, state->worker_address);
+        state->worker_address = NULL;
+        state->worker_address_length = 0;
+    }
+    if( NULL != state->worker ) {
+        if( state->owns_worker ) {
+            ucp_worker_destroy(state->worker);
+        }
+        state->worker = NULL;
+    }
+    if( NULL != state->context ) {
+        if( state->owns_context ) {
+            ucp_cleanup(state->context);
+        }
+        state->context = NULL;
+    }
+    if( state->pmix_initialized ) {
+        PMIx_Finalize(NULL, 0);
+        state->pmix_initialized = 0;
+    }
+    memset(state, 0, sizeof(*state));
+    if( (NULL != comm_engine) && (NULL != comm_engine->parsec_context) ) {
+        comm_engine->parsec_context->comm_ctx = -1;
+    }
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Register an active-message callback for a tag and install the matching UCX
+ * handler. PARSEC_CE_REMOTE_DEP_PUT_END_TAG is reserved by the backend.
+ */
+static int
+comm_ucx_tag_register(parsec_ce_tag_t tag,
+                      parsec_ce_am_callback_t cb,
+                      void *cb_data,
+                      size_t msg_length)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+
+    if( tag >= PARSEC_MAX_REGISTERED_TAGS ) {
+        return PARSEC_ERR_VALUE_OUT_OF_BOUNDS;
+    }
+    if( PARSEC_CE_REMOTE_DEP_PUT_END_TAG == tag ) {
+        return PARSEC_ERR_EXISTS;
+    }
+
+    state->tags[tag].tag = tag;
+    state->tags[tag].callback = cb;
+    state->tags[tag].cb_data = cb_data;
+    state->tags[tag].max_msg_length = msg_length;
+    return comm_ucx_install_am_handler(state, tag);
+}
+
+/**
+ * Clear the registration slot of a tag. The UCX handler itself stays
+ * installed; it ignores messages while the slot has no callback.
+ */
+static int
+comm_ucx_tag_unregister(parsec_ce_tag_t tag)
+{
+    if( tag >= PARSEC_MAX_REGISTERED_TAGS ) {
+        return PARSEC_ERR_VALUE_OUT_OF_BOUNDS;
+    }
+    if( PARSEC_CE_REMOTE_DEP_PUT_END_TAG == tag ) {
+        return PARSEC_SUCCESS;
+    }
+    parsec_ucx_state.tags[tag].callback = NULL;
+    parsec_ucx_state.tags[tag].cb_data = NULL;
+    parsec_ucx_state.tags[tag].max_msg_length = 0;
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Map a contiguous buffer with UCX and build a handle whose leading `wire`
+ * member (address, size, packed rkey) can be shipped to peers.
+ *
+ * @param[out] lreg      receives the newly allocated handle
+ * @param[out] lreg_size receives the size of the wire representation
+ * @return PARSEC_ERR_NOT_SUPPORTED for non-contiguous, NULL, unsized or empty
+ *         buffers and for rkeys larger than PARSEC_UCX_MAX_RKEY_SIZE.
+ */
+static int
+comm_ucx_mem_register(void *mem,
+                      parsec_mem_type_t mem_type,
+                      size_t count,
+                      parsec_datatype_t datatype,
+                      size_t mem_size,
+                      parsec_ce_mem_reg_handle_t *lreg,
+                      size_t *lreg_size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_mem_handle_t *handle;
+    ucp_mem_map_params_t params;
+    void *rkey_buffer = NULL;
+    size_t rkey_size = 0;
+    ucs_status_t status;
+
+    if( (NULL == lreg) || (NULL == lreg_size) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( (PARSEC_MEM_TYPE_CONTIGUOUS != mem_type) ||
+        (NULL == mem) ||
+        ((size_t)-1 == mem_size) ||
+        (0 == mem_size) ) {
+        return PARSEC_ERR_NOT_SUPPORTED;
+    }
+
+    handle = (parsec_ucx_mem_handle_t *)calloc(1, sizeof(*handle));
+    if( NULL == handle ) {
+        return PARSEC_ERR_OUT_OF_RESOURCE;
+    }
+
+    memset(&params, 0, sizeof(params));
+    params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS |
+                        UCP_MEM_MAP_PARAM_FIELD_LENGTH;
+    params.address = mem;
+    params.length = mem_size;
+    status = ucp_mem_map(state->context, &params, &handle->memh);
+    if( UCS_OK != status ) {
+        free(handle);
+        return comm_ucx_status_to_parsec(status, "memory registration");
+    }
+
+    status = ucp_rkey_pack(state->context, handle->memh, &rkey_buffer, &rkey_size);
+    if( UCS_OK != status ) {
+        ucp_mem_unmap(state->context, handle->memh);
+        free(handle);
+        return comm_ucx_status_to_parsec(status, "rkey packing");
+    }
+    if( rkey_size > PARSEC_UCX_MAX_RKEY_SIZE ) {
+        ucp_rkey_buffer_release(rkey_buffer);
+        ucp_mem_unmap(state->context, handle->memh);
+        free(handle);
+        parsec_warning("UCX rkey size %zu exceeds PaRSEC wire limit %d",
+                       rkey_size, PARSEC_UCX_MAX_RKEY_SIZE);
+        return PARSEC_ERR_NOT_SUPPORTED;
+    }
+
+    handle->mem = mem;
+    handle->mem_size = mem_size;
+    handle->datatype = datatype;
+    /* The retrieve API reports the count as int, so saturate instead of wrapping. */
+    handle->count = (count > (size_t)INT_MAX) ? INT_MAX : (int)count;
+    handle->wire.remote_addr = (uint64_t)(uintptr_t)mem;
+    handle->wire.mem_size = (uint64_t)mem_size;
+    handle->wire.rkey_size = (uint32_t)rkey_size;
+    memcpy(handle->wire.rkey, rkey_buffer, rkey_size);
+    ucp_rkey_buffer_release(rkey_buffer);
+
+    *lreg = handle;
+    *lreg_size = sizeof(handle->wire);
+    return PARSEC_SUCCESS;
+}
+
+/** Unmap and free a handle created by comm_ucx_mem_register(); NULL is accepted. */
+static int
+comm_ucx_mem_unregister(parsec_ce_mem_reg_handle_t *lreg)
+{
+    parsec_ucx_mem_handle_t *handle;
+
+    if( (NULL == lreg) || (NULL == *lreg) ) {
+        return PARSEC_SUCCESS;
+    }
+    handle = (parsec_ucx_mem_handle_t *)*lreg;
+    if( NULL != handle->memh ) {
+        ucp_mem_unmap(parsec_ucx_state.context, handle->memh);
+    }
+    free(handle);
+    *lreg = NULL;
+    return PARSEC_SUCCESS;
+}
+
+/** Size in bytes of the handle representation exchanged between ranks. */
+static int
+comm_ucx_get_mem_reg_handle_size(void)
+{
+    return sizeof(parsec_ucx_mem_handle_wire_t);
+}
+
+/** Return the buffer, datatype and count recorded in a local handle. */
+static int
+comm_ucx_mem_retrieve(parsec_ce_mem_reg_handle_t lreg,
+                      void **mem,
+                      parsec_datatype_t *datatype,
+                      int *count)
+{
+    parsec_ucx_mem_handle_t *handle = (parsec_ucx_mem_handle_t *)lreg;
+
+    *mem = handle->mem;
+    *datatype = handle->datatype;
+    *count = handle->count;
+    return PARSEC_SUCCESS;
+}
+
+static int
+comm_ucx_rkey_unpack(parsec_ucx_state_t *state,
+                     int remote,
+                     parsec_ucx_mem_handle_wire_t *remote_wire,
+                     ucp_rkey_h *rkey)
+{
+    ucs_status_t status;
+
+    if( (NULL == remote_wire) || (NULL == rkey) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( (remote < 0) || (remote >= state->size) ||
+        (remote == state->rank) ||
+        (NULL == state->eps[remote]) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    status = ucp_ep_rkey_unpack(state->eps[remote],
+                                remote_wire->rkey,
+                                rkey);
+    return comm_ucx_status_to_parsec(status, "rkey unpack");
+}
+
+/**
+ * Return non-zero when the byte range [displ, displ + length) lies inside a
+ * region of region_size bytes. Written with subtraction only so it cannot
+ * overflow.
+ */
+static int
+comm_ucx_span_fits(uint64_t region_size, ptrdiff_t displ, size_t length)
+{
+    if( (displ < 0) || ((uint64_t)displ > region_size) ) {
+        return 0;
+    }
+    return (uint64_t)length <= (region_size - (uint64_t)displ);
+}
+
+/**
+ * One-sided write of `size` bytes (the whole local region when size is 0)
+ * from a local registration into a remote one.
+ *
+ * The call is blocking: it returns after the transfer has completed. When a
+ * remote notification is requested (r_tag != 0, where r_tag carries the remote
+ * callback pointer), the endpoint is flushed first so the target only runs its
+ * callback once the written data is visible there. The notification is sent
+ * before the local callback l_cb is invoked.
+ *
+ * @return PARSEC_ERR_BAD_PARAM for NULL handles or a range that does not fit
+ *         in either registration, otherwise the first failing step's status.
+ */
+static int
+comm_ucx_put(parsec_comm_engine_t *comm_engine,
+             parsec_ce_mem_reg_handle_t lreg,
+             ptrdiff_t ldispl,
+             parsec_ce_mem_reg_handle_t rreg,
+             ptrdiff_t rdispl,
+             size_t size,
+             int remote,
+             parsec_ce_onesided_callback_t l_cb,
+             void *l_cb_data,
+             parsec_ce_tag_t r_tag,
+             void *r_cb_data,
+             size_t r_cb_data_size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_mem_handle_t *local = (parsec_ucx_mem_handle_t *)lreg;
+    parsec_ucx_mem_handle_wire_t *remote_wire = (parsec_ucx_mem_handle_wire_t *)rreg;
+    size_t transfer_size;
+    char *local_addr;
+    int rc;
+
+    if( (NULL == local) || (NULL == remote_wire) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    transfer_size = (0 == size) ? local->mem_size : size;
+    if( !comm_ucx_span_fits((uint64_t)local->mem_size, ldispl, transfer_size) ||
+        !comm_ucx_span_fits(remote_wire->mem_size, rdispl, transfer_size) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    local_addr = (char *)local->mem + ldispl;
+
+    if( remote == state->rank ) {
+        memcpy((void *)(uintptr_t)(remote_wire->remote_addr + rdispl),
+               local_addr, transfer_size);
+    } else {
+        ucp_rkey_h rkey = NULL;
+        ucp_request_param_t params;
+        void *request;
+
+        rc = comm_ucx_rkey_unpack(state, remote, remote_wire, &rkey);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+        memset(&params, 0, sizeof(params));
+        request = ucp_put_nbx(state->eps[remote], local_addr, transfer_size,
+                              remote_wire->remote_addr + rdispl, rkey, &params);
+        rc = comm_ucx_wait_request(state, request, "PUT");
+        if( (PARSEC_SUCCESS == rc) && (0 != r_tag) ) {
+            /*
+             * Completion of the PUT request only means the local buffer can be
+             * reused. Flush the endpoint so the data has reached the target
+             * before the completion message below is sent.
+             */
+            memset(&params, 0, sizeof(params));
+            request = ucp_ep_flush_nbx(state->eps[remote], &params);
+            rc = comm_ucx_wait_request(state, request, "PUT flush");
+        }
+        ucp_rkey_destroy(rkey);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+    }
+
+    if( 0 != r_tag ) {
+        rc = comm_ucx_send_callback_am(comm_engine, remote, r_tag,
+                                       r_cb_data, r_cb_data_size);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+    }
+    if( NULL != l_cb ) {
+        return l_cb(comm_engine, lreg, ldispl, rreg, rdispl,
+                    transfer_size, remote, l_cb_data);
+    }
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * One-sided read of `size` bytes (the whole local region when size is 0) from
+ * a remote registration into a local one.
+ *
+ * The call is blocking. The local callback l_cb runs first; the remote
+ * notification (r_tag != 0) is sent afterwards.
+ *
+ * @return PARSEC_ERR_BAD_PARAM for NULL handles or a range that does not fit
+ *         in either registration, otherwise the first failing step's status.
+ */
+static int
+comm_ucx_get(parsec_comm_engine_t *comm_engine,
+             parsec_ce_mem_reg_handle_t lreg,
+             ptrdiff_t ldispl,
+             parsec_ce_mem_reg_handle_t rreg,
+             ptrdiff_t rdispl,
+             size_t size,
+             int remote,
+             parsec_ce_onesided_callback_t l_cb,
+             void *l_cb_data,
+             parsec_ce_tag_t r_tag,
+             void *r_cb_data,
+             size_t r_cb_data_size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_mem_handle_t *local = (parsec_ucx_mem_handle_t *)lreg;
+    parsec_ucx_mem_handle_wire_t *remote_wire = (parsec_ucx_mem_handle_wire_t *)rreg;
+    size_t transfer_size;
+    char *local_addr;
+    int rc;
+
+    if( (NULL == local) || (NULL == remote_wire) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    transfer_size = (0 == size) ? local->mem_size : size;
+    if( !comm_ucx_span_fits((uint64_t)local->mem_size, ldispl, transfer_size) ||
+        !comm_ucx_span_fits(remote_wire->mem_size, rdispl, transfer_size) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    local_addr = (char *)local->mem + ldispl;
+
+    if( remote == state->rank ) {
+        memcpy(local_addr, (void *)(uintptr_t)(remote_wire->remote_addr + rdispl),
+               transfer_size);
+    } else {
+        ucp_rkey_h rkey = NULL;
+        ucp_request_param_t params;
+        void *request;
+
+        rc = comm_ucx_rkey_unpack(state, remote, remote_wire, &rkey);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+        memset(&params, 0, sizeof(params));
+        request = ucp_get_nbx(state->eps[remote], local_addr, transfer_size,
+                              remote_wire->remote_addr + rdispl, rkey, &params);
+        rc = comm_ucx_wait_request(state, request, "GET");
+        ucp_rkey_destroy(rkey);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+    }
+
+    if( NULL != l_cb ) {
+        rc = l_cb(comm_engine, lreg, ldispl, rreg, rdispl,
+                  transfer_size, remote, l_cb_data);
+        if( PARSEC_SUCCESS != rc ) {
+            return rc;
+        }
+    }
+    if( 0 != r_tag ) {
+        /*
+         * The comm-engine API carries the remote completion callback as a
+         * function pointer in r_tag. UCX AM ids cannot be those pointers, so
+         * use the reserved internal AM id and carry the callback pointer in the
+         * AM header.
+         */
+        return comm_ucx_send_callback_am(comm_engine, remote, r_tag,
+                                         r_cb_data, r_cb_data_size);
+    }
+    return PARSEC_SUCCESS;
+}
+
+/**
+ * Deliver a one-sided completion callback to `remote`.
+ *
+ * For the local rank the callback is invoked directly. Otherwise the callback
+ * pointer travels in the header of an AM sent on the reserved
+ * PARSEC_CE_REMOTE_DEP_PUT_END_TAG id, and the call blocks until the send
+ * request completes.
+ */
+static int
+comm_ucx_send_callback_am(parsec_comm_engine_t *comm_engine,
+                          int remote,
+                          parsec_ce_tag_t callback,
+                          void *cb_data,
+                          size_t cb_data_size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_callback_am_header_t header;
+    ucp_request_param_t params;
+    void *request;
+
+    if( (remote < 0) || (remote >= state->size) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( remote == state->rank ) {
+        parsec_ce_am_callback_t cb = (parsec_ce_am_callback_t)(uintptr_t)callback;
+        return cb(comm_engine, PARSEC_CE_REMOTE_DEP_PUT_END_TAG,
+                  cb_data, cb_data_size, state->rank, NULL);
+    }
+    if( NULL == state->eps[remote] ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+
+    /* Zero the header first so struct padding is not sent uninitialized. */
+    memset(&header, 0, sizeof(header));
+    header.source = state->rank;
+    header.callback = (uintptr_t)callback;
+    memset(&params, 0, sizeof(params));
+    request = ucp_am_send_nbx(state->eps[remote],
+                              PARSEC_CE_REMOTE_DEP_PUT_END_TAG,
+                              &header, sizeof(header),
+                              cb_data, cb_data_size, &params);
+    return comm_ucx_wait_request(state, request, "callback active message send");
+}
+
+/**
+ * Send an active message on a registered tag and block until the send request
+ * completes. Messages to the local rank are dispatched directly to the
+ * registered callback. The size is checked against the local registration of
+ * the tag before anything is sent.
+ */
+static int
+comm_ucx_send_am(parsec_comm_engine_t *comm_engine,
+                 parsec_ce_tag_t tag,
+                 int remote,
+                 void *addr,
+                 size_t size)
+{
+    parsec_ucx_state_t *state = &parsec_ucx_state;
+    parsec_ucx_am_header_t header;
+    ucp_request_param_t params;
+    void *request;
+
+    if( tag >= PARSEC_MAX_REGISTERED_TAGS ) {
+        return PARSEC_ERR_VALUE_OUT_OF_BOUNDS;
+    }
+    if( (remote < 0) || (remote >= state->size) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( remote == state->rank ) {
+        return comm_ucx_direct_am(comm_engine, &state->tags[tag],
+                                  addr, size, state->rank);
+    }
+    if( NULL == state->eps[remote] ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( state->tags[tag].max_msg_length < size ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+
+    header.source = state->rank;
+    memset(&params, 0, sizeof(params));
+    request = ucp_am_send_nbx(state->eps[remote], (unsigned)tag,
+                              &header, sizeof(header),
+                              addr, size, &params);
+    return comm_ucx_wait_request(state, request, "active message send");
+}
+
+static int
+comm_ucx_progress(parsec_comm_engine_t *comm_engine) +{ + int count = 0; + + (void)comm_engine; + for(int i = 0; i < 16; i++) { + int rc = ucp_worker_progress(parsec_ucx_state.worker); + count += rc; + if( 0 == rc ) { + break; + } + } + return count; +} + +static int +comm_ucx_pack_size(parsec_comm_engine_t *ce, + int incount, + parsec_datatype_t type, + int *size) +{ + int dtt_size, rc; + + (void)ce; + if( PARSEC_SUCCESS != parsec_type_contiguous(type) ) { + return PARSEC_ERR_NOT_SUPPORTED; + } + rc = parsec_type_size(type, &dtt_size); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + *size = incount * dtt_size; + return PARSEC_SUCCESS; +} + +static int +comm_ucx_pack(parsec_comm_engine_t *ce, + void *inbuf, + int incount, + parsec_datatype_t type, + void *outbuf, + int outsize, + int *position) +{ + int size, rc; + + rc = comm_ucx_pack_size(ce, incount, type, &size); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + if( (*position < 0) || ((*position + size) > outsize) ) { + return PARSEC_ERR_BAD_PARAM; + } + memcpy((char *)outbuf + *position, inbuf, (size_t)size); + *position += size; + return PARSEC_SUCCESS; +} + +static int +comm_ucx_unpack(parsec_comm_engine_t *ce, + void *inbuf, + int insize, + int *position, + void *outbuf, + int outcount, + parsec_datatype_t type) +{ + int size, rc; + + rc = comm_ucx_pack_size(ce, outcount, type, &size); + if( PARSEC_SUCCESS != rc ) { + return rc; + } + if( (*position < 0) || ((*position + size) > insize) ) { + return PARSEC_ERR_BAD_PARAM; + } + memcpy(outbuf, (char *)inbuf + *position, (size_t)size); + *position += size; + return PARSEC_SUCCESS; +} + +static int +comm_ucx_sync(parsec_comm_engine_t *comm_engine) +{ + pmix_status_t prc; + + (void)comm_engine; + prc = PMIx_Fence(NULL, 0, NULL, 0); + return (PMIX_SUCCESS == prc) ? 
PARSEC_SUCCESS : PARSEC_ERROR; +} + +static int +comm_ucx_can_serve(parsec_comm_engine_t *comm_engine) +{ + (void)comm_engine; + return 1; +} + +static int +comm_ucx_taskpool_sync_ids(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + uint32_t *next_taskpool_id) +{ + (void)comm_engine; + (void)comm_ctx; + (void)next_taskpool_id; + /* + * UCX will need a backend-specific collective, likely through the PMIx + * bootstrap path, to replace MPI_Allreduce for taskpool-id convergence. + */ + return PARSEC_ERR_NOT_IMPLEMENTED; +} + +static int +comm_ucx_reshape(parsec_comm_engine_t *ce, + parsec_execution_stream_t *es, + parsec_data_copy_t *dst, + int64_t displ_dst, + parsec_datatype_t layout_dst, + uint64_t count_dst, + parsec_data_copy_t *src, + int64_t displ_src, + parsec_datatype_t layout_src, + uint64_t count_src) +{ + (void)ce; (void)es; (void)dst; (void)displ_dst; (void)layout_dst; + (void)count_dst; (void)src; (void)displ_src; (void)layout_src; + (void)count_src; + return PARSEC_ERR_NOT_SUPPORTED; +} diff --git a/parsec/mca/comm/ucx/comm_ucx.h b/parsec/mca/comm/ucx/comm_ucx.h new file mode 100644 index 000000000..beb8dc90b --- /dev/null +++ b/parsec/mca/comm/ucx/comm_ucx.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ +/** + * @file + * + * UCX communication engine MCA component declaration. + * + * The UCX backend uses PMIx only for process bootstrap and UCX worker-address + * exchange. Runtime data movement is done with UCX active messages and CPU + * RMA operations. + */ +#ifndef PARSEC_COMM_UCX_H_HAS_BEEN_INCLUDED +#define PARSEC_COMM_UCX_H_HAS_BEEN_INCLUDED + +#include "parsec/mca/comm/comm.h" +#include "parsec/datatype_module.h" +#include <ucp/api/ucp.h> + +BEGIN_C_DECLS + +PARSEC_DECLSPEC extern const parsec_comm_base_component_t parsec_comm_ucx_component; +PARSEC_DECLSPEC extern const parsec_datatype_module_t parsec_datatype_basic_module; + +/** + * UCX state supplied by an application that initializes UCX itself. 
+ * + * PaRSEC does not take ownership of either handle. The application must keep + * both alive until the PaRSEC context using this communication engine has been + * finalized. PaRSEC still performs the late runtime setup: worker-address + * publication through PMIx, endpoint creation, and active-message handler + * registration. + */ +typedef struct parsec_comm_ucx_external_worker_s { + ucp_context_h context; + ucp_worker_h worker; +} parsec_comm_ucx_external_worker_t; + +PARSEC_DECLSPEC parsec_comm_engine_t *comm_ucx_init(parsec_context_t *context); +PARSEC_DECLSPEC mca_base_component_t *comm_ucx_static_component(void); + +END_C_DECLS + +#endif /* PARSEC_COMM_UCX_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/mca/comm/ucx/comm_ucx_component.c b/parsec/mca/comm/ucx/comm_ucx_component.c new file mode 100644 index 000000000..d25dec959 --- /dev/null +++ b/parsec/mca/comm/ucx/comm_ucx_component.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. + */ + +#include "parsec/parsec_config.h" +#include "parsec/mca/comm/ucx/comm_ucx.h" + +static int comm_ucx_component_query(mca_base_module_t **module, int *priority); + +static parsec_comm_module_t parsec_comm_ucx_module = { + .component = &parsec_comm_ucx_component, + .module = { + .init = comm_ucx_init, + }, + .datatype = &parsec_datatype_basic_module, +}; + +const parsec_comm_base_component_t parsec_comm_ucx_component = { + { + PARSEC_COMM_BASE_VERSION_2_0_0, + + "ucx", + "", + PARSEC_VERSION_MAJOR, + PARSEC_VERSION_MINOR, + + NULL, + NULL, + comm_ucx_component_query, + NULL, + "", + }, + { + MCA_BASE_METADATA_PARAM_NONE, + "", + } +}; + +mca_base_component_t * +comm_ucx_static_component(void) +{ + return (mca_base_component_t *)&parsec_comm_ucx_component; +} + +static int +comm_ucx_component_query(mca_base_module_t **module, int *priority) +{ + /* + * Keep MPI as the default when both backends eventually become buildable + * together. 
UCX can be selected explicitly with the comm MCA parameter. + */ + *priority = 50; + *module = (mca_base_module_t *)&parsec_comm_ucx_module; + return MCA_SUCCESS; +} diff --git a/parsec/parsec.c b/parsec/parsec.c index 20a11dfde..8c8c5178f 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -2188,16 +2188,30 @@ void parsec_taskpool_sync_ids_context( intptr_t comm ) parsec_atomic_lock( &taskpool_array_lock ); idx = (int)taskpool_array_pos; msz = (int)taskpool_array_size; -#if defined(DISTRIBUTED) && defined(PARSEC_HAVE_MPI) +#if defined(DISTRIBUTED) + int rc = PARSEC_ERR_NOT_IMPLEMENTED; + if( NULL != parsec_ce.taskpool_sync_ids ) { + rc = parsec_ce.taskpool_sync_ids(&parsec_ce, comm, &idx); + } +#if defined(PARSEC_HAVE_MPI) + /* + * Keep the legacy direct MPI path for applications that synchronize + * taskpool ids before the communication engine has been selected. + */ int mpi_is_on; - MPI_Initialized(&mpi_is_on); - if( mpi_is_on ) { + if( (PARSEC_ERR_NOT_IMPLEMENTED == rc) && + (NULL == parsec_ce.taskpool_sync_ids) && + (MPI_SUCCESS == MPI_Initialized(&mpi_is_on)) && mpi_is_on ) { MPI_Allreduce( MPI_IN_PLACE, &idx, 1, MPI_INT, MPI_MAX, (MPI_Comm)comm ); - while (idx >= msz){ - msz <<= 1; - } + rc = PARSEC_SUCCESS; } -#endif +#endif /* defined(PARSEC_HAVE_MPI) */ + while( (PARSEC_SUCCESS == rc) && (idx >= msz) ) { + msz <<= 1; + } +#else + (void)comm; +#endif /* defined(DISTRIBUTED) */ if( msz > taskpool_array_size ) { taskpool_array = (parsec_taskpool_t**)realloc(taskpool_array, msz * sizeof(parsec_taskpool_t*) ); /* NULLify all the new elements */ diff --git a/parsec/parsec_comm_engine.c b/parsec/parsec_comm_engine.c index fd212028d..bcf44fab2 100644 --- a/parsec/parsec_comm_engine.c +++ b/parsec/parsec_comm_engine.c @@ -12,7 +12,7 @@ parsec_comm_engine_t parsec_ce; -#if defined(PARSEC_HAVE_MPI) +#if defined(DISTRIBUTED) /* Select and initialize the distributed communication backend. 
*/ parsec_comm_engine_t * @@ -48,6 +48,7 @@ parsec_comm_engine_init(parsec_context_t *parsec_context) parsec_ce.capabilites.sided = 0; parsec_ce.capabilites.supports_noncontiguous_datatype = 0; parsec_ce.capabilites.multithreaded = 0; + parsec_ce.taskpool_sync_ids = NULL; return &parsec_ce; } @@ -58,4 +59,4 @@ parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine) return PARSEC_SUCCESS; } -#endif /* defined(PARSEC_HAVE_MPI) */ +#endif /* defined(DISTRIBUTED) */ diff --git a/parsec/parsec_comm_engine.h b/parsec/parsec_comm_engine.h index 37c31ebc2..c4f7b135b 100644 --- a/parsec/parsec_comm_engine.h +++ b/parsec/parsec_comm_engine.h @@ -150,6 +150,14 @@ typedef int (*parsec_ce_unpack_fn_t)(parsec_comm_engine_t *ce, typedef int (*parsec_ce_sync_fn_t)(parsec_comm_engine_t *comm_engine); typedef int (*parsec_ce_can_serve_fn_t)(parsec_comm_engine_t *comm_engine); +/** + * Synchronize the next taskpool id across the processes known by a backend. + * The runtime owns the taskpool registry lock and storage; the backend only + * updates next_taskpool_id to the globally agreed value. + */ +typedef int (*parsec_ce_taskpool_sync_ids_fn_t)(parsec_comm_engine_t *comm_engine, + intptr_t comm_ctx, + uint32_t *next_taskpool_id); /** * This function realize a data reshaping, by conceptually packing the dst @@ -198,6 +206,7 @@ struct parsec_comm_engine_s { parsec_ce_sync_fn_t sync; parsec_ce_can_serve_fn_t can_serve; parsec_ce_send_active_message_fn_t send_am; + parsec_ce_taskpool_sync_ids_fn_t taskpool_sync_ids; }; /* global comm_engine */ diff --git a/parsec/runtime.h b/parsec/runtime.h index 1688bb0df..9cb3cfed9 100644 --- a/parsec/runtime.h +++ b/parsec/runtime.h @@ -211,9 +211,11 @@ int parsec_version_ex( size_t len, char* version_string); * * @details * Reset the comm engine associated with the PaRSEC context, and use - * the communication context opaque_comm_ctx in the future (typically an MPI - * communicator). 
The context can only be changed while the PaRSEC runtime - * is down, more specifically while the communication thread is not active. + * the communication context opaque_comm_ctx in the future. For the MPI backend + * this is an MPI communicator. For the UCX backend this is a pointer to a + * parsec_comm_ucx_external_worker_t declared by the UCX comm component. The + * context can only be changed while the PaRSEC runtime is down, more + * specifically while the communication thread is not active. * * parsec_context_wait becomes collective across nodes spanning * on this communication context. From 405c75bb430ebb43967d0ec5f7ffb5a0878fe335 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 20 May 2026 02:18:01 -0400 Subject: [PATCH 4/5] Let the MPI backend initialize MPI on demand Allow the MPI communication backend to initialize MPI from parsec_init() when the application has not already done so. Track whether PaRSEC owns that initialization so parsec_fini() only finalizes MPI when the backend performed the matching init. Populate the context rank and size immediately after MPI becomes available, including the backend-owned initialization path, and keep PARSEC_CONTEXT_QUERY_NODES reporting the context value once the communication engine has been initialized. Document that the selected communication backend may initialize and finalize an external process runtime on behalf of PaRSEC, and encourage callers to query rank and size through the PaRSEC context instead of assuming MPI_COMM_WORLD. 
Signed-off-by: George Bosilca --- parsec/mca/comm/mpi/comm_mpi_funnelled.c | 165 ++++++++++++++++++++--- parsec/parsec.c | 2 +- parsec/runtime.h | 22 ++- 3 files changed, 162 insertions(+), 27 deletions(-) diff --git a/parsec/mca/comm/mpi/comm_mpi_funnelled.c b/parsec/mca/comm/mpi/comm_mpi_funnelled.c index c447ab428..a4f87df58 100644 --- a/parsec/mca/comm/mpi/comm_mpi_funnelled.c +++ b/parsec/mca/comm/mpi/comm_mpi_funnelled.c @@ -72,6 +72,19 @@ PARSEC_OBJ_CLASS_INSTANCE(mpi_funnelled_mem_reg_handle_t, parsec_list_item_t, * if the layer has been initialized or not. */ static int MAX_MPI_TAG = -1, mca_tag_ub = -1; +/* + * Track MPI ownership explicitly. Applications that initialized MPI keep + * ownership of MPI_Finalize(); PaRSEC only finalizes MPI when the MPI backend + * had to initialize it during parsec_init(). + */ +static int mpi_funnelled_initialized_mpi = 0; +/* + * context->comm_ctx is also owned by the MPI backend when the backend installs + * the default communicator or duplicates a user-provided communicator through + * set_ctx(). The communication engine may later replace that duplicate with + * parsec_ce_mpi_comm, which has its own lifetime. + */ +static int mpi_funnelled_context_comm_owned = 0; static volatile int __VAL_NEXT_TAG = 0; #if INT_MAX == INT32_MAX #define next_tag_cas(t, o, n) parsec_atomic_cas_int32(t, o, n) @@ -102,6 +115,45 @@ static inline int next_tag(int k) { return __tag; } +static const char * +mpi_funnelled_thread_level_name(int level) +{ + switch(level) { + case MPI_THREAD_SINGLE: return "MPI_THREAD_SINGLE"; + case MPI_THREAD_FUNNELED: return "MPI_THREAD_FUNNELED"; + case MPI_THREAD_SERIALIZED: return "MPI_THREAD_SERIALIZED"; + case MPI_THREAD_MULTIPLE: return "MPI_THREAD_MULTIPLE"; + default: return "MPI_THREAD_UNKNOWN"; + } +} + +static int +mpi_funnelled_requested_thread_level(void) +{ + /* + * When PaRSEC owns MPI initialization, ask MPI for the strongest guarantee. 
+ * The existing comm_thread_multiple logic still decides whether PaRSEC uses + * concurrent MPI access internally, but applications and tests may call MPI + * collectives after parsec_init() while PaRSEC is active. + */ + return MPI_THREAD_MULTIPLE; +} + +static void +mpi_funnelled_release_context_comm(parsec_context_t *context) +{ + if( mpi_funnelled_context_comm_owned && (-1 != context->comm_ctx) ) { + MPI_Comm comm = (MPI_Comm)context->comm_ctx; + + if( (MPI_COMM_NULL != comm) && + (MPI_COMM_WORLD != comm) ) { + MPI_Comm_free(&comm); + } + context->comm_ctx = -1; + mpi_funnelled_context_comm_owned = 0; + } +} + /* Count and protect the internal building of the arrays of AM */ static int parsec_ce_am_design_version = 0; static int parsec_ce_am_build_version = 0; @@ -544,8 +596,14 @@ static int parsec_mpi_set_ctx(parsec_comm_engine_t* ce, intptr_t opaque_comm_ctx assert( -1 != context->comm_ctx ); MPI_Comm_free((MPI_Comm*)&context->comm_ctx); } + mpi_funnelled_release_context_comm(context); + rc = MPI_Comm_dup((MPI_Comm)opaque_comm_ctx, &comm); + if( MPI_SUCCESS != rc ) { + return PARSEC_ERROR; + } context->comm_ctx = (intptr_t)comm; + mpi_funnelled_context_comm_owned = 1; parsec_ce_am_design_version++; /* signal need for update */ /* We need to know who we are and how many others are there, in order to * correctly initialize the communication engine at the next start. */ @@ -651,29 +709,78 @@ static int mpi_funneled_init_once(parsec_context_t* context) parsec_comm_engine_t * mpi_funnelled_init(parsec_context_t *context) { - int i, rc, is_mpi_up = 0, thread_level_support; + int i, rc, is_mpi_up = 0, is_mpi_finalized = 0; + int requested_thread_level, thread_level_support; - MPI_Initialized(&is_mpi_up); - if( 0 == is_mpi_up ) { - /** - * MPI is not up. The MPI backend cannot provide communication or the - * MPI datatype operations used by the current distributed build. 
- */ + if( (MPI_SUCCESS != MPI_Finalized(&is_mpi_finalized)) || + is_mpi_finalized ) { + context->nb_nodes = 1; + parsec_communication_engine_up = -1; + parsec_fatal("MPI was already finalized before PaRSEC initialized the MPI communication backend.\n"); + return NULL; + } + + if( MPI_SUCCESS != MPI_Initialized(&is_mpi_up) ) { context->nb_nodes = 1; - parsec_communication_engine_up = -1; /* No communications supported */ - parsec_fatal("MPI was not initialized. This version of PaRSEC was compiled with MPI datatype support and needs MPI to execute.\n" - "\t* Please initialize MPI in the application (MPI_Init/MPI_Init_thread) before initializing PaRSEC.\n" - "\t* Alternatively, compile a version of PaRSEC without MPI (-DPARSEC_DIST_WITH_MPI=OFF in ccmake)\n"); + parsec_communication_engine_up = -1; + parsec_fatal("PaRSEC could not query the MPI initialization state.\n"); return NULL; } + if( 0 == is_mpi_up ) { + /* + * Tests and applications that only need PaRSEC's communication backend + * should not have to initialize MPI themselves. Initialize MPI lazily + * here, and remember ownership so mpi_funnelled_fini() can perform the + * matching MPI_Finalize(). 
+ */ + requested_thread_level = mpi_funnelled_requested_thread_level(); + rc = MPI_Init_thread(NULL, NULL, requested_thread_level, &thread_level_support); + if( MPI_SUCCESS != rc ) { + context->nb_nodes = 1; + parsec_communication_engine_up = -1; /* No communications supported */ + parsec_fatal("PaRSEC failed to initialize MPI for the MPI communication backend.\n"); + return NULL; + } + mpi_funnelled_initialized_mpi = 1; + PARSEC_DEBUG_VERBOSE(4, parsec_comm_output_stream, + "MPI backend initialized MPI: requested %s, provided %s", + mpi_funnelled_thread_level_name(requested_thread_level), + mpi_funnelled_thread_level_name(thread_level_support)); + } else { + MPI_Query_thread(&thread_level_support); + } - MPI_Query_thread(&thread_level_support); if( thread_level_support == MPI_THREAD_SINGLE || thread_level_support == MPI_THREAD_FUNNELED ) { parsec_warning("MPI was not initialized with the appropriate level of thread support.\n" "\t* Current level is %s, while MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE is needed\n" "\t* to guarantee correctness of the PaRSEC runtime.\n", - thread_level_support == MPI_THREAD_SINGLE ? "MPI_THREAD_SINGLE" : "MPI_THREAD_FUNNELED"); + mpi_funnelled_thread_level_name(thread_level_support)); + } + + /* Establish rank/size as soon as MPI is available, including the case + * where this backend initialized MPI on behalf of parsec_init(). + */ + if( -1 == context->comm_ctx ) { + MPI_Comm comm; + + /* + * Keep PaRSEC isolated from application-level MPI communicator changes. + * Even when the MPI backend initialized MPI itself, MPI_COMM_WORLD is + * duplicated before being stored in the PaRSEC context. 
+ */ + rc = MPI_Comm_dup(MPI_COMM_WORLD, &comm); + if( MPI_SUCCESS != rc ) { + context->nb_nodes = 1; + parsec_communication_engine_up = -1; + parsec_fatal("PaRSEC failed to duplicate MPI_COMM_WORLD for the MPI communication backend.\n"); + return NULL; + } + context->comm_ctx = (intptr_t)comm; + mpi_funnelled_context_comm_owned = 1; + MPI_Comm_size( comm, (int*)&(context->nb_nodes)); + MPI_Comm_rank( comm, (int*)&(context->my_rank)); + parsec_debug_rank = context->my_rank; } if( -1 == MAX_MPI_TAG ) @@ -728,13 +835,6 @@ mpi_funnelled_init(parsec_context_t *context) parsec_ce.capabilites.supports_noncontiguous_datatype = 1; parsec_ce.capabilites.multithreaded = (thread_level_support >= MPI_THREAD_MULTIPLE); - /* Define some sensible values. We assume the application will initialize PaRSEC using - * the entire MPI_COMM_WORLD, but we need to prepare some decent default values. */ - if( -1 == context->comm_ctx ) { - MPI_Comm_size( MPI_COMM_WORLD, (int*)&(context->nb_nodes)); - MPI_Comm_rank( MPI_COMM_WORLD, (int*)&(context->my_rank)); - context->comm_ctx = (intptr_t)MPI_COMM_WORLD; - } /* Register for internal GET and PUT AMs */ parsec_ce.tag_register(PARSEC_CE_MPI_FUNNELLED_GET_TAG_INTERNAL, mpi_funnelled_internal_get_am_callback, @@ -798,6 +898,9 @@ mpi_funnelled_fini(parsec_comm_engine_t *ce) MPI_Comm_free(&parsec_ce_mpi_am_comm[i]); } ce->parsec_context->comm_ctx = -1; /* We use -1 for the opaque comm_ctx, rather than the MPI specific MPI_COMM_NULL */ + mpi_funnelled_context_comm_owned = 0; + } else { + mpi_funnelled_release_context_comm(ce->parsec_context); } assert(MPI_COMM_NULL == parsec_ce_mpi_comm ); /* no communicator */ assert(MPI_COMM_NULL == parsec_ce_mpi_am_comm[0] ); /* no communicator */ @@ -806,6 +909,16 @@ mpi_funnelled_fini(parsec_comm_engine_t *ce) mpi_funnelled_last_active_req = 0; mpi_funnelled_static_req_idx = 0; + if( mpi_funnelled_initialized_mpi ) { + int is_mpi_finalized = 0; + + if( (MPI_SUCCESS == MPI_Finalized(&is_mpi_finalized)) && + 
!is_mpi_finalized ) { + MPI_Finalize(); + } + mpi_funnelled_initialized_mpi = 0; + } + return 1; } @@ -1496,7 +1609,7 @@ int mpi_no_thread_enable(parsec_comm_engine_t *ce) { parsec_context_t *context = ce->parsec_context; - int i; + int i, rc; /* Did anything changed that would require a reconstruction of the management structures? */ assert(-1 != context->comm_ctx); @@ -1569,8 +1682,16 @@ mpi_no_thread_enable(parsec_comm_engine_t *ce) } #endif /* defined(PARSEC_HAVE_MPI_OVERTAKE) */ /* There is no need to enable overtake for the AM communicator */ - MPI_Comm_dup_with_info((MPI_Comm) context->comm_ctx, info, &parsec_ce_mpi_comm); + MPI_Comm input_comm = (MPI_Comm)context->comm_ctx; + rc = MPI_Comm_dup_with_info(input_comm, info, &parsec_ce_mpi_comm); MPI_Info_free(&info); + if( MPI_SUCCESS != rc ) { + return PARSEC_ERROR; + } + if( mpi_funnelled_context_comm_owned ) { + MPI_Comm_free(&input_comm); + mpi_funnelled_context_comm_owned = 0; + } /* Replace the provided communicator with a pointer to the PaRSEC duplicate */ context->comm_ctx = (uintptr_t)parsec_ce_mpi_comm; diff --git a/parsec/parsec.c b/parsec/parsec.c index 8c8c5178f..a35d65c6f 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -3003,7 +3003,7 @@ int parsec_context_query(parsec_context_t *context, parsec_context_query_cmd_t c case PARSEC_CONTEXT_QUERY_NODES: switch (parsec_communication_engine_up) { case 0: return 0; /* context not ready for distributed runs, and lacking datatype handling capabilities */ - case 1: return 1; /* single node runs, but the context has datatype management capabilities */ + case 1: return context->nb_nodes; /* communication engine initialized, but not necessarily awake */ case 2: return PARSEC_ERR_NOT_FOUND; /* we are in a distributed run, but the MPI engine is not yet ready, so the nb_nodes might not be accurate */ case 3: return context->nb_nodes; } diff --git a/parsec/runtime.h b/parsec/runtime.h index 9cb3cfed9..fd8d557dd 100644 --- a/parsec/runtime.h +++ 
b/parsec/runtime.h @@ -162,6 +162,12 @@ typedef enum parsec_hook_return_e { * execution context. Several contexts can coexist on disjoint resources * at the same time. * + * If the selected communication backend needs an external process runtime + * and that runtime has not been initialized by the application, parsec_init() + * may initialize it. In that case, parsec_fini() releases the runtime during + * communication backend finalization. For example, the MPI backend initializes + * MPI on demand and finalizes MPI only if PaRSEC initialized it. + * * @param[in] nb_cores the number of cores to use * @param[inout] pargc a pointer to the number of arguments passed in pargv * @param[inout] pargv an argv-like NULL terminated array of arguments to pass to @@ -248,7 +254,8 @@ void parsec_abort( parsec_context_t* pcontext, int status); * @details * Complete all pending operations on the execution context, and release * all associated resources. Threads and accelerators attached to this - * context will be released. + * context will be released. If parsec_init() initialized the selected + * communication backend's process runtime, parsec_fini() finalizes it. * * @param[inout] pcontext a pointer to the PaRSEC context to finalize * @return PARSEC_SUCCESS on success @@ -309,11 +316,18 @@ typedef enum parsec_context_query_cmd_e { * @brief Query PaRSEC context's properties. * * @details - * Query properties of the runtime, such as number of devices of a certain type - * or number of cores available to the context. + * Query properties of the runtime, such as the rank and size known by the + * selected communication engine, the number of devices of a certain type, or + * the number of cores available to the context. + * + * PARSEC_CONTEXT_QUERY_RANK and PARSEC_CONTEXT_QUERY_NODES are valid after + * parsec_init() returns. The selected communication backend owns how those + * values are discovered, so callers should use these queries instead of + * assuming MPI_COMM_WORLD. 
* * @param[in] context the PaRSEC context - * @param[in] device_type the type of device the query is about + * @param[in] cmd the property to query + * @param[in] ... optional arguments required by the selected query command * @return PARSEC_ERR_NOT_SUPPORTED if the command is not supported, PARSEC_ERR_NOT_FOUND * if the correct answer cannot yet be returned (such as when the PaRSEC context * has not yet properly been initialized), or the answer to the query (always From 949ac9d9e70f8b1a788577d39b5c93c154092d3a Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 20 May 2026 02:26:09 -0400 Subject: [PATCH 5/5] tests: decouple test runtime setup from direct MPI use The test suite was still largely written around the assumption that every distributed-capable run starts with MPI_Init/MPI_Init_thread and discovers rank/size directly from MPI_COMM_WORLD. That makes the tests awkward for the new communication-engine component work, where MPI remains the default backend but other backends, such as UCX bootstrapped through PMIx, need to run the same tests without exposing MPI to test code. Add a small tests_runtime_common helper library and route common test runtime operations through it. The helper initializes PaRSEC with parsec_init(), retrieves rank and world size through parsec_context_query(), finalizes through parsec_fini(), and validates requested MPI thread support when the selected backend is MPI-backed. Add test wrappers for the remaining small pieces of process-runtime behavior that tests need: barrier, abort, and allreduce. The MPI implementation maps these to MPI_COMM_WORLD collectives. Non-MPI single-process runs get useful local behavior where possible, while unsupported multi-process non-MPI paths return PARSEC_ERR_NOT_IMPLEMENTED instead of silently pretending that a collective completed. Rework tests/tests_timing.h so timing helpers take a PaRSEC context, use the test barrier wrapper, and no longer override exit() with MPI_Abort. 
This keeps timed tests usable with non-MPI communication backends while preserving real barriers for MPI-backed distributed runs. Convert the broad init-only test population away from direct MPI calls. This covers API tests, many PTG and DTD tests, application tests, collection tests, profiling tests, CUDA runtime tests, and scheduling tests. These tests now include tests/tests_runtime.h, link against tests_runtime_common, initialize through parsec_tests_context_init(), and finalize through parsec_tests_context_fini(). Convert simple MPI collectives in tests to the new wrappers where they do not depend on MPI-specific communicator behavior. This includes reductions in PTG checks, reshape checks, branch/count validation, CUDA best-device validation, and selected redistribute checks. The MAXLOC case is represented explicitly as PARSEC_TESTS_REDUCE_MAXLOC_INT so tests that used MPI_2INT/MPI_MAXLOC keep the same semantics. Update CMake wiring so all converted tests link with tests_runtime_common. Keep tests that still genuinely exercise MPI-specific behavior in MPI-only build/test groups. In particular, multichain, haar_tree, and redistribute are only built and tested when MPI_C_FOUND is available, because they still use MPI communicators or MPI message-passing routines directly. Replace incidental MPI datatype queries in scheduling test setup with PaRSEC datatype helpers, so tests that do not communicate data are not tied to MPI just to compute a datatype extent. This prepares the test suite for selectable communication backends: ordinary tests now ask PaRSEC for process identity and synchronization services, while the few remaining MPI-specific tests are explicitly marked as such. 
Signed-off-by: George Bosilca --- tests/CMakeLists.txt | 7 + tests/api/CMakeLists.txt | 9 + tests/api/compose.c | 28 +-- tests/api/init_fini.c | 27 +-- tests/api/operator.c | 28 +-- tests/api/taskpool_wait/CMakeLists.txt | 5 + tests/api/taskpool_wait/dtd_tp.c | 5 +- tests/api/taskpool_wait/main.c | 22 +- tests/api/touch_ex.c | 22 +- tests/api/touch_exf.F90 | 8 +- tests/apps/CMakeLists.txt | 5 +- tests/apps/all2all/CMakeLists.txt | 6 +- tests/apps/all2all/a2a.jdf | 12 +- tests/apps/all2all/main.c | 25 +-- .../BT_reduction_wrapper.c | 4 +- .../apps/generalized_reduction/CMakeLists.txt | 5 + tests/apps/generalized_reduction/main.c | 25 +-- tests/apps/haar_tree/CMakeLists.txt | 4 +- tests/apps/haar_tree/Testings.cmake | 1 - tests/apps/haar_tree/main.c | 36 ++- tests/apps/merge_sort/CMakeLists.txt | 5 + tests/apps/merge_sort/main.c | 25 +-- tests/apps/merge_sort/merge_sort_wrapper.c | 4 +- tests/apps/pingpong/CMakeLists.txt | 7 +- tests/apps/pingpong/bandwidth.jdf | 56 ++--- tests/apps/pingpong/main.c | 42 ++-- tests/apps/pingpong/rtt_wrapper.c | 9 +- tests/apps/stencil/CMakeLists.txt | 6 +- tests/apps/stencil/stencil_internal.h | 5 +- tests/apps/stencil/testing_stencil_1D.c | 50 ++--- tests/class/lifo.c | 12 - tests/class/list.c | 13 -- tests/collections/CMakeLists.txt | 10 +- tests/collections/Testings.cmake | 4 +- tests/collections/kcyclic.jdf | 27 +-- tests/collections/redistribute/CMakeLists.txt | 9 +- tests/collections/redistribute/common.c | 59 ++--- .../redistribute/redistribute_bound.jdf | 22 +- .../redistribute/redistribute_check.jdf | 4 +- .../redistribute/redistribute_check2.jdf | 11 +- .../redistribute/redistribute_test.h | 2 + .../redistribute/testing_redistribute.c | 18 +- .../testing_redistribute_random.c | 18 +- tests/collections/reduce.c | 24 +- tests/collections/reshape/CMakeLists.txt | 5 +- tests/collections/reshape/common.c | 14 +- tests/collections/reshape/common.h | 61 ++--- .../reshape/testing_avoidable_reshape.c | 12 +- 
.../testing_input_dep_reshape_single_copy.c | 26 ++- ...ting_remote_multiple_outs_same_pred_flow.c | 12 +- tests/collections/reshape/testing_reshape.c | 12 +- tests/collections/two_dim_band/CMakeLists.txt | 6 +- tests/collections/two_dim_band/main.c | 32 +-- .../two_dim_band/two_dim_band_test.h | 5 +- tests/dsl/dtd/CMakeLists.txt | 6 +- tests/dsl/dtd/dtd_test_allreduce.c | 40 ++-- tests/dsl/dtd/dtd_test_batch_cpu.c | 25 +-- tests/dsl/dtd/dtd_test_broadcast.c | 33 +-- tests/dsl/dtd/dtd_test_ce.c | 61 ++--- tests/dsl/dtd/dtd_test_cuda_again_async.c | 31 +-- tests/dsl/dtd/dtd_test_cuda_task_insert.c | 32 +-- tests/dsl/dtd/dtd_test_data_flush.c | 30 +-- tests/dsl/dtd/dtd_test_empty.c | 25 +-- .../dsl/dtd/dtd_test_explicit_task_creation.c | 36 +-- tests/dsl/dtd/dtd_test_flag_dont_track.c | 40 ++-- .../dtd/dtd_test_global_id_for_dc_assumed.c | 35 +-- tests/dsl/dtd/dtd_test_hierarchy.c | 32 +-- .../dsl/dtd/dtd_test_insert_task_interface.c | 40 ++-- tests/dsl/dtd/dtd_test_interleave_actions.c | 30 +-- tests/dsl/dtd/dtd_test_multiple_handle_wait.c | 41 +--- tests/dsl/dtd/dtd_test_new_tile.c | 30 +-- tests/dsl/dtd/dtd_test_null_as_tile.c | 34 +-- tests/dsl/dtd/dtd_test_pingpong.c | 47 ++-- tests/dsl/dtd/dtd_test_reduce.c | 33 +-- tests/dsl/dtd/dtd_test_simple_gemm.c | 136 ++++++------ tests/dsl/dtd/dtd_test_task_generation.c | 45 ++-- tests/dsl/dtd/dtd_test_task_inserting_task.c | 34 +-- tests/dsl/dtd/dtd_test_task_insertion.c | 29 +-- tests/dsl/dtd/dtd_test_task_placement.c | 37 +-- tests/dsl/dtd/dtd_test_template_counter.c | 32 +-- tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c | 42 ++-- tests/dsl/dtd/dtd_test_untie.c | 42 +--- tests/dsl/dtd/dtd_test_war.c | 32 +-- tests/dsl/ptg/CMakeLists.txt | 9 + tests/dsl/ptg/batch_cpu.jdf | 21 +- tests/dsl/ptg/branching/CMakeLists.txt | 3 + tests/dsl/ptg/branching/branching_wrapper.c | 4 +- tests/dsl/ptg/branching/main.c | 41 ++-- tests/dsl/ptg/choice/CMakeLists.txt | 1 + tests/dsl/ptg/choice/choice_wrapper.c | 4 +- 
tests/dsl/ptg/choice/main.c | 40 +--- tests/dsl/ptg/complex_deps.jdf | 21 +- tests/dsl/ptg/controlgather/CMakeLists.txt | 6 +- tests/dsl/ptg/controlgather/ctlgat.jdf | 14 +- tests/dsl/ptg/controlgather/ctlgat_wrapper.c | 11 +- tests/dsl/ptg/controlgather/main.c | 28 +-- tests/dsl/ptg/local-indices/CMakeLists.txt | 2 +- tests/dsl/ptg/local-indices/local_indices.jdf | 46 ++-- tests/dsl/ptg/multisize_bcast/CMakeLists.txt | 6 +- .../check_multisize_bcast_wrapper.c | 4 +- tests/dsl/ptg/multisize_bcast/main.c | 25 +-- tests/dsl/ptg/ptgpp/CMakeLists.txt | 13 ++ tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf | 26 +-- tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf | 26 +-- tests/dsl/ptg/ptgpp/too_many_in_deps.jdf | 21 +- tests/dsl/ptg/ptgpp/too_many_local_vars.jdf | 21 +- tests/dsl/ptg/ptgpp/too_many_out_deps.jdf | 21 +- tests/dsl/ptg/ptgpp/too_many_read_flows.jdf | 21 +- tests/dsl/ptg/ptgpp/too_many_write_flows.jdf | 21 +- tests/dsl/ptg/ptgpp/write_check.jdf | 31 ++- tests/dsl/ptg/recursive.jdf | 23 +- tests/dsl/ptg/startup.jdf | 23 +- tests/dsl/ptg/strange.jdf | 21 +- .../ptg/user-defined-functions/CMakeLists.txt | 6 + tests/dsl/ptg/user-defined-functions/main.c | 84 +++---- tests/dsl/ptg/user-defined-functions/utt.jdf | 27 +-- tests/profiling-standalone/CMakeLists.txt | 7 +- tests/profiling-standalone/sp-demo.c | 43 +++- tests/profiling-standalone/sp-perf.c | 39 +++- tests/profiling/CMakeLists.txt | 6 +- tests/profiling/async.jdf | 28 +-- tests/runtime/CMakeLists.txt | 7 +- tests/runtime/Testings.cmake | 4 + tests/runtime/cuda/CMakeLists.txt | 4 + tests/runtime/cuda/get_best_device_check.jdf | 9 +- tests/runtime/cuda/nvlink.jdf | 5 +- tests/runtime/cuda/nvlink_main.c | 37 ++- tests/runtime/cuda/stage_custom.jdf | 1 - tests/runtime/cuda/stage_main.c | 37 +-- tests/runtime/cuda/stress.jdf | 5 +- tests/runtime/cuda/stress_main.c | 29 +-- tests/runtime/cuda/testing_get_best_device.c | 47 ++-- tests/runtime/dtt_bug_replicator_ex.c | 23 +- tests/runtime/scheduling/CMakeLists.txt | 3 +- 
tests/runtime/scheduling/ep_wrapper.c | 25 ++- tests/runtime/scheduling/main.c | 31 +-- tests/tests_runtime.c | 210 ++++++++++++++++++ tests/tests_runtime.h | 92 ++++++++ tests/tests_timing.h | 60 +++-- 139 files changed, 1523 insertions(+), 1921 deletions(-) create mode 100644 tests/tests_runtime.c create mode 100644 tests/tests_runtime.h diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1ffcdacba..9d02e7cc2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,7 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + add_custom_target(parsec_build_tests) add_test(parsec_build_tests "${CMAKE_COMMAND}" --build ${CMAKE_BINARY_DIR} --target parsec_build_tests) @@ -101,6 +105,9 @@ check_function_exists(random PARSEC_HAVE_RANDOM) add_library(tests_common OBJECT tests_data.c) target_link_libraries(tests_common PRIVATE parsec) +add_library(tests_runtime_common OBJECT tests_runtime.c) +target_link_libraries(tests_runtime_common PRIVATE parsec) + add_subdirectory(class) add_subdirectory(api) if( TARGET parsec-ptgpp ) diff --git a/tests/api/CMakeLists.txt b/tests/api/CMakeLists.txt index 7046132bf..45a2e2ff5 100644 --- a/tests/api/CMakeLists.txt +++ b/tests/api/CMakeLists.txt @@ -1,8 +1,14 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + if(TARGET parsec-ptgpp) parsec_addtest_executable(C touch_ex SOURCES touch_ex.c) + target_link_libraries(touch_ex PRIVATE tests_runtime_common) target_ptg_sources(touch_ex PRIVATE "touch.jdf") parsec_addtest_executable(C touch_ex_inline SOURCES touch_ex.c) + target_link_libraries(touch_ex_inline PRIVATE tests_runtime_common) target_ptg_sources(touch_ex_inline PRIVATE "touch.jdf") target_compile_definitions(touch_ex_inline PRIVATE BUILDING_PARSEC) target_compile_options(touch_ex_inline PRIVATE ${PARSEC_ATOMIC_SUPPORT_OPTIONS}) @@ -26,3 +32,6 @@ endif(TARGET parsec-ptgpp) parsec_addtest_executable(C init_fini SOURCES init_fini.c) parsec_addtest_executable(C operator SOURCES operator.c) parsec_addtest_executable(C compose SOURCES compose.c) +target_link_libraries(init_fini PRIVATE tests_runtime_common) +target_link_libraries(operator PRIVATE tests_runtime_common) +target_link_libraries(compose PRIVATE tests_runtime_common) diff --git a/tests/api/compose.c b/tests/api/compose.c index 9100d73ab..05c4b587c 100644 --- a/tests/api/compose.c +++ b/tests/api/compose.c @@ -2,11 +2,13 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" #include "parsec/execution_stream.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #include #define TYPE PARSEC_MATRIX_INTEGER @@ -24,9 +26,7 @@ parsec_operator_print_id( struct parsec_execution_stream_s *es, va_list ap; int m, n, rank = 0; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif + rank = parsec_context_query(es->virtual_process->parsec_context, PARSEC_CONTEXT_QUERY_RANK); va_start(ap, op_data); m = va_arg(ap, int); @@ -44,15 +44,6 @@ int main(int argc, char* argv[]) parsec_taskpool_t *tp1, *tp2, *tp3; int nodes, rank, rc, i = 0; -#if defined(PARSEC_HAVE_MPI) - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &nodes); - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -79,8 +70,10 @@ int main(int argc, char* argv[]) } } - parsec = parsec_init(1, &pargc, &pargv); - assert( NULL != parsec ); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &dcA, TYPE, PARSEC_MATRIX_TILE, rank, @@ -117,10 +110,7 @@ int main(int argc, char* argv[]) parsec_data_free(dcA.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/api/init_fini.c b/tests/api/init_fini.c index bdc531a18..6f3efe51c 100644 --- a/tests/api/init_fini.c +++ b/tests/api/init_fini.c @@ -2,22 +2,23 @@ * Copyright (c) 2021-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include "parsec.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" int main(int argc, char *argv[]) { -#if defined(PARSEC_HAVE_MPI) - int mpith = MPI_THREAD_SINGLE; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &mpith); - assert(mpith >= MPI_THREAD_SERIALIZED); // parsec will do the complaining in NDEBUG -#endif - parsec_context_t *parsec = parsec_init(-1, &argc, &argv); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + parsec_context_t *parsec; + int rc; + + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + + return 0; } diff --git a/tests/api/operator.c b/tests/api/operator.c index a3a2f687b..1a965fb8a 100644 --- a/tests/api/operator.c +++ b/tests/api/operator.c @@ -2,6 +2,7 @@ * Copyright (c) 2011-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" @@ -10,6 +11,7 @@ #include "parsec/execution_stream.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" static int parsec_operator_print_id( struct parsec_execution_stream_s *es, @@ -20,9 +22,8 @@ parsec_operator_print_id( struct parsec_execution_stream_s *es, va_list ap; int k, n, rank = 0; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif + rank = parsec_context_query(es->virtual_process->parsec_context, + PARSEC_CONTEXT_QUERY_RANK); va_start(ap, op_data); k = va_arg(ap, int); @@ -43,16 +44,10 @@ int main( int argc, char* argv[] ) int lm = 1000, ln = 1000; int rows = 1, rc; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &dcA, PARSEC_MATRIX_FLOAT, PARSEC_MATRIX_TILE, rank, mb, nb, lm, ln, 0, 0, lm, ln, @@ -80,11 +75,8 @@ int main( int argc, char* argv[] ) parsec_data_free(dcA.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/api/taskpool_wait/CMakeLists.txt b/tests/api/taskpool_wait/CMakeLists.txt index 84fc088f7..cf5a4feb1 100644 --- a/tests/api/taskpool_wait/CMakeLists.txt +++ b/tests/api/taskpool_wait/CMakeLists.txt @@ -1,5 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) parsec_addtest_executable(C taskpool_wait SOURCES main.c dtd_tp.c) +target_link_libraries(taskpool_wait PRIVATE tests_runtime_common) target_include_directories(taskpool_wait PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(taskpool_wait PRIVATE "ptg_tp.jdf") diff --git a/tests/api/taskpool_wait/dtd_tp.c b/tests/api/taskpool_wait/dtd_tp.c index 27f94af0d..1812f6e5d 100644 --- a/tests/api/taskpool_wait/dtd_tp.c +++ b/tests/api/taskpool_wait/dtd_tp.c @@ -2,6 +2,7 @@ * Copyright (c) 2023-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" #include "parsec/interfaces/dtd/insert_function.h" @@ -11,10 +12,6 @@ #include "parsec/execution_stream.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int task( parsec_execution_stream_t *es, parsec_task_t *this_task ) { (void)es; int delta, m, n, *A; diff --git a/tests/api/taskpool_wait/main.c b/tests/api/taskpool_wait/main.c index 3d7e1b380..27b12b8df 100644 --- a/tests/api/taskpool_wait/main.c +++ b/tests/api/taskpool_wait/main.c @@ -2,9 +2,11 @@ * Copyright (c) 2023-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #include "ptg_tp.h" #include "dtd_tp.h" @@ -22,16 +24,14 @@ int main(int argc, char *argv[]) { int rc; err = 0; + (void)argc; + (void)argv; parsec_context_t *parsec; -#if defined(PARSEC_HAVE_MPI) - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - MPI_Comm_size(MPI_COMM_WORLD, &world_size); -#endif - - parsec = parsec_init(-1, NULL, NULL); + err = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_MULTIPLE, + NULL, NULL, + &parsec, &my_rank, &world_size); + PARSEC_CHECK_ERROR(err, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init(&A, PARSEC_MATRIX_INTEGER, PARSEC_MATRIX_TILE, my_rank, @@ -107,9 +107,7 @@ int main(int argc, char *argv[]) { parsec_dtd_data_collection_fini(&A.super.super); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&A); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return err; } diff --git a/tests/api/touch_ex.c b/tests/api/touch_ex.c index db3ada37b..1e3d7e3f0 100644 --- a/tests/api/touch_ex.c +++ b/tests/api/touch_ex.c @@ -2,10 +2,13 @@ * Copyright (c) 2013-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "touch.h" #include #include @@ -21,11 +24,7 @@ int main( int argc, char** argv ) { parsec_context_t* parsec; parsec_taskpool_t* tp; - int i = 1, rc, verbose; - -#ifdef PARSEC_HAVE_MPI - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &rc); -#endif + int i = 1, rc, verbose = 0; int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { @@ -40,10 +39,9 @@ int main( int argc, char** argv ) } } - parsec = parsec_init(1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-2); - } + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); tp = touch_initialize(BLOCK, N); rc = parsec_context_add_taskpool( parsec, tp ); @@ -58,12 +56,10 @@ int main( int argc, char** argv ) touch_finalize(); parsec_taskpool_free(tp); - parsec_fini( &parsec); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( verbose >= 5 ) { } -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif return 0; } diff --git a/tests/api/touch_exf.F90 b/tests/api/touch_exf.F90 index afcaff5a2..37350edc9 100644 --- a/tests/api/touch_exf.F90 +++ b/tests/api/touch_exf.F90 @@ -2,13 +2,13 @@ ! Copyright (c) 2021-2024 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights ! reserved. +! Copyright (c) 2026 NVIDIA Corporation. All rights reserved. ! 
PROGRAM TOUCH_EXF use, INTRINSIC :: ISO_C_BINDING, only : c_int use parsec_f08_interfaces - use mpi interface function touch_initialize_f08(block, n) BIND(C, name="touch_initialize") @@ -26,14 +26,12 @@ function touch_finalize_f08() BIND(C, name="touch_finalize") end function touch_finalize_f08 end interface - integer BLOCK, N, mpith, ret + integer BLOCK, N, ret parameter (BLOCK=10, N=100) type(parsec_context_t) :: context type(parsec_taskpool_t) :: tp - call MPI_Init_thread(MPI_THREAD_MULTIPLE, mpith, ret) - call parsec_init(1, context) tp = touch_initialize_f08(BLOCK, N) @@ -50,7 +48,5 @@ end function touch_finalize_f08 call parsec_fini(context) - call MPI_Finalize(ret) - call exit(ret) END diff --git a/tests/apps/CMakeLists.txt b/tests/apps/CMakeLists.txt index 1b6985261..ff57e1f05 100644 --- a/tests/apps/CMakeLists.txt +++ b/tests/apps/CMakeLists.txt @@ -4,6 +4,7 @@ if(TARGET parsec-ptgpp) add_subdirectory(generalized_reduction) add_subdirectory(stencil) add_subdirectory(merge_sort) - add_subdirectory(haar_tree) + if(MPI_C_FOUND) + add_subdirectory(haar_tree) + endif(MPI_C_FOUND) endif(TARGET parsec-ptgpp) - diff --git a/tests/apps/all2all/CMakeLists.txt b/tests/apps/all2all/CMakeLists.txt index 4bdf7c650..a2b519373 100644 --- a/tests/apps/all2all/CMakeLists.txt +++ b/tests/apps/all2all/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C a2a SOURCES main.c a2a_data.c) +target_link_libraries(a2a PRIVATE tests_runtime_common) target_ptg_sources(a2a PRIVATE "a2a.jdf") target_link_libraries(a2a PRIVATE m) - diff --git a/tests/apps/all2all/a2a.jdf b/tests/apps/all2all/a2a.jdf index bb6fad334..ec280a81a 100644 --- a/tests/apps/all2all/a2a.jdf +++ b/tests/apps/all2all/a2a.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2013-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. * * @precisions normal z -> s d c * @@ -11,9 +12,6 @@ extern "C" %{ #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include int32_t always_zero() @@ -109,16 +107,10 @@ extern "C" %{ */ parsec_taskpool_t *a2a_new(parsec_tiled_matrix_t *A, parsec_tiled_matrix_t *B, int size, int repeat) { - int worldsize; + int worldsize = (int)A->super.nodes; parsec_a2a_taskpool_t *tp = NULL; (void)size; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_size(MPI_COMM_WORLD, &worldsize); -#else - worldsize = 1; -#endif - if( repeat <= 0 ) { fprintf(stderr, "To work, A2A must do at least one exchange of at least one byte\n"); return (parsec_taskpool_t*)tp; diff --git a/tests/apps/all2all/main.c b/tests/apps/all2all/main.c index fecf16fe2..d2be1f43f 100644 --- a/tests/apps/all2all/main.c +++ b/tests/apps/all2all/main.c @@ -2,10 +2,12 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "a2a_wrapper.h" #include "a2a_data.h" #if defined(PARSEC_HAVE_STRING_H) @@ -20,18 +22,10 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_t *dcA, *dcB; parsec_taskpool_t *a2a; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); size = 256; repeat = 10; @@ -52,13 +46,10 @@ int main(int argc, char *argv[]) PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); a2a_free(a2a); - parsec_fini(&parsec); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); free_data(dcA); free_data(dcB); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif - return 0; } diff --git a/tests/apps/generalized_reduction/BT_reduction_wrapper.c b/tests/apps/generalized_reduction/BT_reduction_wrapper.c index 4e19c43c6..2cb16fb78 100644 --- a/tests/apps/generalized_reduction/BT_reduction_wrapper.c +++ b/tests/apps/generalized_reduction/BT_reduction_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif static parsec_datatype_t block; #include diff --git a/tests/apps/generalized_reduction/CMakeLists.txt b/tests/apps/generalized_reduction/CMakeLists.txt index f77a6d3ef..bf5bd32c7 100644 --- a/tests/apps/generalized_reduction/CMakeLists.txt +++ b/tests/apps/generalized_reduction/CMakeLists.txt @@ -1,4 +1,9 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C BT_reduction SOURCES main.c BT_reduction_wrapper.c reduc_data.c) +target_link_libraries(BT_reduction PRIVATE tests_runtime_common) target_ptg_sources(BT_reduction PRIVATE "BT_reduction.jdf") diff --git a/tests/apps/generalized_reduction/main.c b/tests/apps/generalized_reduction/main.c index 57a6e5f7c..7dbc9ee00 100644 --- a/tests/apps/generalized_reduction/main.c +++ b/tests/apps/generalized_reduction/main.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,6 +11,7 @@ #endif #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "BT_reduction_wrapper.h" #if defined(PARSEC_HAVE_STRING_H) #include @@ -24,18 +26,10 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_t *dcA; parsec_taskpool_t *BT_reduction; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); nb = 1; nt = 7; @@ -59,11 +53,8 @@ int main(int argc, char *argv[]) parsec_taskpool_free((parsec_taskpool_t*)BT_reduction); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/apps/haar_tree/CMakeLists.txt b/tests/apps/haar_tree/CMakeLists.txt index 79332681d..a69950dda 100644 --- a/tests/apps/haar_tree/CMakeLists.txt +++ b/tests/apps/haar_tree/CMakeLists.txt @@ -3,10 +3,10 @@ include(ParsecCompilePTG) parsec_addtest_executable(C project SOURCES main.c tree_dist.c) target_ptg_sources(project PRIVATE "project.jdf;walk.jdf") target_include_directories(project PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) -target_link_libraries(project PRIVATE Threads::Threads m) +target_link_libraries(project PRIVATE Threads::Threads m tests_runtime_common) parsec_addtest_executable(C project_dyn SOURCES main.c tree_dist.c) target_ptg_sources(project_dyn PRIVATE "project_dyn.jdf;walk.jdf") target_compile_definitions(project_dyn PUBLIC parsec_project_new=parsec_project_dyn_new) target_include_directories(project_dyn PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) 
-target_link_libraries(project_dyn PRIVATE Threads::Threads m) +target_link_libraries(project_dyn PRIVATE Threads::Threads m tests_runtime_common) diff --git a/tests/apps/haar_tree/Testings.cmake b/tests/apps/haar_tree/Testings.cmake index f29ec204a..b67848047 100644 --- a/tests/apps/haar_tree/Testings.cmake +++ b/tests/apps/haar_tree/Testings.cmake @@ -1,4 +1,3 @@ -parsec_addtest_cmd(apps/haar_tree ${SHM_TEST_CMD_LIST} apps/haar_tree/project -x) if( MPI_C_FOUND ) parsec_addtest_cmd(apps/haar_tree:mp ${MPI_TEST_CMD_LIST} 4 apps/haar_tree/project -x) if(TEST apps/haar_tree:mp) diff --git a/tests/apps/haar_tree/main.c b/tests/apps/haar_tree/main.c index 45dd1ef10..209b80422 100644 --- a/tests/apps/haar_tree/main.c +++ b/tests/apps/haar_tree/main.c @@ -2,10 +2,12 @@ * Copyright (c) 2016-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/arena.h" +#include "tests/tests_runtime.h" #include "tree_dist.h" #include "project.h" @@ -156,18 +158,6 @@ int main(int argc, char *argv[]) uint64_t cksum = 0; redim_string_t *rs; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - pargc = 0; pargv = NULL; for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -176,7 +166,9 @@ int main(int argc, char *argv[]) break; } } - parsec = parsec_init(1, &pargc, &pargv); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); while ((ch = getopt(argc, argv, "xvd:m:M:f:")) != -1) { switch (ch) { @@ -224,9 +216,10 @@ int main(int argc, char *argv[]) 
parsec_matrix_adt_define_rect( adt, parsec_datatype_float_t, 2, 1, 2); -#if defined(HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } project = parsec_project_new(treeA, world, (parsec_data_collection_t*)&fakeDesc, 1e-3, be_verbose, 1.0); project->arenas_datatypes[PARSEC_project_DEFAULT_ADT_IDX] = *adt; @@ -257,7 +250,7 @@ int main(int argc, char *argv[]) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); -#if defined(HAVE_MPI) +#if defined(PARSEC_HAVE_MPI) if( do_checks ) { uint64_t sum = 0; printf("Rank %d contributes with %llx\n", rank, cksum); @@ -292,7 +285,7 @@ int main(int argc, char *argv[]) rs_free(rs); } } -#endif /* defined(HAVE_MPI) */ +#endif /* defined(PARSEC_HAVE_MPI) */ parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&fakeDesc); tree_dist_free(treeA); @@ -300,11 +293,8 @@ int main(int argc, char *argv[]) parsec_taskpool_free(&project->super); parsec_matrix_adt_free( &adt ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/apps/merge_sort/CMakeLists.txt b/tests/apps/merge_sort/CMakeLists.txt index 9b2453eca..6bdfa3ffb 100644 --- a/tests/apps/merge_sort/CMakeLists.txt +++ b/tests/apps/merge_sort/CMakeLists.txt @@ -1,6 +1,11 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) if(PARSEC_HAVE_RANDOM) parsec_addtest_executable(C merge_sort SOURCES main.c merge_sort_wrapper.c sort_data.c) +target_link_libraries(merge_sort PRIVATE tests_runtime_common) target_ptg_sources(merge_sort PRIVATE "merge_sort.jdf") endif(PARSEC_HAVE_RANDOM) diff --git a/tests/apps/merge_sort/main.c b/tests/apps/merge_sort/main.c index cc000a11c..33f288ab4 100644 --- a/tests/apps/merge_sort/main.c +++ b/tests/apps/merge_sort/main.c @@ -2,12 +2,14 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "merge_sort_wrapper.h" #if defined(PARSEC_HAVE_STRING_H) #include @@ -22,14 +24,6 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_t *dcA; parsec_taskpool_t *msort; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif if( argc > 1 ) { char* endptr; long val = strtol(argv[1], &endptr, 0); @@ -44,10 +38,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(cores, &argc, &argv); - if( NULL == parsec ) { - exit(1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); dcA = create_and_distribute_data(rank, world, nb, nt, sizeof(int)); parsec_data_collection_set_key((parsec_data_collection_t *)dcA, "A"); @@ -64,11 +58,8 @@ int main(int argc, char *argv[]) merge_sort_free((parsec_taskpool_t*)msort); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff 
--git a/tests/apps/merge_sort/merge_sort_wrapper.c b/tests/apps/merge_sort/merge_sort_wrapper.c index c1c37a88f..5d5e88b52 100644 --- a/tests/apps/merge_sort/merge_sort_wrapper.c +++ b/tests/apps/merge_sort/merge_sort_wrapper.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -9,9 +10,6 @@ #include "parsec/arena.h" #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "merge_sort.h" diff --git a/tests/apps/pingpong/CMakeLists.txt b/tests/apps/pingpong/CMakeLists.txt index a863dcbad..48c6b6c9f 100644 --- a/tests/apps/pingpong/CMakeLists.txt +++ b/tests/apps/pingpong/CMakeLists.txt @@ -1,9 +1,14 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C rtt SOURCES main.c rtt_wrapper.c rtt_data.c) +target_link_libraries(rtt PRIVATE tests_runtime_common) target_ptg_sources(rtt PRIVATE "rtt.jdf") parsec_addtest_executable(C bw_test) +target_link_libraries(bw_test PRIVATE tests_runtime_common) set_source_files_properties("bandwidth.jdf" PROPERTIES PTGPP_COMPILE_OPTIONS "--Wremoteref") target_ptg_sources(bw_test PRIVATE "bandwidth.jdf") - diff --git a/tests/apps/pingpong/bandwidth.jdf b/tests/apps/pingpong/bandwidth.jdf index df38aa8d3..288f889da 100644 --- a/tests/apps/pingpong/bandwidth.jdf +++ b/tests/apps/pingpong/bandwidth.jdf @@ -24,9 +24,7 @@ extern "C" %{ #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "tests/tests_runtime.h" %} @@ -180,6 +178,7 @@ int main(int argc, char *argv[]) int rank, nodes, ch, i; int pargc = 0; char **pargv = NULL; + int rc; struct timeval tstart, tend; double t, bw, messages; @@ -191,18 +190,6 @@ int main(int argc, char *argv[]) int cores = 1; int nb_runs = 1; -#if defined(PARSEC_HAVE_MPI) - { 
- int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - while ((ch = getopt(argc, argv, "n:f:l:u:c:h:e:")) != -1) { switch (ch) { case 'n': loops = atoi(optarg); break; @@ -237,14 +224,13 @@ int main(int argc, char *argv[]) break; } } - /* Initialize PaRSEC */ - parsec = parsec_init(cores, &pargc, &pargv); - if( NULL == parsec ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ - exit(-1); + /* Initialize PaRSEC */ + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &pargc, &pargv, &parsec, &rank, &nodes); + if( PARSEC_SUCCESS != rc ) { + fprintf(stderr, "parsec_tests_context_init failed: %d\n", rc); + exit(EXIT_FAILURE); } /* If the number of cores has not been defined as a parameter earlier @@ -292,9 +278,11 @@ int main(int argc, char *argv[]) parsec_datatype_uint8_t, 1, size, 1); /* Time start */ -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif /* defined(PARSEC_HAVE_MPI) */ + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + fprintf(stderr, "parsec_tests_barrier failed: %d\n", rc); + exit(EXIT_FAILURE); + } gettimeofday(&tstart, NULL); parsec_context_add_taskpool(parsec, bandwidth_taskpool); @@ -302,9 +290,11 @@ int main(int argc, char *argv[]) parsec_context_wait(parsec); /* Time end */ -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif /* defined(PARSEC_HAVE_MPI) */ + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + fprintf(stderr, "parsec_tests_barrier failed: %d\n", rc); + exit(EXIT_FAILURE); + } gettimeofday(&tend, NULL); if( 0 == rank ) { @@ -327,11 +317,11 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&Disk); /* Clean 
up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + if( PARSEC_SUCCESS != rc ) { + fprintf(stderr, "parsec_tests_context_fini failed: %d\n", rc); + exit(EXIT_FAILURE); + } return 0; } diff --git a/tests/apps/pingpong/main.c b/tests/apps/pingpong/main.c index a0aaff505..97be7a360 100644 --- a/tests/apps/pingpong/main.c +++ b/tests/apps/pingpong/main.c @@ -6,6 +6,7 @@ */ #include "parsec/runtime.h" +#include "tests/tests_runtime.h" #include "rtt_wrapper.h" #include "rtt_data.h" #if defined(PARSEC_HAVE_STRING_H) @@ -15,9 +16,6 @@ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #include "parsec/utils/debug.h" static int next_message_size(int current, int upper) @@ -55,18 +53,6 @@ int main(int argc, char *argv[]) parsec_data_collection_t *dcA; parsec_taskpool_t *rtt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - while ((ch = getopt(argc, argv, "n:l:u:h")) != -1) { switch (ch) { case 'n': loops = atoi(optarg); break; @@ -99,7 +85,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(-1, &pargc, &pargv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); nb = loops * world; for(idx = 0, size = start_length; ; idx++) { @@ -114,9 +103,10 @@ int main(int argc, char *argv[]) rc = parsec_context_add_taskpool(parsec, rtt); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } 
gettimeofday(&tstart, NULL); rc = parsec_context_start(parsec); @@ -125,9 +115,10 @@ int main(int argc, char *argv[]) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } gettimeofday(&tend, NULL); if( 0 == rank ) { @@ -145,11 +136,8 @@ int main(int argc, char *argv[]) size = next_message_size(size, end_length); } - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/apps/pingpong/rtt_wrapper.c b/tests/apps/pingpong/rtt_wrapper.c index b76cf579b..1a6bb8e6e 100644 --- a/tests/apps/pingpong/rtt_wrapper.c +++ b/tests/apps/pingpong/rtt_wrapper.c @@ -11,9 +11,6 @@ #include "parsec/arena.h" #include "parsec/mca/device/device.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "rtt.h" @@ -45,11 +42,7 @@ parsec_taskpool_t *rtt_new(parsec_data_collection_t *A, int size, int nb) { parsec_rtt_taskpool_t *tp = NULL; parsec_datatype_t block; - int worldsize = 1; - -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_size(MPI_COMM_WORLD, &worldsize); -#endif + int worldsize = (int)A->nodes; if( nb <= 0 || size <= 0 ) { fprintf(stderr, "To work, RTT must do at least one round time trip of at least one byte\n"); diff --git a/tests/apps/stencil/CMakeLists.txt b/tests/apps/stencil/CMakeLists.txt index 6f508f473..068aea386 100644 --- a/tests/apps/stencil/CMakeLists.txt +++ b/tests/apps/stencil/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + file(COPY loop_gen_1D DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) parsec_addtest_executable(C testing_stencil_1D SOURCES stencil_internal.c testing_stencil_1D.c) target_include_directories(testing_stencil_1D PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(testing_stencil_1D PRIVATE "stencil_1D.jdf") -target_link_libraries(testing_stencil_1D PRIVATE m) +target_link_libraries(testing_stencil_1D PRIVATE m tests_runtime_common) diff --git a/tests/apps/stencil/stencil_internal.h b/tests/apps/stencil/stencil_internal.h index dcc16d0a4..17ecf78ad 100644 --- a/tests/apps/stencil/stencil_internal.h +++ b/tests/apps/stencil/stencil_internal.h @@ -2,6 +2,7 @@ * Copyright (c) 2019-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* includes parsec headers */ #include "parsec.h" @@ -18,10 +19,6 @@ #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - /* Flops */ #define FLOPS_STENCIL_1D(n) ( (DTYPE)(iter) * (2*(2*R+1)) * (DTYPE)(n) ) diff --git a/tests/apps/stencil/testing_stencil_1D.c b/tests/apps/stencil/testing_stencil_1D.c index f9df84ff2..935dfcfcc 100644 --- a/tests/apps/stencil/testing_stencil_1D.c +++ b/tests/apps/stencil/testing_stencil_1D.c @@ -2,6 +2,7 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "stencil_internal.h" #include "tests/tests_timing.h" @@ -15,7 +16,7 @@ DTYPE * weight_1D; int main(int argc, char *argv[]) { parsec_context_t* parsec; - int rank, nodes, ch; + int rank, nodes, ch, rc; int pargc = 0; char **pargv; double gflops, flops; @@ -49,7 +50,7 @@ int main(int argc, char *argv[]) case 'R': R = atoi(optarg); break; case '?': case 'h': default: fprintf(stderr, - "-m : initialize MPI_THREAD_MULTIPLE (default: 0/no)\n" + "-m : request multiple-thread support from the test runtime (default: 0/no)\n" "-M : row dimension (M) of the matrices (default: 8)\n" "-N : column dimension (N) of the matrices (default: 8)\n" "-t : row dimension (MB) of the tiles (default: 4)\n" @@ -65,19 +66,6 @@ int main(int argc, char *argv[]) } } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - int requested = m? MPI_THREAD_MULTIPLE: MPI_THREAD_SERIALIZED; - MPI_Init_thread(&argc, &argv, requested, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - pargc = 0; pargv = NULL; for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -87,6 +75,11 @@ int main(int argc, char *argv[]) } } + rc = parsec_tests_context_init(cores, + m ? PARSEC_TEST_THREAD_MULTIPLE : PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + if(0) { volatile int loop = 1; fprintf(stderr, "gdb -p %d\n", getpid()); @@ -94,16 +87,6 @@ int main(int argc, char *argv[]) sleep(1); } - /* Initialize PaRSEC */ - parsec = parsec_init(cores, &pargc, &pargv); - - if( NULL == parsec ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ - exit(-1); - } - /* If the number of cores has not been defined as a parameter earlier * update it with the default parameter computed in parsec_init. 
*/ if(cores <= 0) @@ -183,13 +166,16 @@ int main(int argc, char *argv[]) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } #endif /* Stencil_1D */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); parsec_stencil_1D(parsec, (parsec_tiled_matrix_t *)&dcA, iter, R); - SYNC_TIME_PRINT(rank, ("Stencil" "\tN= %d NB= %d M= %d MB= %d " + SYNC_TIME_PRINT(parsec, rank, ("Stencil" "\tN= %d NB= %d M= %d MB= %d " "PxQ= %d %d KPxKQ= %d %d " "Iteration= %d Radius= %d Kernel_type= %d " "Number_of_buffers= %d cores= %d : %lf gflops\n", @@ -200,12 +186,8 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); /* Clean up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif - + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/class/lifo.c b/tests/class/lifo.c index 2ee4219fa..d5f959f21 100644 --- a/tests/class/lifo.c +++ b/tests/class/lifo.c @@ -15,9 +15,6 @@ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include "parsec/class/lifo.h" #include "parsec/os-spec-timing.h" @@ -217,12 +214,6 @@ int main(int argc, char *argv[]) min_time = 0; max_time = 0xffffffff; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } -#endif while( (ch = getopt(argc, argv, "c:n:N:h?")) != -1 ) { switch(ch) { case 'c': { @@ -341,8 +332,5 @@ int main(int argc, char *argv[]) printf(" - all tests passed\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif return 0; } diff --git a/tests/class/list.c b/tests/class/list.c index 0ef61c81f..af1bab9f4 100644 --- a/tests/class/list.c +++ b/tests/class/list.c @@ -16,9 +16,6 @@ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include 
"parsec/class/list.h" #include "parsec/os-spec-timing.h" @@ -261,13 +258,6 @@ int main(int argc, char *argv[]) min_time = 0; max_time = 0xffffffff; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } -#endif - while( (ch = getopt(argc, argv, "c:n:N:h?")) != -1 ) { switch(ch) { case 'c': { @@ -391,8 +381,5 @@ int main(int argc, char *argv[]) printf(" - all tests passed\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif return 0; } diff --git a/tests/collections/CMakeLists.txt b/tests/collections/CMakeLists.txt index d705bb331..cfa718445 100644 --- a/tests/collections/CMakeLists.txt +++ b/tests/collections/CMakeLists.txt @@ -1,12 +1,18 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + parsec_addtest_executable(C reduce SOURCES reduce.c) +target_link_libraries(reduce PRIVATE tests_runtime_common) parsec_addtest_executable(C kcyclic) +target_link_libraries(kcyclic PRIVATE tests_runtime_common) target_ptg_sources(kcyclic PRIVATE "kcyclic.jdf") target_link_libraries(kcyclic PRIVATE m) add_subdirectory(two_dim_band) -if(PARSEC_HAVE_MPI) +if(MPI_C_FOUND) add_subdirectory(redistribute) -endif(PARSEC_HAVE_MPI) +endif(MPI_C_FOUND) add_subdirectory(reshape) diff --git a/tests/collections/Testings.cmake b/tests/collections/Testings.cmake index d88f0f616..9e9c9d4bb 100644 --- a/tests/collections/Testings.cmake +++ b/tests/collections/Testings.cmake @@ -1,7 +1,7 @@ parsec_addtest_cmd(collections/reduce ${SHM_TEST_CMD_LIST} collections/reduce) -if( PARSEC_HAVE_MPI ) +if( MPI_C_FOUND ) parsec_addtest_cmd(collections/redistribute:mp ${MPI_TEST_CMD_LIST} 8 collections/redistribute/testing_redistribute -M 2400 -N 2400 -a 2400 -A 2400 -t 300 -T 300 -b 200 -B 200 -m 2000 -n 2000 -I 30 -J 40 -i 100 -j 121 -v -z -x -P 2 -Q 4 -p 4 -q 2) set(PARSEC_REDISTRIBUTE_SMALL_ARGS @@ -42,7 +42,7 @@ if( PARSEC_HAVE_MPI ) endforeach() parsec_addtest_cmd(collections/redistribute_random:mp 
${MPI_TEST_CMD_LIST} 8 collections/redistribute/testing_redistribute_random -M 2400 -N 2400 -a 2400 -A 2400 -t 300 -T 300 -b 200 -B 200 -m 2000 -n 2000 -I 30 -J 40 -i 100 -j 121 -v -z -x -P 2 -Q 4 -p 4 -q 2) -endif( PARSEC_HAVE_MPI ) +endif( MPI_C_FOUND ) parsec_addtest_cmd(collections/reshape ${SHM_TEST_CMD_LIST} collections/reshape/reshape -N 120 -t 9 -c 10) parsec_addtest_cmd(collections/reshape:mt ${SHM_TEST_CMD_LIST} collections/reshape/reshape -N 120 -t 9 -c 10 -m 1) diff --git a/tests/collections/kcyclic.jdf b/tests/collections/kcyclic.jdf index 122cc4ef6..ea12995af 100644 --- a/tests/collections/kcyclic.jdf +++ b/tests/collections/kcyclic.jdf @@ -3,18 +3,18 @@ extern "C" %{ * Copyright (c) 2019-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif #define TYPE PARSEC_MATRIX_INTEGER parsec_taskpool_t* kcyclic_taskpool(parsec_matrix_block_cyclic_t* A, @@ -33,17 +33,10 @@ int main( int argc, char** argv ) parsec_matrix_block_cyclic_t descCA; int rc; -#if defined(PARSEC_HAVE_MPI) - int required = MPI_THREAD_MULTIPLE, provided = 0; - MPI_Init_thread(&argc, &argv, required, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - - parsec = parsec_init(1, &argc, &argv); - if( NULL == parsec ) { - exit(-2); - } + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); n *= nodes; /* scale it */ p = sqrt(nodes); @@ -93,14 +86,12 @@ int main( int argc, char** argv ) kcyclic_taskpool_free(tp); - parsec_fini( &parsec); + rc = 
parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); free(descA.mat); free(descCA.mat); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif return 0; } diff --git a/tests/collections/redistribute/CMakeLists.txt b/tests/collections/redistribute/CMakeLists.txt index a03f38f2a..d639e495d 100644 --- a/tests/collections/redistribute/CMakeLists.txt +++ b/tests/collections/redistribute/CMakeLists.txt @@ -1,11 +1,14 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include_directories(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}") include_directories(BEFORE "${CMAKE_CURRENT_BINARY_DIR}") parsec_addtest_executable(C testing_redistribute SOURCES testing_redistribute.c common.c) -target_link_libraries(testing_redistribute PRIVATE m) +target_link_libraries(testing_redistribute PRIVATE m tests_runtime_common) target_ptg_sources(testing_redistribute PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check2.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_bound.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_no_optimization.jdf") parsec_addtest_executable(C testing_redistribute_random SOURCES testing_redistribute_random.c common.c) -target_link_libraries(testing_redistribute_random PRIVATE m) +target_link_libraries(testing_redistribute_random PRIVATE m tests_runtime_common) target_ptg_sources(testing_redistribute_random PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_check2.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_bound.jdf;${CMAKE_CURRENT_SOURCE_DIR}/redistribute_no_optimization.jdf") - diff --git a/tests/collections/redistribute/common.c b/tests/collections/redistribute/common.c index f31071fe3..7925517cb 100644 --- a/tests/collections/redistribute/common.c +++ b/tests/collections/redistribute/common.c @@ -15,10 +15,6 @@ #include #endif /* defined(PARSEC_HAVE_GETOPT_H) */ -#ifdef PARSEC_HAVE_MPI -#include -#endif - double 
time_elapsed = 0.0; double sync_time_elapsed = 0.0; @@ -127,7 +123,7 @@ void print_usage(void) " -h --help : this message\n" " -z --time : get run time\n" " -e --num-runs : number of runs\n" - " -f --thread_multiple : 0/default, init mpi with MPI_THREAD_SERIALIZED; others, MPI_THREAD_MULTIPLE\n" + " -f --thread_multiple : 0/default, serialized test runtime; others, multiple-thread test runtime\n" " -y --no-optimization : no_optimization version, send the whole tile to target; default 0, not no_optimization version\n" " -c --cores : number of concurrent threads (default: number of physical hyper-threads)\n" "\n Notes:\n" @@ -272,7 +268,7 @@ static void parse_arguments(int *_argc, char*** _argv, int* iparam, double *dpar /* Default number of runs: 1 */ iparam[IPARAM_NUM_RUNS] = 1; - /* Default MPI_THREAD_SERIALIZED */ + /* Default to serialized access to the selected test runtime. */ iparam[IPARAM_THREAD_MULTIPLE] = 0; /* Default Not no_optimization version */ @@ -472,38 +468,32 @@ static void print_arguments(int* iparam) parsec_context_t* setup_parsec(int argc, char **argv, int *iparam, double *dparam) { - parse_arguments(&argc, &argv, iparam, dparam); -#ifdef PARSEC_HAVE_MPI - { - int provided; - if( iparam[IPARAM_THREAD_MULTIPLE] ) - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - else - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &iparam[IPARAM_NNODES]); - MPI_Comm_rank(MPI_COMM_WORLD, &iparam[IPARAM_RANK]); -#else - iparam[IPARAM_NNODES] = 1; - iparam[IPARAM_RANK] = 0; -#endif - int verbose = iparam[IPARAM_VERBOSE]; - if(iparam[IPARAM_RANK] > 0 && verbose < 4) verbose = 0; + parsec_context_t* ctx = NULL; + int rc; - SYNC_TIME_START(); + parse_arguments(&argc, &argv, iparam, dparam); /* Once we got out arguments, we should pass whatever is left down */ int parsec_argc = argc - optind; char** parsec_argv = argv + optind; - parsec_context_t* ctx = parsec_init(iparam[IPARAM_NCORES], - 
&parsec_argc, &parsec_argv); - if( NULL == ctx ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ + rc = parsec_tests_context_init(iparam[IPARAM_NCORES], + iparam[IPARAM_THREAD_MULTIPLE] ? + PARSEC_TEST_THREAD_MULTIPLE : + PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &ctx, + &iparam[IPARAM_RANK], + &iparam[IPARAM_NNODES]); + if( PARSEC_SUCCESS != rc ) { + fprintf(stderr, "parsec_tests_context_init failed: %d\n", rc); exit(-1); } + int verbose = iparam[IPARAM_VERBOSE]; + if(iparam[IPARAM_RANK] > 0 && verbose < 4) verbose = 0; + + SYNC_TIME_START(ctx); + /* If the number of cores has not been defined as a parameter earlier update it with the default parameter computed in parsec_init. */ if(iparam[IPARAM_NCORES] <= 0) @@ -516,17 +506,14 @@ parsec_context_t* setup_parsec(int argc, char **argv, int *iparam, double *dpara } print_arguments(iparam); - if(verbose > 2) SYNC_TIME_PRINT(iparam[IPARAM_RANK], ("PaRSEC initialized\n")); + if(verbose > 2) SYNC_TIME_PRINT(ctx, iparam[IPARAM_RANK], ("PaRSEC initialized\n")); return ctx; } void cleanup_parsec(parsec_context_t* parsec, int *iparam, double *dparam) { - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); (void)iparam; (void)dparam; } diff --git a/tests/collections/redistribute/redistribute_bound.jdf b/tests/collections/redistribute/redistribute_bound.jdf index 8680d7aed..1fe18e90f 100644 --- a/tests/collections/redistribute/redistribute_bound.jdf +++ b/tests/collections/redistribute/redistribute_bound.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2017-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "redistribute_test.h" @@ -377,10 +377,22 @@ double* parsec_redistribute_bound(parsec_context_t *parsec, long long int *total_remote_rank = (long long int *)calloc(world_size, sizeof(long long int)); MPI_Barrier( MPI_COMM_WORLD ); - MPI_Allreduce(&sum_remote[0], &total_remote, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&sum_local[0], &total_local, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&sum_nb_message_remote[0], &total_nb_message_remote, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&sum_nb_message_local[0], &total_nb_message_local, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); + int reduce_rc = parsec_tests_allreduce(parsec, &sum_remote[0], &total_remote, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + reduce_rc = parsec_tests_allreduce(parsec, &sum_local[0], &total_local, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + reduce_rc = parsec_tests_allreduce(parsec, &sum_nb_message_remote[0], &total_nb_message_remote, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + reduce_rc = parsec_tests_allreduce(parsec, &sum_nb_message_local[0], &total_nb_message_local, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); MPI_Allgather( &sum_remote_rank_send[0], 1, MPI_LONG_LONG_INT, total_remote_rank_send, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD); MPI_Allgather( &sum_remote_rank_receive[0], 1, MPI_LONG_LONG_INT, total_remote_rank_receive, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD); MPI_Allgather( &sum_local_rank[0], 1, MPI_LONG_LONG_INT, total_local_rank, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD); diff --git a/tests/collections/redistribute/redistribute_check.jdf b/tests/collections/redistribute/redistribute_check.jdf index edaebebf1..06fcd37a5 
100644 --- a/tests/collections/redistribute/redistribute_check.jdf +++ b/tests/collections/redistribute/redistribute_check.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2017-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "redistribute_test.h" @@ -328,7 +329,8 @@ int parsec_redistribute_check(parsec_context_t *parsec, T_g = (DTYPE *)calloc(size_row*size_col, sizeof(DTYPE)); } - MPI_Barrier(MPI_COMM_WORLD); + int barrier_rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(barrier_rc, "parsec_tests_barrier"); parsec_redistribute_check = parsec_redistribute_check_New( (parsec_tiled_matrix_t *)dcY, diff --git a/tests/collections/redistribute/redistribute_check2.jdf b/tests/collections/redistribute/redistribute_check2.jdf index 4a3d958b2..529dc2a1b 100644 --- a/tests/collections/redistribute/redistribute_check2.jdf +++ b/tests/collections/redistribute/redistribute_check2.jdf @@ -159,9 +159,14 @@ int parsec_redistribute_check2(parsec_context_t *parsec, info_sum_thd += info[i]; } - MPI_Barrier( MPI_COMM_WORLD ); - MPI_Allreduce(&info_sum_thd, &info_total, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Barrier( MPI_COMM_WORLD ); + int barrier_rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(barrier_rc, "parsec_tests_barrier"); + int reduce_rc = parsec_tests_allreduce(parsec, &info_sum_thd, &info_total, 1, + parsec_datatype_int64_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(reduce_rc, "parsec_tests_allreduce"); + barrier_rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(barrier_rc, "parsec_tests_barrier"); if( 0 == dcY->super.myrank ) { if( 0LL == info_total ) diff --git a/tests/collections/redistribute/redistribute_test.h b/tests/collections/redistribute/redistribute_test.h index 27d96c54c..a4c5aa5b4 100644 --- a/tests/collections/redistribute/redistribute_test.h +++ 
b/tests/collections/redistribute/redistribute_test.h @@ -2,8 +2,10 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/redistribute/redistribute_internal.h" +#include "tests/tests_runtime.h" /* Define whether run PTG or DTD */ #define RUN_PTG 1 diff --git a/tests/collections/redistribute/testing_redistribute.c b/tests/collections/redistribute/testing_redistribute.c index 92462757e..944ef5451 100644 --- a/tests/collections/redistribute/testing_redistribute.c +++ b/tests/collections/redistribute/testing_redistribute.c @@ -580,7 +580,7 @@ int main(int argc, char *argv[]) } /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -603,7 +603,7 @@ int main(int argc, char *argv[]) /* Timer end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_PTG\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_PTG\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -611,7 +611,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_ptg = sync_time_elapsed; } @@ -671,7 +671,7 @@ int main(int argc, char *argv[]) } /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute_dtd; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -688,7 +688,7 @@ int main(int argc, char *argv[]) /* Timer 
end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_DTD\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_DTD\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -696,7 +696,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_dtd = sync_time_elapsed; } @@ -740,7 +740,7 @@ int main(int argc, char *argv[]) if( time ) { /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Call parsec_redistribute_bound to get time bound */ results = parsec_redistribute_bound(parsec, dcY.desc, @@ -750,7 +750,7 @@ int main(int argc, char *argv[]) /* Timer end */ #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_bound\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_bound\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -758,7 +758,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif } diff --git a/tests/collections/redistribute/testing_redistribute_random.c b/tests/collections/redistribute/testing_redistribute_random.c index 001433fac..97abb1a52 100644 --- a/tests/collections/redistribute/testing_redistribute_random.c +++ b/tests/collections/redistribute/testing_redistribute_random.c @@ -142,7 +142,7 @@ int main(int argc, char *argv[]) (parsec_tiled_matrix_unary_op_t)redistribute_init_ops, &op_args); /* Timer start */ - SYNC_TIME_START(); + 
SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -161,7 +161,7 @@ int main(int argc, char *argv[]) /* Timer end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_PTG\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_PTG\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -169,7 +169,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_ptg = sync_time_elapsed; } @@ -215,7 +215,7 @@ int main(int argc, char *argv[]) (parsec_tiled_matrix_unary_op_t)redistribute_init_ops, &op_args_dtd); /* Timer start */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); /* Main part, call parsec_redistribute_dtd; double is default, which could be * changed in parsec/data_dist/matrix/redistribute/redistribute_internal.h @@ -228,7 +228,7 @@ int main(int argc, char *argv[]) /* Timer end */ if( time ) { #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_DTD\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_DTD\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -236,7 +236,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif time_dtd = sync_time_elapsed; } @@ -272,7 +272,7 @@ int main(int argc, char *argv[]) if( time ) { /* Timer start */ - SYNC_TIME_START(); + 
SYNC_TIME_START(parsec); /* Call parsec_redistribute_bound to get time bound */ results = parsec_redistribute_bound(parsec, (parsec_tiled_matrix_t *)&dcY, @@ -282,7 +282,7 @@ int main(int argc, char *argv[]) /* Timer end */ #if PRINT_MORE - SYNC_TIME_PRINT(rank, ("\"testing_redistribute_bound\"" + SYNC_TIME_PRINT(parsec, rank, ("\"testing_redistribute_bound\"" "\tRedistributed Size: m= %d n= %d" "\tSource: P= %d Q= %d M= %d N= %d MB= %d NB= %d I= %d J=%d SMB= %d SNB= %d" "\tTarget: PR= %d QR= %d MR= %d NR= %d MBR= %d NBR= %d i= %d j=%d SMBR= %d SNBR= %d" @@ -290,7 +290,7 @@ int main(int argc, char *argv[]) size_row, size_col, P, Q, M, N, MB, NB, disi_Y, disj_Y, SMB, SNB, PR, QR, MR, NR, MBR, NBR, disi_T, disj_T, SMBR, SNBR, cores)); #else - SYNC_TIME_STOP(); + SYNC_TIME_STOP(parsec); #endif } diff --git a/tests/collections/reduce.c b/tests/collections/reduce.c index d2adb7c89..09a99e9e5 100644 --- a/tests/collections/reduce.c +++ b/tests/collections/reduce.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" @@ -9,6 +10,8 @@ #include "parsec/arena.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/datatype.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include #include "parsec/data_dist/matrix/reduce.h" @@ -42,15 +45,6 @@ int main( int argc, char* argv[] ) char **pargv; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - pargc = 0; pargv = NULL; for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -60,7 +54,10 @@ int main( int argc, char* argv[] ) } } - parsec = parsec_init(cores, &pargc, &pargv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &dcA, PARSEC_MATRIX_FLOAT, PARSEC_MATRIX_TILE, rank, nb, 1, ln, 1, 0, 0, ln, 1, @@ -96,11 +93,8 @@ int main( int argc, char* argv[] ) parsec_data_free(dcA.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif /* defined(PARSEC_HAVE_MPI) */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/collections/reshape/CMakeLists.txt b/tests/collections/reshape/CMakeLists.txt index 333866159..6e2c567b2 100644 --- a/tests/collections/reshape/CMakeLists.txt +++ b/tests/collections/reshape/CMakeLists.txt @@ -2,16 +2,19 @@ include(ParsecCompilePTG) set(JDF_SOURCES "local_no_reshape.jdf;local_read_reshape.jdf;local_output_reshape.jdf;local_input_reshape.jdf;remote_read_reshape.jdf;remote_no_re_reshape.jdf;local_input_LU_LL.jdf;") parsec_addtest_executable(C reshape SOURCES testing_reshape.c common.c) +target_link_libraries(reshape PRIVATE tests_runtime_common) 
target_ptg_sources(reshape PRIVATE ${JDF_SOURCES}) parsec_addtest_executable(C input_dep_reshape_single_copy SOURCES testing_input_dep_reshape_single_copy.c common.c) +target_link_libraries(input_dep_reshape_single_copy PRIVATE tests_runtime_common) target_ptg_sources(input_dep_reshape_single_copy PRIVATE "input_dep_single_copy_reshape.jdf;") parsec_addtest_executable(C remote_multiple_outs_same_pred_flow SOURCES testing_remote_multiple_outs_same_pred_flow.c common.c) +target_link_libraries(remote_multiple_outs_same_pred_flow PRIVATE tests_runtime_common) target_ptg_sources(remote_multiple_outs_same_pred_flow PRIVATE "remote_multiple_outs_same_pred_flow.jdf;remote_multiple_outs_same_pred_flow_multiple_deps.jdf;") set(JDF_SOURCES "avoidable_reshape.jdf;") parsec_addtest_executable(C avoidable_reshape SOURCES testing_avoidable_reshape.c common.c) +target_link_libraries(avoidable_reshape PRIVATE tests_runtime_common) target_ptg_sources(avoidable_reshape PRIVATE ${JDF_SOURCES}) - diff --git a/tests/collections/reshape/common.c b/tests/collections/reshape/common.c index 5d5bc31c6..1415516d1 100644 --- a/tests/collections/reshape/common.c +++ b/tests/collections/reshape/common.c @@ -157,7 +157,10 @@ int reshape_set_matrix_value_position_swap(parsec_execution_stream_t *es, return 0; } -int check_matrix_equal(parsec_matrix_block_cyclic_t dcA, parsec_matrix_block_cyclic_t dcA_check){ +int check_matrix_equal(parsec_context_t *parsec, + parsec_matrix_block_cyclic_t dcA, + parsec_matrix_block_cyclic_t dcA_check) +{ int ret = 0; for(size_t i = 0; i < (dcA_check.super.nb_local_tiles * dcA_check.super.bsiz); i++) { if( ((int*)dcA.mat)[i] != ((int*)dcA_check.mat)[i]){ @@ -165,9 +168,12 @@ int check_matrix_equal(parsec_matrix_block_cyclic_t dcA, parsec_matrix_block_cyc break; } } -#if defined(PARSEC_HAVE_MPI) - MPI_Allreduce(MPI_IN_PLACE, &ret, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); -#endif + int rc = parsec_tests_allreduce(parsec, NULL, &ret, 1, + parsec_datatype_int_t, + 
PARSEC_TESTS_REDUCE_SUM); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + } return ret; } diff --git a/tests/collections/reshape/common.h b/tests/collections/reshape/common.h index 0f1fca88a..0f2a1d580 100644 --- a/tests/collections/reshape/common.h +++ b/tests/collections/reshape/common.h @@ -2,6 +2,7 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #ifndef _reshape_h @@ -14,10 +15,8 @@ #include "parsec/profiling.h" #include "parsec/execution_stream.h" #include "parsec/utils/mca_param.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" int reshape_set_matrix_value(parsec_execution_stream_t *es, const parsec_tiled_matrix_t *descA, @@ -50,35 +49,22 @@ int reshape_set_matrix_value_position_swap(parsec_execution_stream_t *es, void *_A, parsec_matrix_uplo_t uplo, int m, int n, void *args); -int check_matrix_equal(parsec_matrix_block_cyclic_t dcA, parsec_matrix_block_cyclic_t dcA_check); +int check_matrix_equal(parsec_context_t *parsec, + parsec_matrix_block_cyclic_t dcA, + parsec_matrix_block_cyclic_t dcA_check); int reshape_print(parsec_execution_stream_t *es, const parsec_tiled_matrix_t *descA, void *_A, parsec_matrix_uplo_t uplo, int m, int n, void *args); -#if defined(PARSEC_HAVE_MPI) -#define BARRIER MPI_Barrier(MPI_COMM_WORLD); -#else -#define BARRIER -#endif - -#if defined(PARSEC_HAVE_MPI) - #define DO_INIT_MPI() \ - int provided; \ - int requested = m ? 
MPI_THREAD_MULTIPLE : MPI_THREAD_SERIALIZED; \ - MPI_Init_thread(&argc, &argv, requested, &provided); \ - MPI_Comm_size(MPI_COMM_WORLD, &nodes); \ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ - if( requested > provided ) { \ - fprintf(stderr, "#XXXXX User requested %s but the implementation returned a lower thread\n", requested==MPI_THREAD_MULTIPLE? "MPI_THREAD_MULTIPLE": "MPI_THREAD_SERIALIZED");\ - exit(2); \ - } -#else - #define DO_INIT_MPI() \ - nodes = 1; \ - rank = 0; -#endif +#define BARRIER do { \ + int _barrier_rc = parsec_tests_barrier(parsec); \ + if( (PARSEC_SUCCESS != _barrier_rc) && \ + (PARSEC_ERR_NOT_IMPLEMENTED != _barrier_rc) ) { \ + PARSEC_CHECK_ERROR(_barrier_rc, "parsec_tests_barrier"); \ + } \ +} while(0) #define DO_INIT() \ char *name; \ @@ -96,7 +82,7 @@ int reshape_print(parsec_execution_stream_t *es, case 'w': do_sleep = 1; break; \ case '?': case 'h': default: \ fprintf(stderr, \ - "-m : initialize MPI_THREAD_MULTIPLE (default: 0/no)\n" \ + "-m : request multiple-thread support from the test runtime (default: 0/no)\n"\ "-N : rowxcolumn dimension (N, M) of the matrices (default: 8)\n"\ "-t : row dimension (MB) of the tiles (default: 4)\n" \ "-T : column dimension (NB) of the tiles (default: 4)\n" \ @@ -107,9 +93,6 @@ int reshape_print(parsec_execution_stream_t *es, exit(1); \ } \ } \ - DO_INIT_MPI(); \ - if(do_sleep) sleep(10); \ - /* Initialize PaRSEC */ \ pargc = 0; pargv = NULL; \ for(int i = 1; i < argc; i++) { \ if( strcmp(argv[i], "--") == 0 ) { \ @@ -118,12 +101,12 @@ int reshape_print(parsec_execution_stream_t *es, break; \ } \ } \ - parsec = parsec_init(cores, &pargc, &pargv); \ - if( NULL == parsec ) { \ - /* Failed to correctly initialize. In a correct scenario report*/ \ - /* upstream, but in this particular case bail out.*/ \ - exit(-1); \ - } \ + int _init_rc = parsec_tests_context_init(cores, \ + m ? 
PARSEC_TEST_THREAD_MULTIPLE : \ + PARSEC_TEST_THREAD_SERIALIZED, \ + &pargc, &pargv, &parsec, &rank, &nodes); \ + PARSEC_CHECK_ERROR(_init_rc, "parsec_tests_context_init"); \ + if(do_sleep) sleep(10); \ (void)name; @@ -158,7 +141,7 @@ int reshape_print(parsec_execution_stream_t *es, #define DO_CHECK(NAME, dc, dc_check) do { \ - cret = check_matrix_equal(dc, dc_check ); \ + cret = check_matrix_equal(parsec, dc, dc_check ); \ if(rank==0) \ printf("Test " #NAME " %s\n", (cret > 0)? "FAILED" : "PASSED"); \ ret |= cret; \ @@ -173,7 +156,7 @@ int reshape_print(parsec_execution_stream_t *es, parsec_context_wait(parsec); \ parsec_taskpool_free((parsec_taskpool_t*)tp); \ \ - cret = check_matrix_equal(dcA, dcA_check ); \ + cret = check_matrix_equal(parsec, dcA, dcA_check ); \ if(rank==0) \ printf("Test " #NAME " %s\n", (cret > 0)? "FAILED" : "PASSED"); \ ret |= cret; \ diff --git a/tests/collections/reshape/testing_avoidable_reshape.c b/tests/collections/reshape/testing_avoidable_reshape.c index 59327458a..ee8ee5da4 100644 --- a/tests/collections/reshape/testing_avoidable_reshape.c +++ b/tests/collections/reshape/testing_avoidable_reshape.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -107,11 +104,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcA_check.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA_check); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/reshape/testing_input_dep_reshape_single_copy.c b/tests/collections/reshape/testing_input_dep_reshape_single_copy.c index 4c6d00972..5ea6b0477 100644 --- a/tests/collections/reshape/testing_input_dep_reshape_single_copy.c +++ b/tests/collections/reshape/testing_input_dep_reshape_single_copy.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -42,7 +39,19 @@ int main(int argc, char *argv[]) DO_INIT(); - assert(cores == 2); + int runtime_cores = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_CORES); + PARSEC_CHECK_ERROR(runtime_cores, "parsec_context_query(PARSEC_CONTEXT_QUERY_CORES)"); + if( (cores < 2) || (runtime_cores < 2) ) { + if( 0 == rank ) { + fprintf(stderr, + "input_dep_single_copy_reshape requires at least two PaRSEC execution streams " + "(requested %d, runtime provided %d)\n", + cores, runtime_cores); + } + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; + } DO_INI_DATATYPES(); @@ -90,11 +99,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcA_check.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA_check); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c b/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c index 6a7803e13..483a535be 100644 --- a/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c +++ b/tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -169,11 +166,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcV.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcV); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/reshape/testing_reshape.c b/tests/collections/reshape/testing_reshape.c index d7f75601a..9ea8338a2 100644 --- a/tests/collections/reshape/testing_reshape.c +++ b/tests/collections/reshape/testing_reshape.c @@ -2,14 +2,11 @@ * Copyright (c) 2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "common.h" @@ -259,11 +256,8 @@ int main(int argc, char *argv[]) parsec_data_free(dcA_check.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA_check); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + int fini_rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(fini_rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/collections/two_dim_band/CMakeLists.txt b/tests/collections/two_dim_band/CMakeLists.txt index ab5abf708..bcce8d1b4 100644 --- a/tests/collections/two_dim_band/CMakeLists.txt +++ b/tests/collections/two_dim_band/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) parsec_addtest_executable(C testing_band SOURCES main.c) +target_link_libraries(testing_band PRIVATE tests_runtime_common) target_include_directories(testing_band PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(testing_band PRIVATE "two_dim_band.jdf;two_dim_band_free.jdf") - diff --git a/tests/collections/two_dim_band/main.c b/tests/collections/two_dim_band/main.c index 5afaa56b9..989402d42 100644 --- a/tests/collections/two_dim_band/main.c +++ b/tests/collections/two_dim_band/main.c @@ -2,19 +2,18 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic_band.h" #include "parsec/data_dist/matrix/sym_two_dim_rectangle_cyclic_band.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "two_dim_band_test.h" #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif - int main(int argc, char *argv[]) { parsec_context_t* parsec; @@ -23,23 +22,12 @@ int main(int argc, char *argv[]) char **pargv = NULL; parsec_matrix_uplo_t uplo = PARSEC_MATRIX_UPPER; //PARSEC_MATRIX_LOWER parsec_matrix_uplo_t full = PARSEC_MATRIX_FULL; + int rc; /* Super */ int N = 16, NB = 4, P = 1, KP = 1, KQ = 1; /* Band */ int P_BAND = 1, KP_BAND = 1, KQ_BAND = 1, BAND_SIZE = 1; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - for(i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { pargc = argc - i; @@ -49,7 +37,10 @@ int main(int argc, char *argv[]) } /* Initialize PaRSEC */ - parsec = parsec_init(-1, &pargc, &pargv); + rc = 
parsec_tests_context_init(-1, PARSEC_TEST_THREAD_MULTIPLE, + &pargc, &pargv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); while ((ch = getopt(argc, argv, "N:T:s:S:P:p:f:F:b:h")) != -1) { switch (ch) { @@ -143,11 +134,8 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy(&dcYP.off_band.super); /* Clean up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/collections/two_dim_band/two_dim_band_test.h b/tests/collections/two_dim_band/two_dim_band_test.h index 01a4a692d..b79386eb6 100644 --- a/tests/collections/two_dim_band/two_dim_band_test.h +++ b/tests/collections/two_dim_band/two_dim_band_test.h @@ -2,13 +2,10 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/matrix.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif - /** * @param [in] Y: the data, already distributed and allocated * @param [in] uplo: Upper / Lower / UpperLower diff --git a/tests/dsl/dtd/CMakeLists.txt b/tests/dsl/dtd/CMakeLists.txt index b5da79051..687e16742 100644 --- a/tests/dsl/dtd/CMakeLists.txt +++ b/tests/dsl/dtd/CMakeLists.txt @@ -1,4 +1,8 @@ -link_libraries(tests_common) +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + +link_libraries(tests_common tests_runtime_common) parsec_addtest_executable(C dtd_test_empty SOURCES dtd_test_empty.c) parsec_addtest_executable(C dtd_test_pingpong SOURCES dtd_test_pingpong.c) diff --git a/tests/dsl/dtd/dtd_test_allreduce.c b/tests/dsl/dtd/dtd_test_allreduce.c index 4899875dc..bfc825b94 100644 --- a/tests/dsl/dtd/dtd_test_allreduce.c +++ b/tests/dsl/dtd/dtd_test_allreduce.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* Naive star-based reduce-bcast allreduce; just an example, so keep it * simple... */ @@ -19,15 +20,12 @@ #include "parsec/data_internal.h" #include "parsec/execution_stream.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - static int verbose = 0; /* IDs for the Arena Datatypes */ @@ -90,25 +88,12 @@ int main(int argc, char **argv) { parsec_context_t* parsec; parsec_arena_datatype_t *adt; - int rc, nb, nt; + int rc, nb, nt = 0; int rank, world, cores = -1, root = 0; int i; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world*10; /* total no. 
of tiles */ verbose = 0; int pargc = 0; char **pargv = NULL; @@ -120,7 +105,7 @@ int main(int argc, char **argv) } if( 0 == strncmp(argv[i], "-n=", 3) ) { nt = strtol(argv[i]+3, NULL, 10); - if( 0 >= nt ) nt = world*10; /* set to default value */ + if( 0 >= nt ) nt = 0; /* set to default value after rank discovery */ continue; } if( 0 == strncmp(argv[i], "-v", 2) ) { @@ -129,9 +114,13 @@ int main(int argc, char **argv) } } - parsec = parsec_init( cores, &pargc, &pargv ); - if( NULL == parsec ) { - return -1; + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( 0 >= nt ) { + nt = world*10; /* total no. of tiles */ } parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -231,11 +220,8 @@ int main(int argc, char **argv) parsec_tiled_matrix_destroy(dcA); free(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_batch_cpu.c b/tests/dsl/dtd/dtd_test_batch_cpu.c index 96d20324e..39a091a9d 100644 --- a/tests/dsl/dtd/dtd_test_batch_cpu.c +++ b/tests/dsl/dtd/dtd_test_batch_cpu.c @@ -8,10 +8,7 @@ #include "parsec.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/mca/device/device.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "tests/tests_runtime.h" #include #include @@ -41,14 +38,6 @@ main(int argc, char **argv) int expected = 0; int ret = 0; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - } -#endif - if( NULL != argv[1] ) { ntasks = atoi(argv[1]); } @@ -56,8 +45,9 @@ main(int argc, char **argv) ntasks = 32; } - parsec = parsec_init(-1, &argc, &argv); - assert(NULL != parsec); + rc = parsec_tests_context_init(-1, 
PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); dtd_tp = parsec_dtd_taskpool_new(); @@ -102,11 +92,8 @@ main(int argc, char **argv) parsec_dtd_task_class_release(dtd_tp, tc); parsec_taskpool_free(dtd_tp); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/dtd/dtd_test_broadcast.c b/tests/dsl/dtd/dtd_test_broadcast.c index 7ac4b4301..f7f0b6612 100644 --- a/tests/dsl/dtd/dtd_test_broadcast.c +++ b/tests/dsl/dtd/dtd_test_broadcast.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -63,26 +61,18 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world; /* total no. 
of tiles */ if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = world; /* total no. of tiles */ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -158,11 +148,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_ce.c b/tests/dsl/dtd/dtd_test_ce.c index c681f6e56..edf728b10 100644 --- a/tests/dsl/dtd/dtd_test_ce.c +++ b/tests/dsl/dtd/dtd_test_ce.c @@ -2,8 +2,8 @@ * Copyright (c) 2022-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ -#include #include #include #include @@ -12,12 +12,18 @@ #include "parsec/parsec_comm_engine.h" #include "parsec/runtime.h" +#include "tests/tests_runtime.h" -#define ACTIVE_MESSAGE_FROM_0_TAG 2 -#define ACTIVE_MESSAGE_FROM_1_TAG 3 -#define NOTIFY_ABOUT_GET_FROM_0_TAG 4 -#define NOTIFY_ABOUT_PUT_FROM_0_TAG 5 -#define NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG 6 +/* + * parsec_init() registers the runtime's own communication-engine control tags. + * Keep this direct CE test on separate tags so the test callbacks do not + * collide with remote-dependency callbacks installed by the runtime. 
+ */ +#define ACTIVE_MESSAGE_FROM_0_TAG 7 +#define ACTIVE_MESSAGE_FROM_1_TAG 8 +#define NOTIFY_ABOUT_GET_FROM_0_TAG 9 +#define NOTIFY_ABOUT_PUT_FROM_0_TAG 10 +#define NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG 11 int get_end(parsec_comm_engine_t *ce, @@ -434,29 +440,26 @@ put_end_ack(parsec_comm_engine_t *ce, int main(int argc, char **argv) { + parsec_context_t *parsec = NULL; int rank, world; int i; + int rc; int test_GET = 1; int test_PUT = 1; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); my_rank = rank; - parsec_comm_engine_t *ce = parsec_comm_engine_init(NULL); + parsec_comm_engine_t *ce = &parsec_ce; if( world != 2 ) { printf("World is too small, too bad! 
Buh-bye"); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } @@ -480,7 +483,8 @@ int main(int argc, char **argv) ce->enable(ce); /* To make sure all the ranks have the tags registered */ - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); /* Testing active message */ if(rank == 0) { @@ -504,7 +508,8 @@ int main(int argc, char **argv) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); counter = 0; printf("-------------------------------------\n"); @@ -528,7 +533,8 @@ int main(int argc, char **argv) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); counter = 0; printf("-------------------------------------\n"); @@ -603,7 +609,8 @@ int main(int argc, char **argv) } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); counter = 0; if(test_PUT) { @@ -673,7 +680,8 @@ int main(int argc, char **argv) } } - MPI_Barrier(MPI_COMM_WORLD); + rc = parsec_tests_barrier(parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); ce->tag_unregister(ACTIVE_MESSAGE_FROM_0_TAG); ce->tag_unregister(ACTIVE_MESSAGE_FROM_1_TAG); @@ -681,11 +689,8 @@ int main(int argc, char **argv) ce->tag_unregister(NOTIFY_ABOUT_PUT_FROM_0_TAG); ce->tag_unregister(NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG); - parsec_comm_engine_fini(ce); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_cuda_again_async.c b/tests/dsl/dtd/dtd_test_cuda_again_async.c index a3a4e6c19..bcf5b67c3 100644 --- a/tests/dsl/dtd/dtd_test_cuda_again_async.c +++ b/tests/dsl/dtd/dtd_test_cuda_again_async.c @@ -2,6 +2,7 @@ * Copyright (c) 2023 The University of Tennessee 
and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" @@ -10,10 +11,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "tests/tests_data.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ +#include "tests/tests_runtime.h" void parsec_dtd_pack_args( parsec_task_t *this_task, ... ) { @@ -124,21 +122,11 @@ int main(int argc, char* argv[]) { int ret; parsec_context_t *parsec_context = NULL; - int rank, world; + int world; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - parsec_context = parsec_init(-1, NULL, NULL); + ret = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + NULL, NULL, &parsec_context, NULL, &world); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_init"); // Create new DTD taskpool parsec_taskpool_t *tp = parsec_dtd_taskpool_new(); @@ -192,9 +180,6 @@ int main(int argc, char* argv[]) parsec_taskpool_free(tp); - parsec_fini(&parsec_context); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + ret = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_fini"); } diff --git a/tests/dsl/dtd/dtd_test_cuda_task_insert.c b/tests/dsl/dtd/dtd_test_cuda_task_insert.c index 0b0b78de5..08feee78d 100644 --- a/tests/dsl/dtd/dtd_test_cuda_task_insert.c +++ b/tests/dsl/dtd/dtd_test_cuda_task_insert.c @@ -11,10 +11,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "tests/tests_data.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ +#include "tests/tests_runtime.h" static int 
TILE_FULL; @@ -881,26 +878,18 @@ int main(int argc, char **argv) parsec_context_t *parsec_context = NULL; int rank, world; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - // Number of CPU cores involved int ncores = -1; // Use all available cores - parsec_context = parsec_init(ncores, &argc, &argv); + rc = parsec_tests_context_init(ncores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec_context, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); rc = !(get_nb_cuda_devices() >= 1); print_test_result("Have CUDA accelerators", rc); if(rc != 0) { - parsec_fini(&parsec_context); + rc = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return -1; } @@ -941,11 +930,8 @@ int main(int argc, char **argv) rc = test_cuda_multiple_devices(world, rank, parsec_context); ret += print_test_result("cuda multiple devices", rc); - parsec_fini(&parsec_context); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/dtd/dtd_test_data_flush.c b/tests/dsl/dtd/dtd_test_data_flush.c index 98ccc6b81..abd41f8c9 100644 --- a/tests/dsl/dtd/dtd_test_data_flush.c +++ b/tests/dsl/dtd/dtd_test_data_flush.c @@ -2,6 +2,7 @@ * Copyright (c) 2018-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -94,18 +92,6 @@ int main(int argc, char ** argv) int nb, nt, rc; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - if(argv[1] != NULL){ cores = atoi(argv[1]); } @@ -113,7 +99,10 @@ int main(int argc, char ** argv) int i, j, total_tasks = 10000; /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_taskpool_t *dtd_tp; /* @@ -420,11 +409,8 @@ int main(int argc, char ** argv) parsec_dtd_free_arena_datatype(parsec, TILE_FULL); } - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_empty.c b/tests/dsl/dtd/dtd_test_empty.c index cf5eeeef5..840917b5f 100644 --- a/tests/dsl/dtd/dtd_test_empty.c +++ b/tests/dsl/dtd/dtd_test_empty.c @@ -2,32 +2,25 @@ * Copyright (c) 2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" int main(int argc, char **argv) { parsec_context_t* parsec; - int rank = 0, world = 1; + int rc; -#if defined(PARSEC_HAVE_MPI) - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - parsec = parsec_init( -1, &argc, &argv ); parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_explicit_task_creation.c b/tests/dsl/dtd/dtd_test_explicit_task_creation.c index c34978a9c..8ee8e3a1d 100644 --- a/tests/dsl/dtd/dtd_test_explicit_task_creation.c +++ b/tests/dsl/dtd/dtd_test_explicit_task_creation.c @@ -13,6 +13,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -68,30 +65,24 @@ int main(int argc, char ** argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); + if(argv[1] != NULL){ + cores = atoi(argv[1]); } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif - if( world != 1 ) { - parsec_fatal( "Nope! 
world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - if(argv[1] != NULL){ - cores = atoi(argv[1]); + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); } no_of_tasks = world; nb = 1; /* tile_size */ nt = no_of_tasks; /* total no. of tiles */ - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); adt = parsec_matrix_adt_new_rect( @@ -160,11 +151,8 @@ int main(int argc, char ** argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_flag_dont_track.c b/tests/dsl/dtd/dtd_test_flag_dont_track.c index 355dcb65a..6d15a78a0 100644 --- a/tests/dsl/dtd/dtd_test_flag_dont_track.c +++ b/tests/dsl/dtd/dtd_test_flag_dont_track.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int task_to_check_dont_track(parsec_execution_stream_t *es, parsec_task_t *this_task) { @@ -45,29 +43,20 @@ int main(int argc, char ** argv) int nb, nt, rc; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } - if(argv[1] != NULL){ cores = atoi(argv[1]); } /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. 
" + "Try with a single-process launcher.\n" ); + } /****** Checking Don't track flag ******/ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -126,11 +115,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c b/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c index a744f0645..c49a55056 100644 --- a/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c +++ b/tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c @@ -2,6 +2,7 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -12,43 +13,32 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int main(int argc, char **argv) { parsec_context_t* parsec; - /*int rc;*/ + int rc; int rank, world, cores; int nb, nt; parsec_tiled_matrix_t *dcA, *dcB, *dcC; uint32_t id = 0; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world; /* total no. of tiles */ cores = 8; - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = world; /* total no. 
of tiles */ dcA = create_and_distribute_data(rank, world, nb, nt); parsec_data_collection_set_key((parsec_data_collection_t *)dcA, "A"); @@ -91,11 +81,8 @@ int main(int argc, char **argv) free_data(dcB); free_data(dcC); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_hierarchy.c b/tests/dsl/dtd/dtd_test_hierarchy.c index dfb2179c0..d82354345 100644 --- a/tests/dsl/dtd/dtd_test_hierarchy.c +++ b/tests/dsl/dtd/dtd_test_hierarchy.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. */ /* parsec things */ @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* This testing shows graph pruning as well as hierarchical execution. 
* The only restriction is the parsec_taskpool_wait() before parsec_context_wait() */ @@ -106,25 +103,15 @@ int main(int argc, char ** argv) int rank, world, cores = -1, rc; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m; int nb, nt; parsec_tiled_matrix_t *dcA; parsec_taskpool_t *dtd_tp; - parsec = parsec_init( cores, &argc, &argv ); - dtd_tp = parsec_dtd_taskpool_new(); /* Registering the dtd_handle with PARSEC context */ @@ -144,7 +131,7 @@ int main(int argc, char ** argv) parsec_data_collection_t *A = (parsec_data_collection_t *)dcA; parsec_dtd_data_collection_init(A); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); rc = parsec_context_start( parsec ); PARSEC_CHECK_ERROR(rc, "parsec_context_start"); @@ -167,7 +154,7 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\n") ); + SYNC_TIME_PRINT(parsec, rank, ("\n") ); parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( A ); @@ -175,11 +162,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_insert_task_interface.c b/tests/dsl/dtd/dtd_test_insert_task_interface.c index 452284c7f..1deeac952 100644 --- a/tests/dsl/dtd/dtd_test_insert_task_interface.c +++ b/tests/dsl/dtd/dtd_test_insert_task_interface.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee 
and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -12,6 +13,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -19,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -68,33 +66,24 @@ int main(int argc, char ** argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); + if(argv[1] != NULL){ + cores = atoi(argv[1]); } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - if(argv[1] != NULL){ - cores = atoi(argv[1]); + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); } no_of_tasks = world; nb = 1; /* tile_size */ nt = no_of_tasks; /* total no. 
of tiles */ - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); adt = parsec_matrix_adt_new_rect( @@ -160,11 +149,8 @@ int main(int argc, char ** argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_interleave_actions.c b/tests/dsl/dtd/dtd_test_interleave_actions.c index c76925ade..9a2c2d625 100644 --- a/tests/dsl/dtd/dtd_test_interleave_actions.c +++ b/tests/dsl/dtd/dtd_test_interleave_actions.c @@ -2,12 +2,9 @@ * Copyright (c) 2020-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ -#if defined(PARSEC_HAVE_MPI) -#include "mpi.h" -#endif /* defined(PARSEC_HAVE_MPI) */ - #include #include "parsec.h" @@ -15,6 +12,7 @@ #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "tests/tests_data.h" +#include "tests/tests_runtime.h" /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -107,18 +105,6 @@ int main(int argc, char **argv) { pargc = argc - optind; pargv = argv + optind; - #if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - int nb = 1; int nt = 1; @@ -126,7 +112,10 @@ int main(int argc, char **argv) { nt = 1; int ncores = -1; - parsec_context = parsec_init(ncores, &pargc, &pargv); + ret = parsec_tests_context_init(ncores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec_context, &rank, &world); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_init"); if(world == 1) { parsec_warning("*** This test only 
makes sense with at least two nodes"); @@ -219,11 +208,8 @@ int main(int argc, char **argv) { parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec_context); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + ret = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(ret, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_multiple_handle_wait.c b/tests/dsl/dtd/dtd_test_multiple_handle_wait.c index 117097ab1..2d651db40 100644 --- a/tests/dsl/dtd/dtd_test_multiple_handle_wait.c +++ b/tests/dsl/dtd/dtd_test_multiple_handle_wait.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -38,25 +35,10 @@ task_to_check_generation(parsec_execution_stream_t *es, parsec_task_t *this_task int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; int parsec_argc; char** parsec_argv; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This test requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec_argv = &argv[1]; parsec_argc = argc - 1; if(argv[1] != NULL) { @@ -66,10 +48,10 @@ int main(int argc, char ** argv) } /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init( cores, &parsec_argc, &parsec_argv ); - if( NULL == parsec ) 
{ - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &parsec_argc, &parsec_argv, + &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /****** Checking task generation ******/ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -88,7 +70,7 @@ int main(int argc, char ** argv) PARSEC_CHECK_ERROR(rc, "parsec_context_start"); for( i = 0; i < 6; i++ ) { - SYNC_TIME_START(); + SYNC_TIME_START(parsec); for( j = 0; j < total_tasks; j++ ) { /* This task does not have any data associated with it, so it will be inserted in all mpi processes */ parsec_dtd_insert_task(dtd_tp, task_to_check_generation, 0, PARSEC_DEV_CPU, "sample_task", @@ -97,7 +79,7 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("\n")); + SYNC_TIME_PRINT(parsec, rank, ("\n")); } parsec_taskpool_free( dtd_tp ); @@ -105,11 +87,8 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_new_tile.c b/tests/dsl/dtd/dtd_test_new_tile.c index c0ef693f1..75fc61a82 100644 --- a/tests/dsl/dtd/dtd_test_new_tile.c +++ b/tests/dsl/dtd/dtd_test_new_tile.c @@ -21,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; static int32_t nb_errors = 0; @@ -259,24 +255,11 @@ int main(int argc, char **argv) parsec_device_cuda_module_t **gpu_devices = NULL; #endif -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark 
requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = NB; /* tile_size */ - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); #if defined(PARSEC_PROF_TRACE) parsec_profiling_start(); #endif @@ -520,11 +503,8 @@ int main(int argc, char **argv) parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if(nb_errors > 0) return EXIT_FAILURE; diff --git a/tests/dsl/dtd/dtd_test_null_as_tile.c b/tests/dsl/dtd/dtd_test_null_as_tile.c index 658f602ef..ab40b3c45 100644 --- a/tests/dsl/dtd/dtd_test_null_as_tile.c +++ b/tests/dsl/dtd/dtd_test_null_as_tile.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. 
*/ /* parsec things */ @@ -19,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -42,36 +39,26 @@ call_to_kernel_type( parsec_execution_stream_t *es, int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; if(argv[1] != NULL){ cores = atoi(argv[1]); } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m; int no_of_tasks = 1; - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); /* Registering the dtd_handle with PARSEC context */ rc = parsec_context_add_taskpool( parsec, dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); rc = parsec_context_start( parsec ); PARSEC_CHECK_ERROR(rc, "parsec_context_start"); @@ -89,15 +76,12 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\n")); + SYNC_TIME_PRINT(parsec, rank, ("\n")); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_pingpong.c b/tests/dsl/dtd/dtd_test_pingpong.c index b718410e0..1a189b6a0 100644 --- a/tests/dsl/dtd/dtd_test_pingpong.c +++ b/tests/dsl/dtd/dtd_test_pingpong.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2023 The University of Tennessee and The 
University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -10,6 +11,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "tests/tests_timing.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -18,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -81,35 +79,23 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } + if(argv[1] != NULL){ + cores = atoi(argv[1]); } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( world != 2 ) { - parsec_fatal( "Nope! world is not right, we need exactly two MPI process. " - "Try with \"mpirun -np 2 .....\"\n" ); + parsec_fatal( "Nope! world is not right, we need exactly two processes. " + "Try with a two-process launcher.\n" ); } nb = 1; /* tile_size */ nt = 2; /* total no. 
of tiles */ - if(argv[1] != NULL){ - cores = atoi(argv[1]); - } - - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); adt = parsec_matrix_adt_new_rect( @@ -221,7 +207,7 @@ int main(int argc, char **argv) parsec_data_collection_t *A = (parsec_data_collection_t *)dcA; parsec_dtd_data_collection_init(A); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); for( j = 0; j < repeat_pingpong; j++ ) { parsec_dtd_insert_task(dtd_tp, task_rank_0, 0, PARSEC_DEV_CPU, "task_for_timing_0", PASSED_BY_REF, PARSEC_DTD_TILE_OF_KEY(A, 0), PARSEC_INOUT | TILE_FULL | PARSEC_AFFINITY, @@ -238,7 +224,7 @@ int main(int argc, char **argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\tSize of message : %zu bytes\tTime for each pingpong : %12.5f\n", sizes[i]*sizeof(int), sync_time_elapsed/repeat_pingpong)); + SYNC_TIME_PRINT(parsec, rank, ("\tSize of message : %zu bytes\tTime for each pingpong : %12.5f\n", sizes[i]*sizeof(int), sync_time_elapsed/repeat_pingpong)); parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( A ); @@ -247,11 +233,8 @@ int main(int argc, char **argv) parsec_taskpool_free(dtd_tp); } - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_reduce.c b/tests/dsl/dtd/dtd_test_reduce.c index 91f887928..bb1dd002a 100644 --- a/tests/dsl/dtd/dtd_test_reduce.c +++ b/tests/dsl/dtd/dtd_test_reduce.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -66,26 +64,18 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = world; /* total no. of tiles */ if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = world; /* total no. 
of tiles */ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); @@ -148,11 +138,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_simple_gemm.c b/tests/dsl/dtd/dtd_test_simple_gemm.c index 0b8ac8d76..8df7ff504 100644 --- a/tests/dsl/dtd/dtd_test_simple_gemm.c +++ b/tests/dsl/dtd/dtd_test_simple_gemm.c @@ -11,6 +11,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/mca/device/device.h" +#include "tests/tests_runtime.h" // The file is not compiled if CUDA is not present or CUBLAS is not found #include "parsec/mca/device/cuda/device_cuda.h" @@ -46,10 +47,6 @@ extern void cblas_dgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, const double beta, double *C, const CBLAS_INDEX ldc); #endif -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - #include #include #include @@ -914,19 +911,9 @@ int main(int argc, char **argv) int M = 16 * mb, N = 16 * nb, K = 16 * kb; double min_perf=0.0; int runs = 5; + int ncores = -1; /* Use all available cores */ int debug=-1; - -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + int show_help = 0; while( 1 ) { int option_index = 0; @@ -1023,54 +1010,66 @@ int main(int argc, char **argv) break; case 'h': case '?': - if( 0 == rank ) { - fprintf(stderr, - "Usage %s [flags] [-- ]\n" - " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" - " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" - "\n" 
- " Compute pdgemm on a process grid of PxQ, using all available GPUs on each\n" - " node (modulo parsec options), using DTD. Compute C += AxB, where A is MxK\n" - " tiled in mb x kb, B is KxN tiled in kb x nb, and C is MxN tiled in mb x nb\n" - " Executes nruns iterations of the GEMM operation.\n" - " flags:\n" - " --M|-M / --K|-K / --N|-N: set M, K and N (resp.)\n" - " --mb|-m / --kb/-k / --nb|-n: set mb, kb and nb (resp.)\n" - " --nruns|-t: set the number of runs to do\n" - " --device|-d: which device to use (CPU or GPU)\n" - " --batch|-b: enable CUDA batch collection and submit\n" - " the collected GEMMs one by one\n" - " --batch-mode|-B: CUDA batching mode: none, one-by-one,\n" - " or cublas (default: %s)\n" - " --batch-size|-S: maximum number of GEMM tasks per CUDA\n" - " batch (default: %d)\n" - " --batch-slots|-L: maximum number of in-flight cuBLAS\n" - " batched submissions per stream (default: %d)\n" - " --verbose|-v: display which GEMM runs on which GPU\n" - " as execution is unfolding\n" - " --help|-h|-?: display this help\n" - " --debug|-D: blocks the process passed as parameter and\n" - " waits for gdb to connect to it\n" - " --Alarm|-A: sets the expected minimum performance for a\n" - " single GPU (kills the process if it takes longer\n" - " than the time corresponding to the expected\n" - " performance to complete the product)\n" - "\n" - " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" - " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" - "\n", - argv[0], gemm_cuda_batch_mode_name(), - cuda_max_batch_size, cuda_max_submitted_batches); - } -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif - exit(0); + show_help = 1; + break; + } + if( show_help ) { + break; } } int pargc = argc - optind; char **pargv = argv + optind; + rc = parsec_tests_context_init(ncores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec_context, &rank, &world); + PARSEC_CHECK_ERROR(rc, 
"parsec_tests_context_init"); + + if( show_help ) { + if( 0 == rank ) { + fprintf(stderr, + "Usage %s [flags] [-- ]\n" + " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" + " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" + "\n" + " Compute pdgemm on a process grid of PxQ, using all available GPUs on each\n" + " node (modulo parsec options), using DTD. Compute C += AxB, where A is MxK\n" + " tiled in mb x kb, B is KxN tiled in kb x nb, and C is MxN tiled in mb x nb\n" + " Executes nruns iterations of the GEMM operation.\n" + " flags:\n" + " --M|-M / --K|-K / --N|-N: set M, K and N (resp.)\n" + " --mb|-m / --kb/-k / --nb|-n: set mb, kb and nb (resp.)\n" + " --nruns|-t: set the number of runs to do\n" + " --device|-d: which device to use (CPU or GPU)\n" + " --batch|-b: enable CUDA batch collection and submit\n" + " the collected GEMMs one by one\n" + " --batch-mode|-B: CUDA batching mode: none, one-by-one,\n" + " or cublas (default: %s)\n" + " --batch-size|-S: maximum number of GEMM tasks per CUDA\n" + " batch (default: %d)\n" + " --batch-slots|-L: maximum number of in-flight cuBLAS\n" + " batched submissions per stream (default: %d)\n" + " --verbose|-v: display which GEMM runs on which GPU\n" + " as execution is unfolding\n" + " --help|-h|-?: display this help\n" + " --debug|-D: blocks the process passed as parameter and\n" + " waits for gdb to connect to it\n" + " --Alarm|-A: sets the expected minimum performance for a\n" + " single GPU (kills the process if it takes longer\n" + " than the time corresponding to the expected\n" + " performance to complete the product)\n" + "\n" + " Nota Bene: this test should not be used to evaluate performance of GEMM!\n" + " Use DPLASMA or other linear algebra libraries written on top of PaRSEC to evaluate this.\n" + "\n", + argv[0], gemm_cuda_batch_mode_name(), + cuda_max_batch_size, cuda_max_submitted_batches); + } + rc = 
parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return 0; + } + if( -1 == P ) P = (int)sqrt(world); if( -1 == Q ) @@ -1088,19 +1087,13 @@ int main(int argc, char **argv) while(loop) { sleep(1); } } - // Number of CPU cores involved - int ncores = -1; // Use all available cores - parsec_context = parsec_init(ncores, &pargc, &pargv); - int *gpu_device_index = NULL; if( PARSEC_DEV_CUDA == device ) { nbgpus = get_nb_gpu_devices(); rc = !(nbgpus >= 1); if( rc != 0 ) { fprintf(stderr, "Rank %d doesn't have CUDA accelerators\n", rank); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, 0); -#endif + parsec_tests_abort(parsec_context, 0); return -1; } gpu_device_index = get_gpu_device_index(); @@ -1119,9 +1112,7 @@ int main(int argc, char **argv) rc = preallocate_cuda_stream_states(); if( PARSEC_SUCCESS != rc ) { fprintf(stderr, "Failed to preallocate CUDA GEMM stream states\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, rc); -#endif + parsec_tests_abort(parsec_context, rc); return rc; } } @@ -1176,11 +1167,8 @@ int main(int argc, char **argv) destroy_matrix(dcB); destroy_matrix(dcC); - parsec_fini(&parsec_context); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec_context); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/dtd/dtd_test_task_generation.c b/tests/dsl/dtd/dtd_test_task_generation.c index aad36a3d5..d01268863 100644 --- a/tests/dsl/dtd/dtd_test_task_generation.c +++ b/tests/dsl/dtd/dtd_test_task_generation.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /* parsec things */ @@ -20,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -122,32 +119,19 @@ int main(int argc, char ** argv) int nb, nt, rc; parsec_tiled_matrix_t *dcA; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); - } - if(argv[1] != NULL){ cores = atoi(argv[1]); } /* Creating parsec context and initializing dtd environment */ - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. 
" + "Try with a single-process launcher.\n" ); + } /****** Checking task generation ******/ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -214,7 +198,7 @@ int main(int argc, char ** argv) rc = parsec_context_add_taskpool( parsec, dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); if( 1 == total_flows[i] ) { for( j = 0; j < total_flows[i] * total_tasks; j += total_flows[i] ) { @@ -295,7 +279,7 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("\tNo of flows : %d \tTime for each task : %lf\n\n", total_flows[i], sync_time_elapsed/total_tasks)); + SYNC_TIME_PRINT(parsec, rank, ("\tNo of flows : %d \tTime for each task : %lf\n\n", total_flows[i], sync_time_elapsed/total_tasks)); parsec_taskpool_free( dtd_tp ); parsec_dtd_data_collection_fini( A ); @@ -307,11 +291,8 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_task_inserting_task.c b/tests/dsl/dtd/dtd_test_task_inserting_task.c index c09e5e1e9..20e591df3 100644 --- a/tests/dsl/dtd/dtd_test_task_inserting_task.c +++ b/tests/dsl/dtd/dtd_test_task_inserting_task.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. 
*/ /* parsec things */ @@ -19,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed; double sync_time_elapsed; @@ -68,36 +65,26 @@ task_to_insert_task( parsec_execution_stream_t *es, int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; if(argv[1] != NULL){ cores = atoi(argv[1]); } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m; int no_of_tasks = 1; - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); /* Registering the dtd_handle with PARSEC context */ rc = parsec_context_add_taskpool(parsec, (parsec_taskpool_t *)dtd_tp); PARSEC_CHECK_ERROR(rc, "parsec_context_add_taskpool"); - SYNC_TIME_START(); + SYNC_TIME_START(parsec); rc = parsec_context_start(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_start"); @@ -120,15 +107,12 @@ int main(int argc, char ** argv) rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); - SYNC_TIME_PRINT(rank, ("\n")); + SYNC_TIME_PRINT(parsec, rank, ("\n")); parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_task_insertion.c b/tests/dsl/dtd/dtd_test_task_insertion.c index 6b2213f22..89769ed19 100644 --- a/tests/dsl/dtd/dtd_test_task_insertion.c +++ b/tests/dsl/dtd/dtd_test_task_insertion.c @@ -2,6 +2,7 @@ * Copyright (c) 
2017-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -19,10 +20,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed = 0.0; double sync_time_elapsed = 0.0; @@ -81,30 +78,21 @@ test_task_generator( parsec_execution_stream_t *es, int main(int argc, char ** argv) { parsec_context_t* parsec; - int rank, world, cores = -1, rc; + int rank, cores = -1, rc; if(argv[1] != NULL){ cores = atoi(argv[1]); } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); int m, n; int no_of_tasks = 50000; int amount_of_work[3] = {100, 1000, 10000}; parsec_taskpool_t *dtd_tp; - parsec = parsec_init( cores, &argc, &argv ); cores = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_CORES); dtd_tp = parsec_dtd_taskpool_new(); @@ -217,11 +205,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_task_placement.c b/tests/dsl/dtd/dtd_test_task_placement.c index adfe7fd9d..51bb84997 100644 --- a/tests/dsl/dtd/dtd_test_task_placement.c +++ b/tests/dsl/dtd/dtd_test_task_placement.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -13,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -89,28 +87,20 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + nb = 1; /* tile_size */ + + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( world != 2 ) { - parsec_fatal( "Nope! world is not right, we need exactly two MPI process. " - "Try with \"mpirun -np 2 .....\"\n" ); + parsec_fatal( "Nope! world is not right, we need exactly two processes. " + "Try with a two-process launcher.\n" ); } - nb = 1; /* tile_size */ nt = world; /* total no. 
of tiles */ - parsec = parsec_init(cores, &argc, &argv); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); adt = parsec_matrix_adt_new_rect( @@ -189,11 +179,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_template_counter.c b/tests/dsl/dtd/dtd_test_template_counter.c index 399f9ea12..f5b33ef35 100644 --- a/tests/dsl/dtd/dtd_test_template_counter.c +++ b/tests/dsl/dtd/dtd_test_template_counter.c @@ -14,6 +14,7 @@ #include "tests/tests_data.h" #include "tests/tests_timing.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" @@ -21,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - /* IDs for the Arena Datatypes */ static int TILE_FULL; @@ -63,26 +60,18 @@ int main(int argc, char **argv) parsec_tiled_matrix_t *dcA; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - nb = 1; /* tile_size */ - nt = (world > 1) ? world : 2; /* total no. of tiles */ if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init( cores, &argc, &argv ); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + nt = (world > 1) ? world : 2; /* total no. 
of tiles */ parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -148,11 +137,8 @@ int main(int argc, char **argv) parsec_dtd_data_collection_fini( A ); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c b/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c index 040fb825b..ec8dbb42e 100644 --- a/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c +++ b/tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c @@ -2,6 +2,7 @@ * Copyright (c) 2018-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -16,15 +17,12 @@ #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/scheduling.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int task(parsec_execution_stream_t *es, parsec_task_t *this_task) { @@ -94,30 +92,21 @@ int main(int argc, char **argv) { parsec_context_t* parsec; int rc, i; - int rank, world, cores = -1; - -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - if( world != 1 ) { - parsec_fatal( "Nope! world is not right, we need exactly one MPI process. 
" - "Try with \"mpirun -np 1 .....\"\n" ); - } + int world, cores = -1; if(argv[1] != NULL){ cores = atoi(argv[1]); } - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, NULL, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + + if( world != 1 ) { + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); + } parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); @@ -159,11 +148,8 @@ int main(int argc, char **argv) parsec_taskpool_free(dtd_tp); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_untie.c b/tests/dsl/dtd/dtd_test_untie.c index ef8dcaed2..ae6c3ebf9 100644 --- a/tests/dsl/dtd/dtd_test_untie.c +++ b/tests/dsl/dtd/dtd_test_untie.c @@ -21,10 +21,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - double time_elapsed = 0.0; double sync_time_elapsed = 0.0; @@ -95,24 +91,13 @@ int main(int argc, char ** argv) if( 0 >= cores ) cores = 8; /* fix it to a sane number */ -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if(MPI_THREAD_MULTIPLE > provided) { - parsec_fatal( "This benchmark requires MPI_THREAD_MULTIPLE because it uses simultaneously MPI within the PaRSEC runtime, and in the main program loop (in SYNC_TIME_START)"); - } - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_MULTIPLE, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( world != 1 ) { - parsec_fatal( "Nope! 
world is not right, we need exactly one MPI process. " - "Try with \"mpirun -np 1 .....\"\n" ); + parsec_fatal( "Nope! world is not right, we need exactly one process. " + "Try with a single-process launcher.\n" ); } int m, n; @@ -126,8 +111,6 @@ int main(int argc, char ** argv) no_of_chain = cores; int tasks_in_each_chain[3] = {1000, 10000, 100000}; - parsec = parsec_init( cores, &argc, &argv ); - dtd_tp = parsec_dtd_taskpool_new(); /* Registering the dtd_taskpool with PARSEC context */ @@ -153,7 +136,7 @@ int main(int argc, char ** argv) for( i = 0; i < 3; i++ ) { - SYNC_TIME_START(); + SYNC_TIME_START(parsec); for( n = 0; n < no_of_chain; n++ ) { for( m = 0; m < tasks_in_each_chain[i]; m++ ) { parsec_dtd_insert_task(dtd_tp, test_task, 0, PARSEC_DEV_CPU, "Test_Task", @@ -167,12 +150,12 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); + SYNC_TIME_PRINT(parsec, rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); } count = 0; for( i = 0; i < 3; i++ ) { - SYNC_TIME_START(); + SYNC_TIME_START(parsec); int step = parsec_dtd_window_size, iteration = 0; for( n = 0; n < no_of_chain; n++ ) { @@ -190,7 +173,7 @@ int main(int argc, char ** argv) rc = parsec_taskpool_wait( dtd_tp ); PARSEC_CHECK_ERROR(rc, "parsec_taskpool_wait"); - SYNC_TIME_PRINT(rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); + SYNC_TIME_PRINT(parsec, rank, ("No of chains : %d, No of tasks in each chain: %d, Amount of work: %d\n", no_of_chain, tasks_in_each_chain[i], amount_of_work[work_index])); } rc = parsec_context_wait(parsec); PARSEC_CHECK_ERROR(rc, "parsec_context_wait"); @@ 
-201,11 +184,8 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/dtd/dtd_test_war.c b/tests/dsl/dtd/dtd_test_war.c index 91e9fb5d8..837ab67e7 100644 --- a/tests/dsl/dtd/dtd_test_war.c +++ b/tests/dsl/dtd/dtd_test_war.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /* parsec things */ @@ -12,6 +13,7 @@ #include #include "tests/tests_data.h" +#include "tests/tests_runtime.h" #include "parsec/interfaces/dtd/insert_function_internal.h" #include "parsec/utils/debug.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" @@ -20,10 +22,6 @@ #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - static volatile int32_t count_war_error = 0; static volatile int32_t count_raw_error = 0; @@ -72,28 +70,19 @@ int main(int argc, char ** argv) int no_of_tasks, no_of_read_tasks = 5, key; parsec_arena_datatype_t *adt; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - if(argv[1] != NULL){ cores = atoi(argv[1]); } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + no_of_tasks = world; nb = 1; /* tile_size */ nt = no_of_tasks; /* total no. 
of tiles */ - parsec = parsec_init( cores, &argc, &argv ); - parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); adt = parsec_matrix_adt_new_rect( @@ -157,11 +146,8 @@ int main(int argc, char ** argv) parsec_dtd_free_arena_datatype(parsec, TILE_FULL); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/CMakeLists.txt b/tests/dsl/ptg/CMakeLists.txt index 25ed08aea..1d2c64bf0 100644 --- a/tests/dsl/ptg/CMakeLists.txt +++ b/tests/dsl/ptg/CMakeLists.txt @@ -1,21 +1,30 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + add_subdirectory(ptgpp) parsec_addtest_executable(C strange) +target_link_libraries(strange PRIVATE tests_runtime_common) target_ptg_sources(strange PRIVATE "strange.jdf") parsec_addtest_executable(C recursive) +target_link_libraries(recursive PRIVATE tests_runtime_common) target_ptg_sources(recursive PRIVATE "recursive.jdf") if(PARSEC_HAVE_RANDOM) parsec_addtest_executable(C startup) + target_link_libraries(startup PRIVATE tests_runtime_common) target_ptg_sources(startup PRIVATE "startup.jdf") endif(PARSEC_HAVE_RANDOM) parsec_addtest_executable(C complex_deps) +target_link_libraries(complex_deps PRIVATE tests_runtime_common) target_ptg_sources(complex_deps PRIVATE "complex_deps.jdf") if(PARSEC_HAVE_DEV_CAPABILITY_BATCH) parsec_addtest_executable(C batch_cpu) + target_link_libraries(batch_cpu PRIVATE tests_runtime_common) target_ptg_sources(batch_cpu PRIVATE "batch_cpu.jdf") endif(PARSEC_HAVE_DEV_CAPABILITY_BATCH) diff --git a/tests/dsl/ptg/batch_cpu.jdf b/tests/dsl/ptg/batch_cpu.jdf index 05db2adb2..d39196a56 100644 --- a/tests/dsl/ptg/batch_cpu.jdf +++ b/tests/dsl/ptg/batch_cpu.jdf @@ -12,6 +12,7 @@ extern "C" %{ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/mca/device/device.h" +#include "tests/tests_runtime.h" #include "batch_cpu.h" 
#define TYPE PARSEC_MATRIX_INTEGER @@ -70,14 +71,6 @@ int main(int argc, char **argv) int rank = 0; int ret = 0; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - } -#endif - if( NULL != argv[1] ) { n = atoi(argv[1]); } @@ -85,8 +78,9 @@ int main(int argc, char **argv) n = 32; } - parsec = parsec_init(-1, &argc, &argv); - assert(NULL != parsec); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init(&descA, TYPE, PARSEC_MATRIX_TILE, rank, @@ -136,11 +130,8 @@ int main(int argc, char **argv) free(descA.mat); PARSEC_OBJ_RELEASE(adt.arena); parsec_matrix_arena_datatype_destruct_free_type(&adt); - parsec_fini(&parsec); - -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/ptg/branching/CMakeLists.txt b/tests/dsl/ptg/branching/CMakeLists.txt index 4ea295fc0..61910cac7 100644 --- a/tests/dsl/ptg/branching/CMakeLists.txt +++ b/tests/dsl/ptg/branching/CMakeLists.txt @@ -2,14 +2,17 @@ include(ParsecCompilePTG) # Default build: probably ht, but not taking a chance of missing an option parsec_addtest_executable(C branching SOURCES main.c branching_wrapper.c branching_data.c) +target_link_libraries(branching PRIVATE tests_runtime_common) target_ptg_sources(branching PRIVATE "branching.jdf") # Force dynamic hash tables test parsec_addtest_executable(C branching_ht SOURCES main.c branching_wrapper.c branching_data.c) +target_link_libraries(branching_ht PRIVATE tests_runtime_common) target_ptg_source_ex(TARGET branching_ht DESTINATION branching_ht MODE PRIVATE SOURCE branching.jdf DEP_MANAGEMENT dynamic-hash-table) add_dependencies(branching_ht branching) # We need to have branching.h generated 
before # Force index array test parsec_addtest_executable(C branching_idxarr SOURCES main.c branching_wrapper.c branching_data.c) +target_link_libraries(branching_idxarr PRIVATE tests_runtime_common) target_ptg_source_ex(TARGET branching_idxarr DESTINATION branching_idxarr MODE PRIVATE SOURCE branching.jdf DEP_MANAGEMENT index-array) add_dependencies(branching_idxarr branching) # We need to have branching.h generated before diff --git a/tests/dsl/ptg/branching/branching_wrapper.c b/tests/dsl/ptg/branching/branching_wrapper.c index ceab9504f..1f1753a61 100644 --- a/tests/dsl/ptg/branching/branching_wrapper.c +++ b/tests/dsl/ptg/branching/branching_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "branching.h" diff --git a/tests/dsl/ptg/branching/main.c b/tests/dsl/ptg/branching/main.c index dd3426e20..035438855 100644 --- a/tests/dsl/ptg/branching/main.c +++ b/tests/dsl/ptg/branching/main.c @@ -2,18 +2,17 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "branching_wrapper.h" #include "branching_data.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ volatile int32_t nb_taskA = 0; volatile int32_t nb_taskB = 0; @@ -27,18 +26,9 @@ int main(int argc, char *argv[]) parsec_data_collection_t *dcA; parsec_taskpool_t *branching; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); size = 256; if(argc != 2) { @@ -66,20 +56,23 @@ int main(int argc, char *argv[]) free_data(dcA); - parsec_fini(&parsec); int gnbA = nb_taskA, gnbB = nb_taskB, gnbC = nb_taskC; -#if defined(PARSEC_HAVE_MPI) - MPI_Allreduce(MPI_IN_PLACE, &gnbA, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &gnbB, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &gnbC, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); -#endif + rc = parsec_tests_allreduce(parsec, NULL, &gnbA, 1, + parsec_datatype_int_t, PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + rc = parsec_tests_allreduce(parsec, NULL, &gnbB, 1, + parsec_datatype_int_t, PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + rc = parsec_tests_allreduce(parsec, NULL, &gnbC, 1, + parsec_datatype_int_t, PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + printf("nb = %d, nb_taskA = %d, nb_taskB = %d, nb_taskC = %d -- %s\n", nb, gnbA, gnbB, gnbC, gnbA == nb && gnbB == 2*nb && gnbC == nb ? 
"SUCCESS" : "FAILURE!"); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( gnbA == nb && gnbB == 2*nb && diff --git a/tests/dsl/ptg/choice/CMakeLists.txt b/tests/dsl/ptg/choice/CMakeLists.txt index 074d8c97e..b78ef2a2e 100644 --- a/tests/dsl/ptg/choice/CMakeLists.txt +++ b/tests/dsl/ptg/choice/CMakeLists.txt @@ -1,4 +1,5 @@ include(ParsecCompilePTG) parsec_addtest_executable(C choice SOURCES main.c choice_wrapper.c choice_data.c) +target_link_libraries(choice PRIVATE tests_runtime_common) target_ptg_sources(choice PRIVATE "choice.jdf") diff --git a/tests/dsl/ptg/choice/choice_wrapper.c b/tests/dsl/ptg/choice/choice_wrapper.c index 167811ba9..1a72a6d01 100644 --- a/tests/dsl/ptg/choice/choice_wrapper.c +++ b/tests/dsl/ptg/choice/choice_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "choice.h" diff --git a/tests/dsl/ptg/choice/main.c b/tests/dsl/ptg/choice/main.c index 1413794c1..f76d52de3 100644 --- a/tests/dsl/ptg/choice/main.c +++ b/tests/dsl/ptg/choice/main.c @@ -2,7 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" @@ -10,12 +10,10 @@ #include "choice_wrapper.h" #include "choice_data.h" #include "parsec/data_distribution.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #include #include @@ -28,18 +26,6 @@ int main(int argc, char *argv[]) int *decision; parsec_taskpool_t *choice; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - size = 256; int pargc = 0; char **pargv = NULL; @@ -62,10 +48,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(cores, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + dcA = create_and_distribute_data(rank, world, size); parsec_data_collection_set_key(dcA, "A"); @@ -83,8 +69,6 @@ int main(int argc, char *argv[]) parsec_taskpool_free((parsec_taskpool_t*)choice); - parsec_fini(&parsec); - for(size = 0; size < world; size++) { if( rank == size ) { printf("On rank %d, the choices were: ", rank); @@ -93,17 +77,17 @@ int main(int argc, char *argv[]) printf("%c%s", c == 0 ? '#' : (c == 1 ? 'A' : 'B'), i == nb ? 
"\n" : ", "); } } -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif + rc = parsec_tests_barrier(parsec); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_barrier"); + } } free_data(dcA); free(decision); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/complex_deps.jdf b/tests/dsl/ptg/complex_deps.jdf index 5a2301a2c..7335a73d2 100644 --- a/tests/dsl/ptg/complex_deps.jdf +++ b/tests/dsl/ptg/complex_deps.jdf @@ -3,9 +3,11 @@ extern "C" %{ * Copyright (c) 2013-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #include #include #include @@ -142,18 +144,10 @@ int main( int argc, char** argv ) continue; } } -#ifdef DISTRIBUTED - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ - parsec = parsec_init(cores, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &size); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Build the data and the arena to hold it up. 
@@ -216,7 +210,8 @@ int main( int argc, char** argv ) free(descA.mat); - parsec_fini( &parsec); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/controlgather/CMakeLists.txt b/tests/dsl/ptg/controlgather/CMakeLists.txt index f20629a6d..c5e883422 100644 --- a/tests/dsl/ptg/controlgather/CMakeLists.txt +++ b/tests/dsl/ptg/controlgather/CMakeLists.txt @@ -1,5 +1,9 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C ctlgat SOURCES main.c ctlgat_wrapper.c ctlgat_data.c) +target_link_libraries(ctlgat PRIVATE tests_runtime_common) target_ptg_sources(ctlgat PRIVATE "ctlgat.jdf") - diff --git a/tests/dsl/ptg/controlgather/ctlgat.jdf b/tests/dsl/ptg/controlgather/ctlgat.jdf index 999b917bb..a12d571a5 100644 --- a/tests/dsl/ptg/controlgather/ctlgat.jdf +++ b/tests/dsl/ptg/controlgather/ctlgat.jdf @@ -3,14 +3,10 @@ extern "C" %{ * Copyright (c) 2012-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ -#if defined(PARSEC_HAVE_MPI) -#include -#define MY_RANK(r) int r; MPI_Comm_rank(MPI_COMM_WORLD, &r) -#else -#define MY_RANK(r) int r = 0 -#endif +#define MY_RANK(dc, r) int r = (int)((dc)->myrank) %} %option no_taskpool_instance = true /* can be anything */ @@ -29,7 +25,7 @@ CTL X -> X TC(0) ; 0 BODY - MY_RANK(r); + MY_RANK(A, r); printf("%d: TA(%d)\n", r, k); END @@ -43,7 +39,7 @@ CTL X -> Y TC(0) ; 0 BODY - MY_RANK(r); + MY_RANK(A, r); printf("%d: TB(%d)\n", r, k); END @@ -58,7 +54,7 @@ CTL Y <- X TB(0..NT-1) ; 0 BODY - MY_RANK(r); + MY_RANK(A, r); printf("%d: TC(%d)\n", r, k); END extern "C" %{ diff --git a/tests/dsl/ptg/controlgather/ctlgat_wrapper.c b/tests/dsl/ptg/controlgather/ctlgat_wrapper.c index 963c2bad0..97d802164 100644 --- a/tests/dsl/ptg/controlgather/ctlgat_wrapper.c +++ b/tests/dsl/ptg/controlgather/ctlgat_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2009-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "ctlgat.h" @@ -40,13 +38,8 @@ PARSEC_OBJ_CLASS_INSTANCE(parsec_ctlgat_taskpool_t, parsec_taskpool_t, */ parsec_taskpool_t *ctlgat_new(parsec_data_collection_t *A, int size, int nb) { - int worldsize; + int worldsize = (int)A->nodes; parsec_ctlgat_taskpool_t *tp = NULL; -#if defined(PARSEC_HAVE_MPI) - MPI_Comm_size(MPI_COMM_WORLD, &worldsize); -#else - worldsize = 1; -#endif if( nb <= 0 || size <= 0 ) { fprintf(stderr, "To work, CTLGAT must do at least one round time trip of at least one byte\n"); diff --git a/tests/dsl/ptg/controlgather/main.c b/tests/dsl/ptg/controlgather/main.c index aa998c061..704e223eb 100644 --- a/tests/dsl/ptg/controlgather/main.c +++ b/tests/dsl/ptg/controlgather/main.c @@ -2,19 +2,17 @@ * Copyright (c) 2009-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "ctlgat_wrapper.h" #include "ctlgat_data.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ - int main(int argc, char *argv[]) { parsec_context_t* parsec; @@ -23,18 +21,10 @@ int main(int argc, char *argv[]) parsec_data_collection_t *dcA; parsec_taskpool_t *ctlgat; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - parsec = parsec_init(cores, &argc, &argv); + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); size = 256; nb = 4 * world; @@ -54,10 +44,8 @@ int main(int argc, char *argv[]) free_data(dcA); - parsec_fini(&parsec); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/local-indices/CMakeLists.txt b/tests/dsl/ptg/local-indices/CMakeLists.txt index 9645bc0ff..07c79f652 100644 --- a/tests/dsl/ptg/local-indices/CMakeLists.txt +++ b/tests/dsl/ptg/local-indices/CMakeLists.txt @@ -1,3 +1,3 @@ parsec_addtest_executable(C local_indices) target_ptg_sources(local_indices PRIVATE "local_indices.jdf") -target_link_libraries(local_indices PRIVATE m) +target_link_libraries(local_indices PRIVATE m tests_runtime_common) diff --git a/tests/dsl/ptg/local-indices/local_indices.jdf b/tests/dsl/ptg/local-indices/local_indices.jdf index a05e1a870..9b253332b 100644 --- a/tests/dsl/ptg/local-indices/local_indices.jdf +++ b/tests/dsl/ptg/local-indices/local_indices.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2023 
The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include @@ -11,6 +11,7 @@ extern "C" %{ #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test defines sparse execution domains to illustrate the @@ -132,24 +133,16 @@ int main( int argc, char** argv ) srand( getpid() ); -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &ws); - MPI_Comm_rank(MPI_COMM_WORLD, &mr); - for(c = (int)sqrt(ws)+1; c > 0; c--) { - if( (c < ws) && (ws % c) == 0 ) { - p = c; - break; - } - } - } -#endif + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &mr, &ws); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); + for(c = (int)sqrt(ws)+1; c > 0; c--) { + if( (c < ws) && (ws % c) == 0 ) { + p = c; + break; + } } /** @@ -191,11 +184,11 @@ int main( int argc, char** argv ) parsec_context_wait(parsec); -#ifdef PARSEC_HAVE_MPI - MPI_Reduce(local_nb, global_nb, 4, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); -#else - memcpy(global_nb, local_nb, 4*sizeof(int)); -#endif + rc = parsec_tests_allreduce(parsec, local_nb, global_nb, 4, + parsec_datatype_int_t, + PARSEC_TESTS_REDUCE_SUM); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + ret = 0; if( 0 == mr ) { if( global_nb[0] != 25 ) { @@ -221,11 +214,8 @@ int main( int argc, char** argv ) free(descA.mat); parsec_matrix_adt_free( &adt ); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/dsl/ptg/multisize_bcast/CMakeLists.txt b/tests/dsl/ptg/multisize_bcast/CMakeLists.txt index 
a5d665949..c48469fd0 100644 --- a/tests/dsl/ptg/multisize_bcast/CMakeLists.txt +++ b/tests/dsl/ptg/multisize_bcast/CMakeLists.txt @@ -1,6 +1,10 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + include(ParsecCompilePTG) parsec_addtest_executable(C check_multisize_bcast SOURCES main.c check_multisize_bcast_wrapper.c data_gen.c) +target_link_libraries(check_multisize_bcast PRIVATE tests_runtime_common) target_ptg_sources(check_multisize_bcast PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/check_multisize_bcast.jdf") - diff --git a/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c b/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c index 1cd8e8cd8..adf9b6178 100644 --- a/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c +++ b/tests/dsl/ptg/multisize_bcast/check_multisize_bcast_wrapper.c @@ -2,15 +2,13 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/arena.h" -#if defined(PARSEC_HAVE_MPI) -#include -#endif #include #include "check_multisize_bcast_wrapper.h" diff --git a/tests/dsl/ptg/multisize_bcast/main.c b/tests/dsl/ptg/multisize_bcast/main.c index 277c202e0..1dd58c39c 100644 --- a/tests/dsl/ptg/multisize_bcast/main.c +++ b/tests/dsl/ptg/multisize_bcast/main.c @@ -2,12 +2,14 @@ * Copyright (c) 2018-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include #include "parsec/runtime.h" #include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "check_multisize_bcast_wrapper.h" #if defined(PARSEC_HAVE_STRING_H) #include @@ -22,14 +24,6 @@ int main(int argc, char *argv[]) parsec_matrix_block_cyclic_t *dcA; parsec_taskpool_t *bcast; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif if( argc > 1 ) { char* endptr; long val = strtol(argv[1], &endptr, 0); @@ -44,10 +38,10 @@ int main(int argc, char *argv[]) } } - parsec = parsec_init(cores, &argc, &argv); - if( NULL == parsec ) { - exit(1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); dcA = create_and_distribute_data(rank, world, nb, nt); parsec_data_collection_set_key((parsec_data_collection_t *)dcA, "A"); @@ -64,11 +58,8 @@ int main(int argc, char *argv[]) parsec_taskpool_free((parsec_taskpool_t*)bcast); free_data(dcA); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/CMakeLists.txt b/tests/dsl/ptg/ptgpp/CMakeLists.txt index c0f2db207..20da2e27f 100644 --- a/tests/dsl/ptg/ptgpp/CMakeLists.txt +++ b/tests/dsl/ptg/ptgpp/CMakeLists.txt @@ -1,10 +1,17 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + parsec_addtest_executable(C write_check SOURCES vector.c) +target_link_libraries(write_check PRIVATE tests_runtime_common) target_ptg_sources(write_check PRIVATE "write_check.jdf") parsec_addtest_executable(C jdf_forward_RW_NULL) +target_link_libraries(jdf_forward_RW_NULL PRIVATE tests_runtime_common) target_ptg_sources(jdf_forward_RW_NULL PRIVATE "forward_RW_NULL.jdf") parsec_addtest_executable(C jdf_forward_READ_NULL) +target_link_libraries(jdf_forward_READ_NULL PRIVATE tests_runtime_common) target_ptg_sources(jdf_forward_READ_NULL PRIVATE "forward_READ_NULL.jdf") parsec_addtest_executable(C must_fail_too_many_in_deps NODEFAULTBUILD) @@ -37,3 +44,9 @@ target_ptg_sources(must_fail_too_many_local_vars PRIVATE "too_many_local_vars.jd set_target_properties(must_fail_too_many_local_vars PROPERTIES EXCLUDE_FROM_ALL TRUE EXCLUDE_FROM_DEFAULT_BUILD TRUE) + +target_link_libraries(must_fail_too_many_in_deps PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_out_deps PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_read_flows PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_write_flows PRIVATE tests_runtime_common) +target_link_libraries(must_fail_too_many_local_vars PRIVATE tests_runtime_common) diff --git a/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf b/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf index ba7abd83b..ebc60ea44 100644 --- a/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf +++ b/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf @@ -4,6 +4,7 @@ extern "C" %{ * Copyright (c) 2014-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /** @@ -12,6 +13,8 @@ extern "C" %{ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/datatype.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" %} @@ -78,19 +81,10 @@ int main(int argc, char *argv[]) parsec_data_collection_t taskdist; parsec_forward_READ_NULL_taskpool_t *tp; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Let's initialize the task distribution descriptor @@ -128,10 +122,8 @@ int main(int argc, char *argv[]) */ parsec_data_collection_destroy(&taskdist); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf b/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf index 1fc74e070..97494aba0 100644 --- a/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf +++ b/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf @@ -4,6 +4,7 @@ extern "C" %{ * Copyright (c) 2014-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ /** @@ -12,6 +13,8 @@ extern "C" %{ #include "parsec/runtime.h" #include "parsec/data_distribution.h" #include "parsec/datatype.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" %} @@ -78,19 +81,10 @@ int main(int argc, char *argv[]) parsec_data_collection_t taskdist; parsec_forward_RW_NULL_taskpool_t *tp; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif - - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Let's initialize the task distribution descriptor @@ -128,10 +122,8 @@ int main(int argc, char *argv[]) */ parsec_data_collection_destroy(&taskdist); - parsec_fini(&parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf b/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf index 53e9f48dd..63b6379ba 100644 --- a/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_DEP_IN_COUNT > 20 #error MAX_DEP_IN_COUNT is too large for this test. 
@@ -65,17 +67,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -112,9 +106,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf b/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf index b6c2de23b..d399e40dd 100644 --- a/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_LOCAL_COUNT > 10 #error MAX_LOCAL_COUNT is too large for this test. 
@@ -58,17 +60,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -105,9 +99,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf b/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf index 0a5d65648..7c2401415 100644 --- a/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_DEP_OUT_COUNT > 20 #error MAX_DEP_OUT_COUNT is too large for this test. 
@@ -64,17 +66,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -107,9 +101,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf b/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf index 0851422ed..29416ca0d 100644 --- a/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_PARAM_COUNT > 20 #error MAX_PARAM_COUNT is too large for this test. 
@@ -66,17 +68,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -109,9 +103,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf b/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf index ce7631e72..b67872b4e 100644 --- a/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf +++ b/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2020-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -14,6 +15,7 @@ extern "C" %{ */ #include "tests/dsl/ptg/ptgpp/vector.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" #if MAX_PARAM_COUNT > 20 #error MAX_PARAM_COUNT is too large for this test. 
@@ -66,17 +68,9 @@ int main(int argc, char* argv[]) parsec_context_t *parsec; int block = 10, n = 1000, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -113,9 +107,8 @@ int main(int argc, char* argv[]) parsec_taskpool_free((parsec_taskpool_t*)tp); free(descA.mat); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/ptgpp/write_check.jdf b/tests/dsl/ptg/ptgpp/write_check.jdf index c17f4b7ad..367c3e65f 100644 --- a/tests/dsl/ptg/ptgpp/write_check.jdf +++ b/tests/dsl/ptg/ptgpp/write_check.jdf @@ -3,8 +3,10 @@ extern "C" %{ * Copyright (c) 2014-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" static int verbose = 0; @@ -80,15 +82,6 @@ int main(int argc, char* argv[]) int i = 0, block = 10, n = 1000, rc; int rank = 0, np = 1; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &np); - } -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -112,10 +105,10 @@ int main(int argc, char* argv[]) } } - parsec = parsec_init(-1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, + &parsec, &rank, &np); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, rank, @@ -158,14 +151,16 @@ int main(int argc, char* argv[]) PARSEC_OBJ_DESTRUCT(&tp->arenas_datatypes[PARSEC_write_check_DEFAULT_ADT_IDX]); parsec_taskpool_free((parsec_taskpool_t*)tp); - parsec_fini(&parsec); free(descA.mat); int maxloc[2] = {error_found, rank}; -#ifdef PARSEC_HAVE_MPI - MPI_Reduce(0 == rank? MPI_IN_PLACE: &maxloc, &maxloc, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD); - MPI_Finalize(); -#endif + rc = parsec_tests_allreduce(parsec, NULL, maxloc, 1, + parsec_datatype_int_t, + PARSEC_TESTS_REDUCE_MAXLOC_INT); + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( 0 == rank) { if( maxloc[0] > 0 ) { diff --git a/tests/dsl/ptg/recursive.jdf b/tests/dsl/ptg/recursive.jdf index a4ea1cf77..48a3491cf 100644 --- a/tests/dsl/ptg/recursive.jdf +++ b/tests/dsl/ptg/recursive.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include @@ -11,6 +12,7 @@ extern "C" %{ #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/data_dist/matrix/subtile.h" +#include "tests/tests_runtime.h" #include "recursive.h" /* generated header */ @@ -101,13 +103,6 @@ int main( int argc, char** argv ) parsec_context_t *parsec; int ni = NN, level = 3, i = 1, rc; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -129,10 +124,9 @@ int main( int argc, char** argv ) } } - parsec = parsec_init(-1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Build the data and the arena to hold it up. @@ -169,11 +163,8 @@ int main( int argc, char** argv ) free(descA.mat); parsec_matrix_adt_free(&adt); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/startup.jdf b/tests/dsl/ptg/startup.jdf index dd271c57e..918f6d7c2 100644 --- a/tests/dsl/ptg/startup.jdf +++ b/tests/dsl/ptg/startup.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,6 +11,7 @@ extern "C" %{ #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test stress the startup mechanism by generating NI*NJ*NK independent @@ -96,13 +98,6 @@ int main( int argc, char** argv ) int ni = NN, nj = NN, nk = NN, verbose = 0, i = 1, rc; long time_elapsed; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - int pargc = 0; char **pargv = NULL; for( i = 1; i < argc; i++) { if( 0 == strncmp(argv[i], "--", 3) ) { @@ -128,10 +123,9 @@ int main( int argc, char** argv ) } } - parsec = parsec_init(-1, &pargc, &pargv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** * Build the data and the arena to hold it up. @@ -204,11 +198,8 @@ int main( int argc, char** argv ) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&descA); parsec_matrix_adt_free(&adt); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/strange.jdf b/tests/dsl/ptg/strange.jdf index d210100c3..93a7bd0d5 100644 --- a/tests/dsl/ptg/strange.jdf +++ b/tests/dsl/ptg/strange.jdf @@ -3,12 +3,14 @@ extern "C" %{ * Copyright (c) 2015-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
* */ #include #include "strange.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" struct prev_next_s { int prev; @@ -148,15 +150,9 @@ int main(int argc, char* argv[] ) print_prev_next("Random array", neworder, n); } -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } -#endif - - parsec = parsec_init(-1, &pargc, &pargv); - assert( NULL != parsec ); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); parsec_matrix_block_cyclic_init( &descA, TYPE, PARSEC_MATRIX_TILE, 0 /*rank*/, @@ -191,11 +187,8 @@ int main(int argc, char* argv[] ) parsec_taskpool_free(&tp->super); free(descA.mat); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); if( val != n ) { printf("Failed execution (%d != %d)\n", val, n); diff --git a/tests/dsl/ptg/user-defined-functions/CMakeLists.txt b/tests/dsl/ptg/user-defined-functions/CMakeLists.txt index 643a1a043..21e945e65 100644 --- a/tests/dsl/ptg/user-defined-functions/CMakeLists.txt +++ b/tests/dsl/ptg/user-defined-functions/CMakeLists.txt @@ -1,9 +1,15 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) parsec_addtest_executable(C udf SOURCES main.c udf_wrapper.c) +target_link_libraries(udf PRIVATE tests_runtime_common) target_include_directories(udf PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(udf PRIVATE "udf.jdf") parsec_addtest_executable(C utt) +target_link_libraries(utt PRIVATE tests_runtime_common) target_include_directories(utt PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(utt PRIVATE "utt.jdf") diff --git a/tests/dsl/ptg/user-defined-functions/main.c b/tests/dsl/ptg/user-defined-functions/main.c index c5d39b1c7..75988f746 100644 --- a/tests/dsl/ptg/user-defined-functions/main.c +++ b/tests/dsl/ptg/user-defined-functions/main.c @@ -2,16 +2,14 @@ * Copyright (c) 2019-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include #include #include "parsec/runtime.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" - -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ +#include "tests/tests_runtime.h" #include "udf_wrapper.h" @@ -30,6 +28,7 @@ int main(int argc, char *argv[]) parsec_udf_taskpool_t *udf_tp; int largc; char **largv; + int rc; static struct option long_options[] = { {"P", required_argument, 0, 'P'}, @@ -43,19 +42,7 @@ int main(int argc, char *argv[]) }; int option_index = 0, c; int P = -1, MB = -1, NB = 1, M = -1, N = 1; - -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; - P = 1; -#endif + int show_help = 0; while(1) { option_index = 0; @@ -94,36 +81,38 @@ int main(int argc, char *argv[]) cores = atoi(optarg); break; case 'h': - if( 0 == rank ) { - fprintf(stderr, - "Usage: %s [-M ] [-N ] [-m ] [-n ] [-P

]\n" - " Display how many times a probe function is called to build a basic PTG\n" - " M: number of rows in the matrix (default N)\n" - " N: number of columns in the matrix\n" - " MB: number of rows in a tile (default NB)\n" - " NB: number of columns in a tile\n" - " P: number of rows of processes in the 2D grid (default np, must divide np)\n" - " c: number of computing threads to create per rank (default one per core)\n" - "\n", argv[0]); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, 1); -#endif - exit(1); - } -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); /**< Will let the other ranks wait for the MPI_Abort */ -#endif + show_help = 1; break; /**< To silent warnings */ } + if( show_help ) { + break; + } } largc = argc - optind; largv = argv + optind; - parsec = parsec_init(cores, &largc, &largv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(cores, PARSEC_TEST_THREAD_SERIALIZED, + &largc, &largv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + if( show_help ) { + if( 0 == rank ) { + fprintf(stderr, + "Usage: %s [-M ] [-N ] [-m ] [-n ] [-P

]\n" + " Display how many times a probe function is called to build a basic PTG\n" + " M: number of rows in the matrix (default N)\n" + " N: number of columns in the matrix\n" + " MB: number of rows in a tile (default NB)\n" + " NB: number of columns in a tile\n" + " P: number of rows of processes in the 2D grid (default np, must divide np)\n" + " c: number of computing threads to create per rank (default one per core)\n" + "\n", argv[0]); + } + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return 1; + } if( -1 == MB ) MB = NB; @@ -134,13 +123,9 @@ int main(int argc, char *argv[]) if( -1 == N || -1 == NB ) { if( 0 == rank ) { fprintf(stderr, "Incorrect usage, see --help\n"); -#if defined(PARSEC_HAVE_MPI) - MPI_Abort(MPI_COMM_WORLD, 1); -#endif + parsec_tests_abort(parsec, 1); } -#if defined(PARSEC_HAVE_MPI) - MPI_Barrier(MPI_COMM_WORLD); /**< Will let the other ranks wait for the MPI_Abort */ -#endif + (void)parsec_tests_barrier(parsec); /**< Will let the other ranks wait for the abort */ exit(1); } @@ -168,11 +153,8 @@ int main(int argc, char *argv[]) parsec_data_free(A.mat); parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&A); - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/dsl/ptg/user-defined-functions/utt.jdf b/tests/dsl/ptg/user-defined-functions/utt.jdf index 2b5997034..99247c906 100644 --- a/tests/dsl/ptg/user-defined-functions/utt.jdf +++ b/tests/dsl/ptg/user-defined-functions/utt.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,6 +11,7 @@ extern "C" %{ #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test uses a User-Triggered-Termination (UTT) to detect @@ -110,20 +112,12 @@ int main( int argc, char** argv ) int ret; int nt; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &ws); - MPI_Comm_rank(MPI_COMM_WORLD, &mr); - } -#endif - nt = 2*ws; + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &mr, &ws); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - exit(-1); - } + nt = 2*ws; /** * Build the data and the arena to hold it up. @@ -175,11 +169,8 @@ int main( int argc, char** argv ) free(descA.mat); parsec_matrix_adt_free( & adt ); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return ret; } diff --git a/tests/profiling-standalone/CMakeLists.txt b/tests/profiling-standalone/CMakeLists.txt index f99f9f7fc..c350e7462 100644 --- a/tests/profiling-standalone/CMakeLists.txt +++ b/tests/profiling-standalone/CMakeLists.txt @@ -1,3 +1,7 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + set(PARSEC_PROFILING_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/parsec/") set(PARSEC_PROFILING_LIBRARIES "parsec;parsec-base;${CMAKE_THREAD_LIBS_INIT}") @@ -6,8 +10,9 @@ INCLUDE_DIRECTORIES("${PARSEC_PROFILING_INCLUDE_DIR}") if(PARSEC_HAVE_PTHREAD_BARRIER) add_executable(sp-demo sp-demo.c) target_link_libraries (sp-demo "${PARSEC_PROFILING_LIBRARIES}") + target_link_libraries (sp-demo tests_runtime_common) add_executable(sp-perf sp-perf.c) target_link_libraries (sp-perf "${PARSEC_PROFILING_LIBRARIES}") + target_link_libraries (sp-perf tests_runtime_common) endif(PARSEC_HAVE_PTHREAD_BARRIER) - diff --git a/tests/profiling-standalone/sp-demo.c b/tests/profiling-standalone/sp-demo.c index ad04c7238..62fa5563f 100644 --- a/tests/profiling-standalone/sp-demo.c +++ b/tests/profiling-standalone/sp-demo.c @@ -2,6 +2,7 @@ * Copyright (c) 2016-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ /** @@ -30,9 +31,10 @@ #include #include #include +#include #include "parsec/profiling.h" - -#include +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #define NB_THREADS 4 #define EVENTS_PER_THREAD 10 @@ -136,24 +138,43 @@ int main(int argc, char *argv[]) { int i, rc; per_thread_info_t thread_info[NB_THREADS]; - int mpi_rank; + int rank; + parsec_context_t *parsec; + int parsec_argc = 0; + char **parsec_argv = NULL; + + for(i = 1; i < argc; i++) { + if( 0 == strcmp(argv[i], "--") ) { + parsec_argc = argc - i; + parsec_argv = argv + i; + argc = i; + break; + } + } - MPI_Init(&argc, &argv); // MPI is only needed if using OTF2 as a backend. It can be ignored otherwise. 
- MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /** First, there is a sequential part (no threads) */ /** We initialize the system */ - parsec_profiling_init(mpi_rank); + parsec_profiling_init(rank); - /** MPI should be initialized before the dbp_start call, if it is a distributed application + /** The test runtime should be initialized before the dbp_start call, if it is a distributed application * first argument sp is the base name for the trace file - * It will be named sp-<%d>.prof-XXXX where <%d> is the MPI rank (0 if no MPI), and XXXXX is a random value + * It will be named sp-<%d>.prof-XXXX where <%d> is the process rank, + * and XXXXX is a random value. * second argument "Demonstration..." is a human readable string to qualify the trace */ rc = parsec_profiling_dbp_start( "sp", "Demonstration of basic PaRSEC profiling system" ); - if( 0 != rc ) + if( 0 != rc ) { + parsec_profiling_fini(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; + } /** Each Event type must be defined before any event is traced * They are defined by being added to a dictionary. 
@@ -200,5 +221,7 @@ int main(int argc, char *argv[]) parsec_profiling_dbp_dump(); parsec_profiling_fini(); - MPI_Finalize(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return 0; } diff --git a/tests/profiling-standalone/sp-perf.c b/tests/profiling-standalone/sp-perf.c index a65781921..cbd19b9bb 100644 --- a/tests/profiling-standalone/sp-perf.c +++ b/tests/profiling-standalone/sp-perf.c @@ -19,6 +19,8 @@ #include #include #include "parsec/profiling.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #if !defined(timersub) #define timersub(a, b, result) do { \ @@ -31,8 +33,6 @@ } while(0) #endif -#include - typedef struct { pthread_t pthread_id; int thread_index; @@ -94,14 +94,28 @@ static void *run_thread(void *_arg) int main(int argc, char *argv[]) { - int i, opt; + int i, opt, rc; per_thread_info_t *thread_info; int nbthreads = 1; char *filename = NULL; - int mpi_rank; + int rank; + parsec_context_t *parsec; + int parsec_argc = 0; + char **parsec_argv = NULL; + + for(i = 1; i < argc; i++) { + if( 0 == strcmp(argv[i], "--") ) { + parsec_argc = argc - i; + parsec_argv = argv + i; + argc = i; + break; + } + } - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &rank, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); while ((opt = getopt(argc, argv, "f:n:N:h?")) != -1) { switch (opt) { @@ -117,6 +131,8 @@ int main(int argc, char *argv[]) default: /* '?' 
*/ fprintf(stderr, "Usage: %s [-f filename] [-n number of threads] [-N number of tasks per thread]\n", argv[0]); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); exit(EXIT_FAILURE); } } @@ -128,9 +144,13 @@ int main(int argc, char *argv[]) } if( profiling ) { - parsec_profiling_init(mpi_rank); - if( parsec_profiling_dbp_start(filename, "PaRSEC profiling system performance evaluation" ) == -1 ) + parsec_profiling_init(rank); + if( parsec_profiling_dbp_start(filename, "PaRSEC profiling system performance evaluation" ) == -1 ) { + parsec_profiling_fini(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); exit(EXIT_FAILURE); + } parsec_profiling_add_dictionary_keyword("Event", "#FF0000", 0, NULL, &event_startkey, &event_endkey); } @@ -162,7 +182,8 @@ int main(int argc, char *argv[]) } free(thread_info); - MPI_Finalize(); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); exit(EXIT_SUCCESS); } diff --git a/tests/profiling/CMakeLists.txt b/tests/profiling/CMakeLists.txt index 0d16ec371..3ad72d9ef 100644 --- a/tests/profiling/CMakeLists.txt +++ b/tests/profiling/CMakeLists.txt @@ -1,7 +1,12 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+# + include(ParsecCompilePTG) if(TARGET parsec-ptgpp) parsec_addtest_executable(C async) + target_link_libraries(async PRIVATE tests_runtime_common) target_ptg_sources(async PRIVATE "async.jdf") endif(TARGET parsec-ptgpp) @@ -10,4 +15,3 @@ if(MPI_Fortran_FOUND AND CMAKE_Fortran_COMPILER_WORKS) parsec_addtest_executable(Fortran generate_f SOURCES generate_f.F90) endif(CMAKE_Fortran_COMPILER_SUPPORTS_F90) endif(MPI_Fortran_FOUND AND CMAKE_Fortran_COMPILER_WORKS) - diff --git a/tests/profiling/async.jdf b/tests/profiling/async.jdf index b313e705e..f8245062d 100644 --- a/tests/profiling/async.jdf +++ b/tests/profiling/async.jdf @@ -3,12 +3,14 @@ extern "C" %{ * Copyright (c) 2020-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include #include #include #include #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "tests/tests_runtime.h" /** * This test creates asynchronous tasks to stress the profiling @@ -168,15 +170,6 @@ int main( int argc, char** argv ) int parsec_argc = argc, arg; char **parsec_argv = NULL; -#ifdef PARSEC_HAVE_MPI - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - MPI_Comm_size(MPI_COMM_WORLD, &ws); - MPI_Comm_rank(MPI_COMM_WORLD, &mr); - } -#endif - #if !defined(PARSEC_PROF_TRACE) fprintf(stderr, "This profiling test has been compiled with profiling disabled...\n"); exit(1); @@ -197,16 +190,18 @@ int main( int argc, char** argv ) argc--; } - parsec = parsec_init(-1, &parsec_argc, &parsec_argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &mr, &ws); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); if( argc > 1 ) { NB = atoi(argv[1]); } if(NB <= 0) { fprintf(stderr, "Usage: async [-v] NB [--mca profile_filename /path/to/profile --mca mca_pins 
task_profiler\n"); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 1; } @@ -256,11 +251,8 @@ int main( int argc, char** argv ) parsec_taskpool_free( (parsec_taskpool_t*)tp ); - parsec_fini( &parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/CMakeLists.txt b/tests/runtime/CMakeLists.txt index 563704f90..1293ef4b9 100644 --- a/tests/runtime/CMakeLists.txt +++ b/tests/runtime/CMakeLists.txt @@ -1,3 +1,7 @@ +# +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. +# + add_subdirectory(scheduling) add_Subdirectory(cuda) @@ -7,6 +11,5 @@ if( MPI_C_FOUND ) endif( MPI_C_FOUND ) parsec_addtest_executable(C dtt_bug_replicator SOURCES dtt_bug_replicator_ex.c) +target_link_libraries(dtt_bug_replicator PRIVATE tests_runtime_common) target_ptg_sources(dtt_bug_replicator PRIVATE "dtt_bug_replicator.jdf") - - diff --git a/tests/runtime/Testings.cmake b/tests/runtime/Testings.cmake index bec60bba4..573bdd3ae 100644 --- a/tests/runtime/Testings.cmake +++ b/tests/runtime/Testings.cmake @@ -1,2 +1,6 @@ include(runtime/scheduling/Testings.cmake) include(runtime/cuda/Testings.cmake) + +if( MPI_C_FOUND ) + parsec_addtest_cmd(runtime/multichain:mp ${MPI_TEST_CMD_LIST} 4 runtime/multichain -l=1 -c=2) +endif( MPI_C_FOUND ) diff --git a/tests/runtime/cuda/CMakeLists.txt b/tests/runtime/cuda/CMakeLists.txt index 6d1479fb1..dca54e287 100644 --- a/tests/runtime/cuda/CMakeLists.txt +++ b/tests/runtime/cuda/CMakeLists.txt @@ -10,16 +10,19 @@ if(PARSEC_HAVE_CUDA) else( NOT TARGET CUDA::cublas ) parsec_addtest_executable(C nvlink SOURCES nvlink_main.c nvlink_wrapper.c) + target_link_libraries(nvlink PRIVATE tests_runtime_common) target_include_directories(nvlink PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(nvlink PRIVATE "nvlink.jdf") target_link_libraries(nvlink PRIVATE 
CUDA::cublas) parsec_addtest_executable(C stress SOURCES stress_main.c stress_wrapper.c) + target_link_libraries(stress PRIVATE tests_runtime_common) target_include_directories(stress PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(stress PRIVATE "stress.jdf") target_link_libraries(stress PRIVATE CUDA::cublas) parsec_addtest_executable(C stage SOURCES stage_main.c) + target_link_libraries(stage PRIVATE tests_runtime_common) target_include_directories(stage PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(stage PRIVATE "stage_custom.jdf") target_link_libraries(stage PRIVATE CUDA::cublas) @@ -28,6 +31,7 @@ if(PARSEC_HAVE_CUDA) # Testing for getting best device parsec_addtest_executable(C testing_get_best_device SOURCES "testing_get_best_device.c") + target_link_libraries(testing_get_best_device PRIVATE tests_runtime_common) target_include_directories(testing_get_best_device PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(testing_get_best_device PRIVATE "get_best_device_check.jdf") diff --git a/tests/runtime/cuda/get_best_device_check.jdf b/tests/runtime/cuda/get_best_device_check.jdf index 3fac93b87..2e4e5b979 100644 --- a/tests/runtime/cuda/get_best_device_check.jdf +++ b/tests/runtime/cuda/get_best_device_check.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2021-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. 
*/ #include "cuda_test_internal.h" @@ -220,7 +220,12 @@ int parsec_get_best_device_check(parsec_context_t *parsec, parsec_taskpool_free(parsec_get_best_device_check); - MPI_Allreduce(&info_tmp[0], &info, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + int rc = parsec_tests_allreduce(parsec, &info_tmp[0], &info, 1, + parsec_datatype_int_t, + PARSEC_TESTS_REDUCE_SUM); + if( (PARSEC_SUCCESS != rc) && (PARSEC_ERR_NOT_IMPLEMENTED != rc) ) { + PARSEC_CHECK_ERROR(rc, "parsec_tests_allreduce"); + } return info; } diff --git a/tests/runtime/cuda/nvlink.jdf b/tests/runtime/cuda/nvlink.jdf index e4bfe570c..5658395fe 100644 --- a/tests/runtime/cuda/nvlink.jdf +++ b/tests/runtime/cuda/nvlink.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -16,9 +16,6 @@ extern "C" %{ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) #include "parsec/mca/device/device.h" #include diff --git a/tests/runtime/cuda/nvlink_main.c b/tests/runtime/cuda/nvlink_main.c index 7d822ec7e..ca6f10386 100644 --- a/tests/runtime/cuda/nvlink_main.c +++ b/tests/runtime/cuda/nvlink_main.c @@ -2,50 +2,45 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "nvlink.h" #include "nvlink_wrapper.h" -#if defined(DISTRIBUTED) -#include -#endif - int main(int argc, char *argv[]) { parsec_context_t *parsec = NULL; parsec_taskpool_t *tp; - int size = 1; - int rank = 0; - -#if defined(DISTRIBUTED) - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ + int rc; - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /* can the test run? */ int nb_gpus = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA); assert(nb_gpus >= 0); if(nb_gpus == 0) { parsec_warning("This test can only run if at least one GPU device is present"); - exit(-PARSEC_ERR_DEVICE); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; } int full_peer_access = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_DEVICES_FULL_PEER_ACCESS, PARSEC_DEV_CUDA); assert(full_peer_access >= 0); if(0 == full_peer_access) { parsec_warning("This system does not have a full peer access matrix between all GPU devices"); - exit(-PARSEC_ERR_DEVICE); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; } tp = testing_nvlink_New(parsec, 10, 512); @@ -56,9 +51,7 @@ int main(int argc, char *argv[]) parsec_taskpool_free(tp); } - parsec_fini(&parsec); -#if defined(DISTRIBUTED) - MPI_Finalize(); -#endif /* DISTRIBUTED */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, 
"parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/cuda/stage_custom.jdf b/tests/runtime/cuda/stage_custom.jdf index 71b8fe2ee..e32930b42 100644 --- a/tests/runtime/cuda/stage_custom.jdf +++ b/tests/runtime/cuda/stage_custom.jdf @@ -16,7 +16,6 @@ extern "C" %{ #include #include #include -#include #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) #include "parsec/mca/device/cuda/device_cuda_internal.h" #include diff --git a/tests/runtime/cuda/stage_main.c b/tests/runtime/cuda/stage_main.c index 1309b4f2d..142a17f5b 100644 --- a/tests/runtime/cuda/stage_main.c +++ b/tests/runtime/cuda/stage_main.c @@ -2,48 +2,35 @@ * Copyright (c) 2020-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "stage_custom.h" parsec_taskpool_t* testing_stage_custom_New( parsec_context_t *ctx, int M, int N, int MB, int NB, int P, int *ret); -#if defined(DISTRIBUTED) -#include -#endif - int main(int argc, char *argv[]) { parsec_context_t *parsec = NULL; parsec_taskpool_t *tp; int size = 1; - int rank = 0; int M; int N; int MB; int NB; int P = 1; int ret = 0; - -#if defined(DISTRIBUTED) - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ + int rc; /* Initialize PaRSEC */ - parsec = parsec_init(-1, &argc, &argv); - if( NULL == parsec ) { - /* Failed to correctly initialize. 
In a correct scenario report*/ - /* upstream, but in this particular case bail out.*/ - exit(-1); - } + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, &size); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); /* can the test run? */ assert(size == 1); @@ -52,7 +39,9 @@ int main(int argc, char *argv[]) if(nb_gpus == 0) { parsec_warning("This test can only run if at least one GPU device is present"); printf("TEST SKIPPED\n"); - exit(-PARSEC_ERR_DEVICE); + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); + return -PARSEC_ERR_DEVICE; } /* Test: comparing results when: @@ -105,10 +94,8 @@ int main(int argc, char *argv[]) printf("TEST PASSED\n"); } - parsec_fini(&parsec); -#if defined(DISTRIBUTED) - MPI_Finalize(); -#endif /* DISTRIBUTED */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return (0 == ret)? EXIT_SUCCESS: EXIT_FAILURE; } diff --git a/tests/runtime/cuda/stress.jdf b/tests/runtime/cuda/stress.jdf index 2174c5c13..dedbbe209 100644 --- a/tests/runtime/cuda/stress.jdf +++ b/tests/runtime/cuda/stress.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/parsec_config.h" @@ -16,9 +16,6 @@ extern "C" %{ #include #include #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) #include "parsec/mca/device/cuda/device_cuda_internal.h" #include diff --git a/tests/runtime/cuda/stress_main.c b/tests/runtime/cuda/stress_main.c index 36008203c..947b8e32d 100644 --- a/tests/runtime/cuda/stress_main.c +++ b/tests/runtime/cuda/stress_main.c @@ -2,19 +2,18 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec.h" #include "parsec/data_distribution.h" #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/utils/debug.h" +#include "tests/tests_runtime.h" #include "stress.h" #include "stress_wrapper.h" -#if defined(DISTRIBUTED) -#include -#endif - #include #include @@ -22,11 +21,10 @@ int main(int argc, char *argv[]) { parsec_context_t *parsec = NULL; parsec_taskpool_t *tp; - int size = 1; - int rank = 0; int tile_size = 1024; int depth = 80; int ch; + int rc; /* Parse -n (tile size) and -d (depth) before parsec_init */ while ((ch = getopt(argc, argv, "n:d:")) != -1) { @@ -46,16 +44,9 @@ int main(int argc, char *argv[]) } argc = argc - optind + 1; -#if defined(DISTRIBUTED) - { - int provided; - MPI_Init_thread(NULL, NULL, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#endif /* DISTRIBUTED */ - - parsec = parsec_init(-1, &argc, &argv); + rc = parsec_tests_context_init(-1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, &parsec, NULL, NULL); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); tp = testing_stress_New(parsec, depth, tile_size); if( NULL != tp ) { @@ -65,9 +56,7 @@ int main(int argc, char *argv[]) parsec_taskpool_free(tp); 
} - parsec_fini(&parsec); -#if defined(DISTRIBUTED) - MPI_Finalize(); -#endif /* DISTRIBUTED */ + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/cuda/testing_get_best_device.c b/tests/runtime/cuda/testing_get_best_device.c index fcafe2c9d..984555080 100644 --- a/tests/runtime/cuda/testing_get_best_device.c +++ b/tests/runtime/cuda/testing_get_best_device.c @@ -2,6 +2,7 @@ * Copyright (c) 2019-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "cuda_test_internal.h" @@ -40,7 +41,7 @@ static int matrix_init_ops(parsec_execution_stream_t *es, int main(int argc, char *argv[]) { parsec_context_t* parsec; - int rank, nodes, ch; + int rank, nodes, ch, rc; int pargc = 0; char **pargv; @@ -67,7 +68,7 @@ int main(int argc, char *argv[]) case 'g': nb_gpus = atoi(optarg); break; case '?': case 'h': default: fprintf(stderr, - "-m : initialize MPI_THREAD_MULTIPLE (default: 0/no)\n" + "-m : request multiple-thread support from the test runtime (default: 0/no)\n" "-N : column dimension (N) of the matrices (default: 8)\n" "-t : row dimension (MB) of the tiles (default: 4)\n" "-s : rows of tiles in a k-cyclic distribution (default: 1)\n" @@ -81,19 +82,6 @@ int main(int argc, char *argv[]) } } -#if defined(PARSEC_HAVE_MPI) - { - int provided; - int requested = m? 
MPI_THREAD_MULTIPLE: MPI_THREAD_SERIALIZED; - MPI_Init_thread(&argc, &argv, requested, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - pargc = 0; pargv = NULL; for(int i = 1; i < argc; i++) { if( strcmp(argv[i], "--") == 0 ) { @@ -106,23 +94,21 @@ int main(int argc, char *argv[]) #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) extern char **environ; char *value; - if( nb_gpus < 1 && 0 == rank ) { - fprintf(stderr, "Warning: if run on GPUs, please set --gpus=value bigger than 0\n"); - } asprintf(&value, "%d", nb_gpus); parsec_setenv_mca_param( "device_cuda_enabled", value, &environ ); free(value); #endif - /* Initialize PaRSEC */ - parsec = parsec_init(cores, &pargc, &pargv); + rc = parsec_tests_context_init(cores, + m ? PARSEC_TEST_THREAD_MULTIPLE : PARSEC_TEST_THREAD_SERIALIZED, + &pargc, &pargv, &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); - if( NULL == parsec ) { - /* Failed to correctly initialize. In a correct scenario report - * upstream, but in this particular case bail out. - */ - exit(-1); +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) + if( nb_gpus < 1 && 0 == rank ) { + fprintf(stderr, "Warning: if run on GPUs, please set --gpus=value bigger than 0\n"); } +#endif /* If the number of cores has not been defined as a parameter earlier * update it with the default parameter computed in parsec_init. 
*/ @@ -151,9 +137,9 @@ int main(int argc, char *argv[]) (parsec_tiled_matrix_unary_op_t)matrix_init_ops, NULL); /* Main routines */ - SYNC_TIME_START(); + SYNC_TIME_START(parsec); info = parsec_get_best_device_check(parsec, (parsec_tiled_matrix_t *)&dcA); - SYNC_TIME_PRINT(rank, ("Get_best_device" "\tN= %d NB= %d " + SYNC_TIME_PRINT(parsec, rank, ("Get_best_device" "\tN= %d NB= %d " "PxQ= %d %d KPxKQ= %d %d cores= %d nb_gpus= %d\n", N, NB, P, nodes/P, KP, KQ, cores, parsec_nb_devices-2)); @@ -166,11 +152,8 @@ int main(int argc, char *argv[]) parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)&dcA); /* Clean up parsec*/ - parsec_fini(&parsec); - -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return (0 == info)? EXIT_SUCCESS: EXIT_FAILURE; } diff --git a/tests/runtime/dtt_bug_replicator_ex.c b/tests/runtime/dtt_bug_replicator_ex.c index bda600e25..6a16d5825 100644 --- a/tests/runtime/dtt_bug_replicator_ex.c +++ b/tests/runtime/dtt_bug_replicator_ex.c @@ -2,6 +2,7 @@ * Copyright (c) 2013-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" @@ -9,6 +10,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "dtt_bug_replicator.h" #include "parsec/arena.h" +#include "tests/tests_runtime.h" #include #define N 10 @@ -36,17 +38,10 @@ int main( int argc, char** argv ) int nodes, rank, i, j, rc; (void)argc; (void)argv; -#if defined(PARSEC_HAVE_MPI) - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &nodes); - MPI_Comm_size(MPI_COMM_WORLD, &nodes); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - nodes = 1; - rank = 0; -#endif - - parsec = parsec_init(1, &argc, &argv); - assert( NULL != parsec ); + rc = parsec_tests_context_init(1, PARSEC_TEST_THREAD_SERIALIZED, + &argc, &argv, + &parsec, &rank, &nodes); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); PASTE_CODE_ALLOCATE_MATRIX(dcA, 1, parsec_matrix_block_cyclic, (&dcA, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, @@ -98,9 +93,7 @@ int main( int argc, char** argv ) parsec_taskpool_free(tp); - parsec_fini( &parsec); -#if defined(PARSEC_HAVE_MPI) - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/runtime/scheduling/CMakeLists.txt b/tests/runtime/scheduling/CMakeLists.txt index 81dcb381c..535a7a17d 100644 --- a/tests/runtime/scheduling/CMakeLists.txt +++ b/tests/runtime/scheduling/CMakeLists.txt @@ -2,5 +2,4 @@ include(ParsecCompilePTG) parsec_addtest_executable(C schedmicro SOURCES main.c ep_wrapper.c schedmicro_data.c) target_ptg_sources(schedmicro PRIVATE "ep.jdf") -target_link_libraries(schedmicro PRIVATE m) - +target_link_libraries(schedmicro PRIVATE m tests_runtime_common) diff --git a/tests/runtime/scheduling/ep_wrapper.c b/tests/runtime/scheduling/ep_wrapper.c index c15d15389..b1cd12436 100644 --- a/tests/runtime/scheduling/ep_wrapper.c +++ b/tests/runtime/scheduling/ep_wrapper.c @@ -2,6 +2,7 @@ * Copyright (c) 2014-2021 The University of Tennessee and The University * of Tennessee Research 
Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -9,6 +10,7 @@ #include "parsec/data_distribution.h" #include "parsec/arena.h" +#include "parsec/datatype.h" #include "ep.h" #include "ep_wrapper.h" @@ -31,21 +33,20 @@ parsec_taskpool_t *ep_new(parsec_data_collection_t *A, int nt, int level) tp = parsec_ep_new(nt, level, A); -#if defined(PARSEC_HAVE_MPI) + /* The datatype is irrelevant as the example does not communicate data, + * but use the PaRSEC datatype API so the test is not tied to MPI. + */ { - MPI_Aint extent; -#if defined(PARSEC_HAVE_MPI_20) - MPI_Aint lb = 0; - MPI_Type_get_extent(MPI_BYTE, &lb, &extent); -#else - MPI_Type_extent(MPI_BYTE, &extent); -#endif /* defined(PARSEC_HAVE_MPI_20) */ - /* The datatype is irrelevant as the example does not do communications between nodes */ + ptrdiff_t lb, extent; + int rc = parsec_type_extent(parsec_datatype_uint8_t, &lb, &extent); + if( PARSEC_SUCCESS != rc ) { + parsec_taskpool_free((parsec_taskpool_t*)tp); + return NULL; + } parsec_arena_datatype_set_type( &tp->arenas_datatypes[PARSEC_ep_DEFAULT_ADT_IDX], - extent, PARSEC_ARENA_ALIGNMENT_SSE, - MPI_BYTE ); + (size_t)extent, PARSEC_ARENA_ALIGNMENT_SSE, + parsec_datatype_uint8_t ); } -#endif return (parsec_taskpool_t*)tp; } diff --git a/tests/runtime/scheduling/main.c b/tests/runtime/scheduling/main.c index 80c0d1bdb..d2c0e74b4 100644 --- a/tests/runtime/scheduling/main.c +++ b/tests/runtime/scheduling/main.c @@ -2,6 +2,7 @@ * Copyright (c) 2013-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -10,13 +11,11 @@ #include "ep_wrapper.h" #include "schedmicro_data.h" #include "parsec/os-spec-timing.h" +#include "tests/tests_runtime.h" #if defined(PARSEC_HAVE_STRING_H) #include #endif /* defined(PARSEC_HAVE_STRING_H) */ #include -#if defined(PARSEC_HAVE_MPI) -#include -#endif /* defined(PARSEC_HAVE_MPI) */ static int MAXNT = 16384; static int MAXLEVEL = 1024; @@ -40,17 +39,6 @@ int main(int argc, char *argv[]) int parsec_argc = 0; char **parsec_argv = NULL; -#if defined(PARSEC_HAVE_MPI) - { - int provided; - MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); - } - MPI_Comm_size(MPI_COMM_WORLD, &world); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); -#else - world = 1; - rank = 0; -#endif for(int a = 1; a < argc; a++) { if(strcmp(argv[a], "--") == 0) { parsec_argc = argc - a; @@ -81,10 +69,11 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - parsec = parsec_init(0, &parsec_argc, &parsec_argv); - if( NULL == parsec ) { - exit(-1); - } + rc = parsec_tests_context_init(0, PARSEC_TEST_THREAD_SERIALIZED, + &parsec_argc, &parsec_argv, + &parsec, &rank, &world); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_init"); + printf("#All measured values are times. Times are expressed in " TIMER_UNIT "\n"); level = 4 * world; @@ -134,10 +123,8 @@ int main(int argc, char *argv[]) free_data(dcA); - parsec_fini(&parsec); -#ifdef PARSEC_HAVE_MPI - MPI_Finalize(); -#endif + rc = parsec_tests_context_fini(&parsec); + PARSEC_CHECK_ERROR(rc, "parsec_tests_context_fini"); return 0; } diff --git a/tests/tests_runtime.c b/tests/tests_runtime.c new file mode 100644 index 000000000..86143facb --- /dev/null +++ b/tests/tests_runtime.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
+ */ + +#include "tests/tests_runtime.h" + +#include +#include +#include + +int +parsec_tests_context_init(int nb_cores, int required_thread, + int *pargc, char ***pargv, + parsec_context_t **parsec, + int *rank, int *world) +{ + int rc; + + if( NULL == parsec ) { + return PARSEC_ERR_BAD_PARAM; + } + + /* + * From this point on, rank and world size come from the PaRSEC context. + * This keeps the tests independent from the selected communication backend: + * MPI builds discover them from MPI, UCX builds discover them from PMIx. + */ + *parsec = parsec_init(nb_cores, pargc, pargv); + if( NULL == *parsec ) { + return PARSEC_ERROR; + } + +#if defined(PARSEC_HAVE_MPI) + { + int mpi_initialized = 0, provided = PARSEC_TEST_THREAD_SINGLE; + + MPI_Initialized(&mpi_initialized); + if( mpi_initialized ) { + MPI_Query_thread(&provided); + if( provided < required_thread ) { + fprintf(stderr, "MPI thread support is insufficient: requested %d, provided %d\n", + required_thread, provided); + (void)parsec_tests_context_fini(parsec); + return PARSEC_ERR_NOT_SUPPORTED; + } + } + } +#else + (void)required_thread; +#endif + + if( NULL != rank ) { + rc = parsec_context_query(*parsec, PARSEC_CONTEXT_QUERY_RANK); + if( rc < 0 ) { + (void)parsec_tests_context_fini(parsec); + return rc; + } + *rank = rc; + } + if( NULL != world ) { + rc = parsec_context_query(*parsec, PARSEC_CONTEXT_QUERY_NODES); + if( rc < 0 ) { + (void)parsec_tests_context_fini(parsec); + return rc; + } + /* + * A build without a communication engine reports 0 nodes to indicate + * that no distributed runtime is active. Tests still expect a usable + * local world size, so expose that case as a single-process run. 
+         */
+        if( 0 == rc ) {
+            rc = 1;
+        }
+        *world = rc;
+    }
+
+    return PARSEC_SUCCESS;
+}
+
+int
+parsec_tests_context_fini(parsec_context_t **parsec)
+{
+    int rc = PARSEC_SUCCESS;
+
+    if( (NULL != parsec) && (NULL != *parsec) ) {
+        rc = parsec_fini(parsec);
+    }
+
+    return rc;
+}
+
+int
+parsec_tests_barrier(parsec_context_t *parsec)
+{
+    (void)parsec;
+
+#if defined(PARSEC_HAVE_MPI)
+    {
+        int mpi_initialized = 0;
+        int rc;
+
+        rc = MPI_Initialized(&mpi_initialized);
+        if( (MPI_SUCCESS == rc) && mpi_initialized ) {
+            rc = MPI_Barrier(MPI_COMM_WORLD);
+            return (MPI_SUCCESS == rc) ? PARSEC_SUCCESS : PARSEC_ERROR;
+        }
+    }
+#endif
+
+    return PARSEC_ERR_NOT_IMPLEMENTED;
+}
+
+void
+parsec_tests_abort(parsec_context_t *parsec, int errorcode)
+{
+    (void)parsec;
+
+#if defined(PARSEC_HAVE_MPI)
+    {
+        int mpi_initialized = 0;
+        int rc = MPI_Initialized(&mpi_initialized);
+        if( (MPI_SUCCESS == rc) && mpi_initialized ) {
+            MPI_Abort(MPI_COMM_WORLD, errorcode);
+        }
+    }
+#endif
+
+    exit(errorcode);
+}
+
+int
+parsec_tests_allreduce(parsec_context_t *parsec,
+                       const void *sendbuf,
+                       void *recvbuf,
+                       int count,
+                       parsec_datatype_t datatype,
+                       parsec_tests_reduce_op_t op)
+{
+    if( (NULL == recvbuf) || (count < 0) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( (PARSEC_TESTS_REDUCE_SUM != op) &&
+        (PARSEC_TESTS_REDUCE_BXOR != op) &&
+        (PARSEC_TESTS_REDUCE_MAXLOC_INT != op) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+    if( (PARSEC_TESTS_REDUCE_MAXLOC_INT == op) &&
+        (parsec_datatype_int_t != datatype) ) {
+        return PARSEC_ERR_BAD_PARAM;
+    }
+
+#if defined(PARSEC_HAVE_MPI)
+    {
+        MPI_Op mpi_op;
+        MPI_Datatype mpi_datatype = datatype;
+        int mpi_initialized = 0;
+        int rc;
+
+        switch(op) {
+        case PARSEC_TESTS_REDUCE_SUM:
+            mpi_op = MPI_SUM;
+            break;
+        case PARSEC_TESTS_REDUCE_BXOR:
+            mpi_op = MPI_BXOR;
+            break;
+        case PARSEC_TESTS_REDUCE_MAXLOC_INT:
+            mpi_op = MPI_MAXLOC;
+            mpi_datatype = MPI_2INT;
+            break;
+        default:
+            return PARSEC_ERR_BAD_PARAM;
+        }
+
+        rc = MPI_Initialized(&mpi_initialized);
+        if( (MPI_SUCCESS == rc) && mpi_initialized ) {
+            rc = MPI_Allreduce((NULL == sendbuf || sendbuf == recvbuf) ? MPI_IN_PLACE : (void *)sendbuf,
+                               recvbuf, count, mpi_datatype, mpi_op, MPI_COMM_WORLD);
+            return (MPI_SUCCESS == rc) ? PARSEC_SUCCESS : PARSEC_ERROR;
+        }
+    }
+#endif
+
+    {
+        int nodes = (NULL == parsec) ? 1 : parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_NODES);
+
+        if( nodes < 0 ) {
+            return nodes;
+        }
+        if( nodes > 1 ) {
+            return PARSEC_ERR_NOT_IMPLEMENTED;
+        }
+        if( (NULL != sendbuf) && (sendbuf != recvbuf) && (0 < count) ) {
+            if( PARSEC_TESTS_REDUCE_MAXLOC_INT == op ) {
+                memcpy(recvbuf, sendbuf, 2 * (size_t)count * sizeof(int));
+                return PARSEC_SUCCESS;
+            }
+
+            int size, rc;
+
+            rc = parsec_type_size(datatype, &size);
+            if( PARSEC_SUCCESS != rc ) {
+                return rc;
+            }
+            memcpy(recvbuf, sendbuf, (size_t)count * (size_t)size);
+        }
+    }
+
+    (void)op;
+    return PARSEC_SUCCESS;
+}
diff --git a/tests/tests_runtime.h b/tests/tests_runtime.h
new file mode 100644
index 000000000..326933c44
--- /dev/null
+++ b/tests/tests_runtime.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
+ */
+#if !defined(_TESTS_RUNTIME_H_)
+#define _TESTS_RUNTIME_H_
+
+#include "parsec.h"
+#include "parsec/datatype.h"
+
+#if defined(PARSEC_HAVE_MPI)
+#include <mpi.h>
+#define PARSEC_TEST_THREAD_SINGLE     MPI_THREAD_SINGLE
+#define PARSEC_TEST_THREAD_FUNNELED   MPI_THREAD_FUNNELED
+#define PARSEC_TEST_THREAD_SERIALIZED MPI_THREAD_SERIALIZED
+#define PARSEC_TEST_THREAD_MULTIPLE   MPI_THREAD_MULTIPLE
+#else
+#define PARSEC_TEST_THREAD_SINGLE     0
+#define PARSEC_TEST_THREAD_FUNNELED   1
+#define PARSEC_TEST_THREAD_SERIALIZED 2
+#define PARSEC_TEST_THREAD_MULTIPLE   3
+#endif
+
+typedef enum parsec_tests_reduce_op_e {
+    PARSEC_TESTS_REDUCE_SUM,
+    PARSEC_TESTS_REDUCE_BXOR,
+    PARSEC_TESTS_REDUCE_MAXLOC_INT
+} parsec_tests_reduce_op_t;
+
+/**
+ * Initialize the process launcher/runtime pair used by PaRSEC tests.
+ *
+ * Tests should call this helper instead of directly initializing MPI or PMIx.
+ * parsec_init() initializes the selected communication backend as needed, and
+ * this helper retrieves rank/size from the PaRSEC context afterwards.
+ *
+ * @param[in] nb_cores Number of cores to pass to parsec_init().
+ * @param[in] required_thread Minimum MPI thread level, using
+ *                            PARSEC_TEST_THREAD_*.
+ * @param[inout] pargc PaRSEC argc, passed to parsec_init().
+ * @param[inout] pargv PaRSEC argv, passed to parsec_init().
+ * @param[out] parsec Initialized PaRSEC context.
+ * @param[out] rank Current process rank in the selected communication backend.
+ * @param[out] world Number of processes in the selected communication backend.
+ *
+ * @return PARSEC_SUCCESS on success.
+ */
+int parsec_tests_context_init(int nb_cores, int required_thread,
+                              int *pargc, char ***pargv,
+                              parsec_context_t **parsec,
+                              int *rank, int *world);
+
+/**
+ * Finalize the PaRSEC context and any process launcher initialized by
+ * parsec_tests_context_init().
+ *
+ * @param[inout] parsec Address of the context to finalize. Nothing is done
+ *                      when it, or the context it points to, is NULL.
+ * @return PARSEC_SUCCESS when there was nothing to finalize, otherwise the
+ *         value returned by parsec_fini().
+ */
+int parsec_tests_context_fini(parsec_context_t **parsec);
+
+/**
+ * Synchronize all processes participating in the selected test runtime.
+ *
+ * This is intentionally a test helper, not a public runtime API. It accepts
+ * the PaRSEC context so future communication backends can implement the same
+ * operation without exposing their transport details to tests. For now, only
+ * MPI-backed runs have a useful implementation; non-MPI backends return
+ * PARSEC_ERR_NOT_IMPLEMENTED.
+ *
+ * @param[in] parsec PaRSEC context; currently unused.
+ * @return PARSEC_SUCCESS once MPI_Barrier() on MPI_COMM_WORLD completes,
+ *         PARSEC_ERROR when it fails, and PARSEC_ERR_NOT_IMPLEMENTED when
+ *         MPI support is compiled out or MPI is not reported as initialized.
+ */
+int parsec_tests_barrier(parsec_context_t *parsec);
+
+/**
+ * Abort all processes participating in the selected test runtime.
+ *
+ * MPI-backed tests call MPI_Abort on MPI_COMM_WORLD. Other backends terminate
+ * the local process until they grow a distributed abort primitive.
+ *
+ * @param[in] parsec PaRSEC context; currently unused.
+ * @param[in] errorcode Code passed to MPI_Abort() when MPI is initialized and
+ *                      to the final exit() call; this function never returns.
+ */
+void parsec_tests_abort(parsec_context_t *parsec, int errorcode);
+
+/**
+ * Reduce values across all processes participating in the selected test runtime.
+ *
+ * A NULL send buffer means in-place reduction into recvbuf. MPI-backed tests
+ * call MPI_Allreduce. Single-process non-MPI runs copy sendbuf into recvbuf
+ * and return success; multi-process non-MPI backends return
+ * PARSEC_ERR_NOT_IMPLEMENTED until their collective support is added. The
+ * PARSEC_TESTS_REDUCE_MAXLOC_INT operation expects count int pairs laid out as
+ * {value, rank} and uses MPI_2INT/MPI_MAXLOC when MPI backs the test runtime.
+ *
+ * @param[in] parsec PaRSEC context, only queried for its node count when MPI
+ *                   does not back the run; NULL is treated as one process.
+ * @param[in] sendbuf Values to contribute; NULL or recvbuf itself selects the
+ *                    in-place variant.
+ * @param[inout] recvbuf Destination of the reduced values; must not be NULL.
+ * @param[in] count Number of elements (int pairs for
+ *                  PARSEC_TESTS_REDUCE_MAXLOC_INT); must not be negative.
+ * @param[in] datatype Element type; must be parsec_datatype_int_t for
+ *                     PARSEC_TESTS_REDUCE_MAXLOC_INT.
+ * @param[in] op Reduction to apply.
+ * @return PARSEC_SUCCESS on success, PARSEC_ERR_BAD_PARAM for an invalid
+ *         argument, PARSEC_ERROR when MPI_Allreduce() fails,
+ *         PARSEC_ERR_NOT_IMPLEMENTED for multi-process runs without MPI, or
+ *         the error reported by parsec_context_query() or parsec_type_size().
+ */
+int parsec_tests_allreduce(parsec_context_t *parsec,
+                           const void *sendbuf,
+                           void *recvbuf,
+                           int count,
+                           parsec_datatype_t datatype,
+                           parsec_tests_reduce_op_t op);
+
+#endif /* _TESTS_RUNTIME_H_ */
diff --git a/tests/tests_timing.h b/tests/tests_timing.h
index fa3327e0a..967c6a8d0 100644
--- a/tests/tests_timing.h
+++ b/tests/tests_timing.h
@@ -2,20 +2,20 @@
  * Copyright (c) 2021-2023 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
+ * Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
  */
 #ifndef TIMING_H
 #define TIMING_H
 
 #include "parsec/runtime.h"
+#include "parsec/utils/debug.h"
+#include "tests/tests_runtime.h"
 #include <stdio.h>
 #include <sys/time.h>
 
 extern double time_elapsed;
 extern double sync_time_elapsed;
 
-#if defined( PARSEC_HAVE_MPI)
-# define get_cur_time() MPI_Wtime()
-#else
 static inline double get_cur_time(void)
 {
     struct timeval tv;
@@ -25,7 +25,6 @@ static inline double get_cur_time(void)
     t = tv.tv_sec + tv.tv_usec / 1e6;
     return t;
 }
-#endif
 
 #if defined(PARSEC_PROF_TRACE)
 #define PARSEC_PROFILING_START() parsec_profiling_start()
@@ -41,37 +40,32 @@ static inline double get_cur_time(void)
         printf print; \
     } while(0)
 
-#ifdef PARSEC_HAVE_MPI
-# define SYNC_TIME_START() do { \
-        MPI_Barrier(MPI_COMM_WORLD); \
-        PARSEC_PROFILING_START(); \
-        sync_time_elapsed = get_cur_time(); \
+/*
+ * Non-MPI communication backends do not expose a test barrier yet. Keep the
+ * timing helpers usable as local timers in that case, but still fail on real
+ * barrier errors.
+ */
+#define SYNC_TIME_BARRIER(parsec_context) do { \
+        int _parsec_tests_barrier_rc = parsec_tests_barrier(parsec_context); \
+        if( PARSEC_ERR_NOT_IMPLEMENTED != _parsec_tests_barrier_rc ) { \
+            PARSEC_CHECK_ERROR(_parsec_tests_barrier_rc, "parsec_tests_barrier"); \
+        } \
     } while(0)
-# define SYNC_TIME_STOP() do { \
-        MPI_Barrier(MPI_COMM_WORLD); \
-        sync_time_elapsed = get_cur_time() - sync_time_elapsed; \
+#define SYNC_TIME_START(parsec_context) do { \
+        SYNC_TIME_BARRIER(parsec_context); \
+        PARSEC_PROFILING_START(); \
+        sync_time_elapsed = get_cur_time(); \
     } while(0)
-# define SYNC_TIME_PRINT(rank, print) do { \
-        SYNC_TIME_STOP(); \
-        if(0 == rank) { \
-            printf("[****] TIME(s) %12.5f : ", sync_time_elapsed); \
-            printf print; \
-        } \
-    } while(0)
-
-/* overload exit in MPI mode */
-# define exit(ret) MPI_Abort(MPI_COMM_WORLD, ret)
-
-#else
-# define SYNC_TIME_START() do { sync_time_elapsed = get_cur_time(); } while(0)
-# define SYNC_TIME_STOP() do { sync_time_elapsed = get_cur_time() - sync_time_elapsed; } while(0)
-# define SYNC_TIME_PRINT(rank, print) do { \
-        SYNC_TIME_STOP(); \
-        if(0 == rank) { \
-            printf("[****] TIME(s) %12.5f : ", sync_time_elapsed); \
-            printf print; \
-        } \
+#define SYNC_TIME_STOP(parsec_context) do { \
+        SYNC_TIME_BARRIER(parsec_context); \
+        sync_time_elapsed = get_cur_time() - sync_time_elapsed; \
     } while(0)
-#endif
+#define SYNC_TIME_PRINT(parsec_context, rank, print) do { \
+        SYNC_TIME_STOP(parsec_context); \
+        if(0 == (rank)) { \
+            printf("[****] TIME(s) %12.5f : ", sync_time_elapsed); \
+            printf print; \
+        } \
+    } while(0)
 
 #endif /* TIMING_H */