From e73577ce246faa626ae40932c611ff06688faa90 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Fri, 17 Apr 2026 15:09:15 +0200 Subject: [PATCH 01/35] Split send/recv test into independent cpu and device mode tests --- test/CMakeLists.txt | 98 +++++++++----- test/test_send_recv.cpp | 285 ++++++++++++++++++++++------------------ 2 files changed, 222 insertions(+), 161 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cb4e6f0e..39affd0e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,8 @@ add_subdirectory(mpi_runner) -set(OOMPH_TEST_LEAK_GPU_MEMORY OFF CACHE BOOL "Do not free memory (bug on Piz Daint)") +set(OOMPH_TEST_LEAK_GPU_MEMORY + OFF + CACHE BOOL "Do not free memory (bug on Piz Daint)") # --------------------------------------------------------------------- # compile tests @@ -10,30 +12,57 @@ set(OOMPH_TEST_LEAK_GPU_MEMORY OFF CACHE BOOL "Do not free memory (bug on Piz Da set(serial_tests test_unique_function test_unsafe_shared_ptr) # list of parallel tests to be executed -set(parallel_tests test_context test_send_recv test_send_multi test_cancel test_locality) -#test_tag_range) -if (OOMPH_ENABLE_BARRIER) - list(APPEND parallel_tests test_barrier) +set(parallel_tests test_context test_send_recv test_send_multi test_cancel + test_locality) + +# list of parallel tests that also have device code variants +if(HWMALLOC_ENABLE_DEVICE) + set(device_tests test_send_recv) +endif() + +# test_tag_range) +if(OOMPH_ENABLE_BARRIER) + list(APPEND parallel_tests test_barrier) endif() -# creates an object library (i.e. *.o file) +# creates an object library (i.e. 
*.o file), if DEVICE is specified, extra flags +# are added and the target name has a suffix function(compile_test t_) - set(t ${t_}_obj) - add_library(${t} OBJECT ${t_}.cpp) - oomph_target_compile_options(${t}) - if (OOMPH_TEST_LEAK_GPU_MEMORY) - target_compile_definitions(${t} PRIVATE OOMPH_TEST_LEAK_GPU_MEMORY) - endif() - target_link_libraries(${t} PRIVATE ext-gtest) - target_link_libraries(${t} PUBLIC oomph) + set(options DEVICE) + cmake_parse_arguments(CT "${options}" "" "" ${ARGN}) + set(source_filename_ "${t_}.cpp") + set(suffix_ "") + if(CT_DEVICE) + # Make a copy the input source file in the build directory, add a suffix + set(suffix_ "_device") + cmake_path(REPLACE_EXTENSION source_filename_ LAST_ONLY "${suffix_}.cpp" + OUTPUT_VARIABLE src_name_) + set(dst_file "${CMAKE_CURRENT_BINARY_DIR}/${src_name_}") + configure_file("${source_filename_}" "${dst_file}" COPYONLY) + set(source_filename_ "${dst_file}") + endif() + set(target_ ${t}${suffix_}_obj) + add_library(${target_} OBJECT ${source_filename_}) + oomph_target_compile_options(${target_}) + target_compile_definitions( + ${target_} + PRIVATE $<$:OOMPH_TEST_LEAK_GPU_MEMORY>) + target_compile_definitions( + ${target_} PRIVATE $<$:TEST_DEVICE_MODE_ONLY>) + target_link_libraries(${target_} PRIVATE ext-gtest) + target_link_libraries(${target_} PUBLIC oomph) endfunction() -# compile an object library for each test -# tests will be compiled only once and then linked against all enabled oomph backends +# compile an object library for each test tests will be compiled only once and +# then linked against all enabled oomph backends list(APPEND all_tests ${serial_tests} ${parallel_tests}) list(REMOVE_DUPLICATES all_tests) foreach(t ${all_tests}) - compile_test(${t}) + compile_test(${t}) + if(${t} IN_LIST device_tests) + # generate a second version of the obj file, but with DEVICE code enabled + compile_test(${t} DEVICE) + endif() endforeach() # --------------------------------------------------------------------- 
@@ -52,7 +81,7 @@ function(reg_serial_test t) endfunction() foreach(t ${serial_tests}) - reg_serial_test(${t}) + reg_serial_test(${t}) endforeach() # creates an executable by linking to object file and to selected oomph backend @@ -73,22 +102,31 @@ function(reg_parallel_test t_ lib n) set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE LABELS "parallel-ranks-${n}") endfunction() -if (OOMPH_WITH_MPI) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} mpi 4) - endforeach() +if(OOMPH_WITH_MPI) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} mpi 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device mpi 4) + endforeach() endif() -if (OOMPH_WITH_UCX) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} ucx 4) - endforeach() +if(OOMPH_WITH_UCX) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} ucx 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device ucx 4) + endforeach() endif() -if (OOMPH_WITH_LIBFABRIC) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} libfabric 4) - endforeach() +if(OOMPH_WITH_LIBFABRIC) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} libfabric 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device libfabric 4) + endforeach() endif() add_subdirectory(bindings) diff --git a/test/test_send_recv.cpp b/test/test_send_recv.cpp index 0cfd1170..1326eecb 100644 --- a/test/test_send_recv.cpp +++ b/test/test_send_recv.cpp @@ -7,16 +7,21 @@ * Please, refer to the LICENSE file in the root directory. 
* SPDX-License-Identifier: BSD-3-Clause */ -#include +#ifdef TEST_DEVICE_MODE_ONLY +# ifdef HWMALLOC_ENABLE_DEVICE +# include +# endif +#endif + #include -#include "./mpi_runner/mpi_test_fixture.hpp" -#include -#include -#include +#include +// use this path because device version in build dir needs to find include #include +#include +#include "../test/mpi_runner/mpi_test_fixture.hpp" -#define NITERS 50 -#define SIZE 64 +#define NITERS 50 +#define SIZE 64 #define NTHREADS 4 std::vector> shared_received(NTHREADS); @@ -33,22 +38,22 @@ struct test_environment_base using tag_type = oomph::tag_type; using message = oomph::message_buffer; - oomph::context& ctxt; + oomph::context& ctxt; oomph::communicator comm; - rank_type speer_rank; - rank_type rpeer_rank; - int thread_id; - int num_threads; - tag_type tag; + rank_type speer_rank; + rank_type rpeer_rank; + int thread_id; + int num_threads; + tag_type tag; test_environment_base(oomph::context& c, int tid, int num_t) - : ctxt(c) - , comm(ctxt.get_communicator()) - , speer_rank((comm.rank() + 1) % comm.size()) - , rpeer_rank((comm.rank() + comm.size() - 1) % comm.size()) - , thread_id(tid) - , num_threads(num_t) - , tag(tid) + : ctxt(c) + , comm(ctxt.get_communicator()) + , speer_rank((comm.rank() + 1) % comm.size()) + , rpeer_rank((comm.rank() + comm.size() - 1) % comm.size()) + , thread_id(tid) + , num_threads(num_t) + , tag(tid) { } }; @@ -57,25 +62,26 @@ struct test_environment : public test_environment_base { using base = test_environment_base; - static auto make_buffer(oomph::communicator& comm, std::size_t size, bool user_alloc, - rank_type* ptr) + static auto make_buffer( + oomph::communicator& comm, std::size_t size, bool user_alloc, rank_type* ptr) { - if (user_alloc) return comm.make_buffer(ptr, size); + if (user_alloc) + return comm.make_buffer(ptr, size); else return comm.make_buffer(size); } std::vector raw_smsg; std::vector raw_rmsg; - message smsg; - message rmsg; + message smsg; + message rmsg; 
test_environment(oomph::context& c, std::size_t size, int tid, int num_t, bool user_alloc) - : base(c, tid, num_t) - , raw_smsg(user_alloc ? size : 0) - , raw_rmsg(user_alloc ? size : 0) - , smsg(make_buffer(comm, size, user_alloc, raw_smsg.data())) - , rmsg(make_buffer(comm, size, user_alloc, raw_rmsg.data())) + : base(c, tid, num_t) + , raw_smsg(user_alloc ? size : 0) + , raw_rmsg(user_alloc ? size : 0) + , smsg(make_buffer(comm, size, user_alloc, raw_smsg.data())) + , rmsg(make_buffer(comm, size, user_alloc, raw_rmsg.data())) { fill_send_buffer(); fill_recv_buffer(); @@ -104,10 +110,11 @@ struct test_environment_device : public test_environment_base { using base = test_environment_base; - static auto make_buffer(oomph::communicator& comm, std::size_t size, bool user_alloc, - rank_type* device_ptr) + static auto make_buffer( + oomph::communicator& comm, std::size_t size, bool user_alloc, rank_type* device_ptr) { - if (user_alloc) return comm.make_device_buffer(device_ptr, size, 0); + if (user_alloc) + return comm.make_device_buffer(device_ptr, size, 0); else return comm.make_device_buffer(size, 0); } @@ -120,37 +127,37 @@ struct test_environment_device : public test_environment_base if (size) m_ptr = hwmalloc::device_malloc(size * sizeof(rank_type)); } device_allocation(device_allocation&& other) - : m_ptr{std::exchange(other.m_ptr, nullptr)} + : m_ptr{std::exchange(other.m_ptr, nullptr)} { } ~device_allocation() { -#ifndef OOMPH_TEST_LEAK_GPU_MEMORY +# ifndef OOMPH_TEST_LEAK_GPU_MEMORY if (m_ptr) hwmalloc::device_free(m_ptr); -#endif +# endif } - rank_type* get() const noexcept { return (rank_type*)m_ptr; } + rank_type* get() const noexcept { return (rank_type*) m_ptr; } }; device_allocation raw_device_smsg; device_allocation raw_device_rmsg; - message smsg; - message rmsg; - - test_environment_device(oomph::context& c, std::size_t size, int tid, int num_t, - bool user_alloc) - : base(c, tid, num_t) -#ifndef OOMPH_TEST_LEAK_GPU_MEMORY - , 
raw_device_smsg(user_alloc ? size : 0) - , raw_device_rmsg(user_alloc ? size : 0) - , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) - , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) -#else - , raw_device_smsg(size) - , raw_device_rmsg(size) - , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) - , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) -#endif + message smsg; + message rmsg; + + test_environment_device( + oomph::context& c, std::size_t size, int tid, int num_t, bool user_alloc) + : base(c, tid, num_t) +# ifndef OOMPH_TEST_LEAK_GPU_MEMORY + , raw_device_smsg(user_alloc ? size : 0) + , raw_device_rmsg(user_alloc ? size : 0) + , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) + , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) +# else + , raw_device_smsg(size) + , raw_device_rmsg(size) + , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) + , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) +# endif { fill_send_buffer(); fill_recv_buffer(); @@ -178,9 +185,8 @@ struct test_environment_device : public test_environment_base }; #endif -template -void -launch_test(Func f) +template +void launch_test(Func f) { // single threaded { @@ -193,7 +199,7 @@ launch_test(Func f) // multi threaded { - oomph::context ctxt(MPI_COMM_WORLD, true); + oomph::context ctxt(MPI_COMM_WORLD, true); std::vector threads; threads.reserve(NTHREADS); reset_counters(); @@ -210,9 +216,9 @@ launch_test(Func f) // no callback // =========== -template -void -test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) +template +void test_send_recv( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { Env env(ctxt, size, tid, num_threads, user_alloc); @@ -221,10 +227,7 @@ test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, { auto rreq = env.comm.recv(env.rmsg, 
env.rpeer_rank, env.tag); auto sreq = env.comm.send(env.smsg, env.speer_rank, env.tag); - while (!(rreq.is_ready() && sreq.is_ready())) - { - env.comm.progress(); - }; + while (!(rreq.is_ready() && sreq.is_ready())) { env.comm.progress(); }; EXPECT_TRUE(env.check_recv_buffer()); env.fill_recv_buffer(); } @@ -250,19 +253,19 @@ test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, } } -TEST_F(mpi_test_fixture, send_recv) -{ - launch_test(test_send_recv); -#if HWMALLOC_ENABLE_DEVICE - launch_test(test_send_recv); +#ifndef TEST_DEVICE_MODE_ONLY +TEST_F(mpi_test_fixture, send_recv) { launch_test(test_send_recv); } +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_device) { launch_test(test_send_recv); } +# endif #endif -} // callback: pass by l-value reference // =================================== -template -void -test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) +template +void test_send_recv_cb( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -270,8 +273,8 @@ test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threa Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; auto send_callback = [&](message const&, rank_type, tag_type) { ++sent; }; auto recv_callback = [&](message&, rank_type, tag_type) { ++received; }; @@ -317,20 +320,22 @@ test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threa EXPECT_EQ(sent, NITERS); } -TEST_F(mpi_test_fixture, send_recv_cb) +#ifndef TEST_DEVICE_MODE_ONLY +TEST_F(mpi_test_fixture, send_recv_cb) { launch_test(test_send_recv_cb); } +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_device) { - launch_test(test_send_recv_cb); -#if 
HWMALLOC_ENABLE_DEVICE launch_test(test_send_recv_cb); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership) // ======================================================= -template -void -test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -338,16 +343,14 @@ test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int nu Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; - auto send_callback = [&](message msg, rank_type, tag_type) - { + auto send_callback = [&](message msg, rank_type, tag_type) { ++sent; env.smsg = std::move(msg); }; - auto recv_callback = [&](message msg, rank_type, tag_type) - { + auto recv_callback = [&](message msg, rank_type, tag_type) { ++received; env.rmsg = std::move(msg); }; @@ -393,20 +396,25 @@ test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int nu EXPECT_EQ(sent, NITERS); } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_disown) { launch_test(test_send_recv_cb_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_disown_device) +{ launch_test(test_send_recv_cb_disown); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership), shared recv // ==================================================================== -template -void -test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_shared_recv_cb_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { 
using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -416,19 +424,18 @@ test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, thread_id = env.thread_id; - //volatile int received = 0; - volatile int sent = 0; + // volatile int received = 0; + int volatile sent = 0; - auto send_callback = [&](message msg, rank_type, tag_type) - { + auto send_callback = [&](message msg, rank_type, tag_type) { ++sent; env.smsg = std::move(msg); }; - auto recv_callback = [&](message msg, rank_type, tag_type) - { - //std::cout << thread_id << " " << env.thread_id << std::endl; - //if (thread_id != env.thread_id) std::cout << "other thread picked up callback" << std::endl; - //else std::cout << "my thread picked up callback" << std::endl; + auto recv_callback = [&](message msg, rank_type, tag_type) { + // std::cout << thread_id << " " << env.thread_id << std::endl; + // if (thread_id != env.thread_id) std::cout << "other thread picked up + // callback" << std::endl; else std::cout << "my thread picked up callback" + // << std::endl; env.rmsg = std::move(msg); ++shared_received[env.thread_id]; }; @@ -475,20 +482,25 @@ test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, EXPECT_EQ(sent, NITERS); } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_shared_recv_cb_disown) { launch_test(test_send_shared_recv_cb_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_shared_recv_cb_disown_device) +{ launch_test(test_send_shared_recv_cb_disown); -#endif } +# endif +#endif // callback: pass by l-value reference, and resubmit // ================================================= -template -void -test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_resubmit( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using 
rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -496,13 +508,13 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; struct recursive_send_callback { - Env& env; - volatile int& sent; + Env& env; + int volatile& sent; void operator()(message& msg, rank_type dst, tag_type tag) { @@ -513,8 +525,8 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int struct recursive_recv_callback { - Env& env; - volatile int& received; + Env& env; + int volatile& received; void operator()(message& msg, rank_type src, tag_type tag) { @@ -531,20 +543,25 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int while (sent < NITERS || received < NITERS) { env.comm.progress(); }; } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_resubmit) { launch_test(test_send_recv_cb_resubmit); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_resubmit_device) +{ launch_test(test_send_recv_cb_resubmit); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership), and resubmit // ===================================================================== -template -void -test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_resubmit_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -552,13 +569,13 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int 
volatile received = 0; + int volatile sent = 0; struct recursive_send_callback { - Env& env; - volatile int& sent; + Env& env; + int volatile& sent; void operator()(message msg, rank_type dst, tag_type tag) { @@ -570,8 +587,8 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti struct recursive_recv_callback { - Env& env; - volatile int& received; + Env& env; + int volatile& received; void operator()(message msg, rank_type src, tag_type tag) { @@ -590,10 +607,16 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti while (sent < NITERS || received < NITERS) { env.comm.progress(); }; } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_resubmit_disown) { launch_test(test_send_recv_cb_resubmit_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_resubmit_disown_device) +{ launch_test(test_send_recv_cb_resubmit_disown); -#endif } +# endif +#endif From 2c03730e18ca97cd935c3f6b1b6957e81d092722 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 16 Apr 2026 16:13:29 +0200 Subject: [PATCH 02/35] Bump hwmalloc submodule --- ext/hwmalloc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/hwmalloc b/ext/hwmalloc index 762dfd8a..eb9484b0 160000 --- a/ext/hwmalloc +++ b/ext/hwmalloc @@ -1 +1 @@ -Subproject commit 762dfd8a47dee7b7843da78760f0e35174682a7c +Subproject commit eb9484b0c7a2a1a7122975c1ff1b76d7e61f230d From e1d177b4db19015b33b49784a8f64ac7294d09d1 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 11:45:13 +0000 Subject: [PATCH 03/35] Clang-format files affected by libfabric changes --- include/oomph/detail/communicator_helper.hpp | 200 +- src/libfabric/communicator.hpp | 509 ++--- src/libfabric/context.cpp | 133 +- src/libfabric/context.hpp | 220 +- src/libfabric/controller.hpp | 682 +++--- src/libfabric/controller_base.hpp | 2136 +++++++++--------- src/libfabric/fabric_error.hpp | 
54 +- src/libfabric/libfabric_defines_template.hpp | 18 +- src/libfabric/locality.cpp | 37 +- src/libfabric/locality.hpp | 411 ++-- src/libfabric/memory_region.hpp | 581 ++--- src/libfabric/operation_context.cpp | 183 +- src/libfabric/operation_context.hpp | 67 +- src/libfabric/operation_context_base.hpp | 137 +- src/libfabric/print.hpp | 1167 +++++----- src/libfabric/request_state.hpp | 166 +- src/libfabric/simple_counter.hpp | 126 +- 17 files changed, 3400 insertions(+), 3427 deletions(-) diff --git a/include/oomph/detail/communicator_helper.hpp b/include/oomph/detail/communicator_helper.hpp index 6e0e97d5..44f6d828 100644 --- a/include/oomph/detail/communicator_helper.hpp +++ b/include/oomph/detail/communicator_helper.hpp @@ -33,7 +33,7 @@ #define OOMPH_CHECK_CALLBACK_MSG_REF \ static_assert(std::is_same&>::value || \ - std::is_same const&>::value, \ + std::is_same const&>::value, \ "first callback argument type is not an l-value reference to a message_buffer"); #define OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ @@ -41,129 +41,107 @@ "first callback argument type is not a const l-value reference to a message_buffer"); #define OOMPH_CHECK_CALLBACK(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) OOMPH_CHECK_CALLBACK_MSG} #define OOMPH_CHECK_CALLBACK_MULTI(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) OOMPH_CHECK_CALLBACK_MSG} #define OOMPH_CHECK_CALLBACK_MULTI_TAGS(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ - OOMPH_CHECK_CALLBACK_MSG \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ + OOMPH_CHECK_CALLBACK_MSG} #define OOMPH_CHECK_CALLBACK_REF(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_REF \ - } + 
{OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) OOMPH_CHECK_CALLBACK_MSG_REF} #define OOMPH_CHECK_CALLBACK_MULTI_REF(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ + OOMPH_CHECK_CALLBACK_MSG_REF} #define OOMPH_CHECK_CALLBACK_MULTI_REF_TAGS(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ - OOMPH_CHECK_CALLBACK_MSG_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ + OOMPH_CHECK_CALLBACK_MSG_REF} #define OOMPH_CHECK_CALLBACK_CONST_REF(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) OOMPH_CHECK_CALLBACK_MSG_CONST_REF} #define OOMPH_CHECK_CALLBACK_MULTI_CONST_REF(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ + OOMPH_CHECK_CALLBACK_MSG_CONST_REF} #define OOMPH_CHECK_CALLBACK_MULTI_CONST_REF_TAGS(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ - OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ - } - -namespace oomph -{ -class communicator_impl; - -namespace detail -{ -struct communicator_state -{ - using impl_type = communicator_impl; - impl_type* m_impl; - std::atomic* m_shared_scheduled_recvs; - util::pool_factory m_mrs_factory; - std::size_t scheduled_sends = 0; - std::size_t scheduled_recvs = 0; - - communicator_state(impl_type* impl_, std::atomic* shared_scheduled_recvs); - ~communicator_state(); - communicator_state(communicator_state const&) = delete; - communicator_state(communicator_state&&) = delete; - communicator_state& operator=(communicator_state const&) = delete; - communicator_state& operator=(communicator_state&&) = delete; - - auto make_multi_request_state(std::size_t ns) { return 
m_mrs_factory.make(m_impl, ns); } - - template - auto make_multi_request_state(std::vector&& neighs, - oomph::message_buffer const& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::vector{}, - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, std::vector&& tags, - oomph::message_buffer const& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, oomph::message_buffer& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::vector{}, - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, std::vector&& tags, - oomph::message_buffer& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, oomph::message_buffer&& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::vector{}, - msg.size(), nullptr, std::move(msg.m)); - } - - template - auto make_multi_request_state(std::vector&& neighs, std::vector&& tags, - oomph::message_buffer&& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), - msg.size(), nullptr, std::move(msg.m)); - } -}; - -} // namespace detail -} // namespace oomph + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ + OOMPH_CHECK_CALLBACK_MSG_CONST_REF} + +namespace oomph { + class communicator_impl; + + namespace detail { + struct communicator_state + { + using impl_type = communicator_impl; + impl_type* m_impl; + std::atomic* m_shared_scheduled_recvs; + util::pool_factory m_mrs_factory; + std::size_t scheduled_sends = 0; + std::size_t scheduled_recvs = 0; + + communicator_state(impl_type* impl_, std::atomic* shared_scheduled_recvs); + ~communicator_state(); + 
communicator_state(communicator_state const&) = delete; + communicator_state(communicator_state&&) = delete; + communicator_state& operator=(communicator_state const&) = delete; + communicator_state& operator=(communicator_state&&) = delete; + + auto make_multi_request_state(std::size_t ns) { return m_mrs_factory.make(m_impl, ns); } + + template + auto make_multi_request_state( + std::vector&& neighs, oomph::message_buffer const& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), + std::vector{}, msg.size(), &msg); + } + + template + auto make_multi_request_state(std::vector&& neighs, + std::vector&& tags, oomph::message_buffer const& msg) + { + return m_mrs_factory.make( + m_impl, neighs.size(), std::move(neighs), std::move(tags), msg.size(), &msg); + } + + template + auto + make_multi_request_state(std::vector&& neighs, oomph::message_buffer& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), + std::vector{}, msg.size(), &msg); + } + + template + auto make_multi_request_state(std::vector&& neighs, + std::vector&& tags, oomph::message_buffer& msg) + { + return m_mrs_factory.make( + m_impl, neighs.size(), std::move(neighs), std::move(tags), msg.size(), &msg); + } + + template + auto make_multi_request_state( + std::vector&& neighs, oomph::message_buffer&& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), + std::vector{}, msg.size(), nullptr, std::move(msg.m)); + } + + template + auto make_multi_request_state(std::vector&& neighs, + std::vector&& tags, oomph::message_buffer&& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), + msg.size(), nullptr, std::move(msg.m)); + } + }; + + } // namespace detail +} // namespace oomph diff --git a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index ff8fc945..a38419dc 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -14,108 +14,109 @@ #include 
-#include #include +#include // paths relative to backend #include <../communicator_base.hpp> #include <../device_guard.hpp> +#include +#include #include #include -#include -#include - -namespace oomph -{ -using operation_context = libfabric::operation_context; +namespace oomph { -using tag_disp = NS_DEBUG::detail::hex<12, uintptr_t>; + using operation_context = libfabric::operation_context; -template -inline /*constexpr*/ NS_DEBUG::print_threshold com_deb("COMMUNI"); + using tag_disp = NS_DEBUG::detail::hex<12, uintptr_t>; -static NS_DEBUG::enable_print com_err("COMMUNI"); + template + inline /*constexpr*/ NS_DEBUG::print_threshold com_deb("COMMUNI"); -class communicator_impl : public communicator_base -{ - using tag_type = std::uint64_t; - // - using segment_type = libfabric::memory_segment; - using region_type = segment_type::handle_type; + static NS_DEBUG::enable_print com_err("COMMUNI"); - using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; - - public: - context_impl* m_context; - libfabric::endpoint_wrapper m_tx_endpoint; - libfabric::endpoint_wrapper m_rx_endpoint; - // - callback_queue m_send_cb_queue; - callback_queue m_recv_cb_queue; - callback_queue m_recv_cb_cancel; - - // -------------------------------------------------------------------- - communicator_impl(context_impl* ctxt) - : communicator_base(ctxt) - , m_context(ctxt) - , m_send_cb_queue(128) - , m_recv_cb_queue(128) - , m_recv_cb_cancel(8) + class communicator_impl : public communicator_base { - LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("MPI_comm"), NS_DEBUG::ptr(mpi_comm()))); - m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); - m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); - } + using tag_type = std::uint64_t; + // + using segment_type = libfabric::memory_segment; + using region_type = segment_type::handle_type; + + using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; + + public: + context_impl* m_context; + 
libfabric::endpoint_wrapper m_tx_endpoint; + libfabric::endpoint_wrapper m_rx_endpoint; + // + callback_queue m_send_cb_queue; + callback_queue m_recv_cb_queue; + callback_queue m_recv_cb_cancel; + + // -------------------------------------------------------------------- + communicator_impl(context_impl* ctxt) + : communicator_base(ctxt) + , m_context(ctxt) + , m_send_cb_queue(128) + , m_recv_cb_queue(128) + , m_recv_cb_cancel(8) + { + LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("MPI_comm"), NS_DEBUG::ptr(mpi_comm()))); + m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); + m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); + } - // -------------------------------------------------------------------- - ~communicator_impl() { clear_callback_queues(); } + // -------------------------------------------------------------------- + ~communicator_impl() { clear_callback_queues(); } - // -------------------------------------------------------------------- - auto& get_heap() noexcept { return m_context->get_heap(); } + // -------------------------------------------------------------------- + auto& get_heap() noexcept { return m_context->get_heap(); } - // -------------------------------------------------------------------- - /// generate a tag with 0xRRRRRRRRtttttttt rank, tag. - /// original tag can be 32bits, then we add 32bits of rank info. - inline std::uint64_t make_tag64(std::uint32_t tag, /*std::uint32_t rank, */ std::uintptr_t ctxt) - { - return (((ctxt & 0x0000000000FFFFFF) << 24) | ((std::uint64_t(tag) & 0x0000000000FFFFFF))); - } + // -------------------------------------------------------------------- + /// generate a tag with 0xRRRRRRRRtttttttt rank, tag. + /// original tag can be 32bits, then we add 32bits of rank info. 
+ inline std::uint64_t make_tag64( + std::uint32_t tag, /*std::uint32_t rank, */ std::uintptr_t ctxt) + { + return (((ctxt & 0x0000'0000'00FF'FFFF) << 24) | + ((std::uint64_t(tag) & 0x0000'0000'00FF'FFFF))); + } - // -------------------------------------------------------------------- - template - inline void execute_fi_function(Func F, const char* msg, Args&&... args) - { - bool ok = false; - while (!ok) + // -------------------------------------------------------------------- + template + inline void execute_fi_function(Func F, char const* msg, Args&&... args) { - ssize_t ret = F(std::forward(args)...); - if (ret == 0) { return; } - else if (ret == -FI_EAGAIN) - { - // com_deb<9>.error("Reposting", msg); - // no point stressing the system - m_context->get_controller()->poll_for_work_completions(this); - } - else if (ret == -FI_ENOENT) + bool ok = false; + while (!ok) { - // if a node has failed, we can recover - // @TODO : put something better here - com_err.error("No destination endpoint, terminating."); - std::terminate(); + ssize_t ret = F(std::forward(args)...); + if (ret == 0) { return; } + else if (ret == -FI_EAGAIN) + { + // com_deb<9>.error("Reposting", msg); + // no point stressing the system + m_context->get_controller()->poll_for_work_completions(this); + } + else if (ret == -FI_ENOENT) + { + // if a node has failed, we can recover + // @TODO : put something better here + com_err.error("No destination endpoint, terminating."); + std::terminate(); + } + else if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), msg); } } - else if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), msg); } } - } - // -------------------------------------------------------------------- - // this takes a pinned memory region and sends it - void send_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, - uint64_t tag_, operation_context* ctxt) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - // clang-format 
off + // -------------------------------------------------------------------- + // this takes a pinned memory region and sends it + void send_tagged_region(region_type const& send_region, std::size_t size, + fi_addr_t dst_addr_, uint64_t tag_, operation_context* ctxt) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("send_tagged_region"), "->", NS_DEBUG::dec<2>(dst_addr_), @@ -123,22 +124,24 @@ class communicator_impl : public communicator_base "tag", tag_disp(tag_), "context", NS_DEBUG::ptr(ctxt), "tx endpoint", NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); - // clang-format on - execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), send_region.get_address(), - size, send_region.get_local_key(), dst_addr_, tag_, ctxt); - } + // clang-format on + execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), + send_region.get_address(), size, send_region.get_local_key(), dst_addr_, tag_, + ctxt); + } - // -------------------------------------------------------------------- - // this takes a pinned memory region and sends it using inject instead of send - void inject_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, - uint64_t tag_) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - // clang-format on - LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("inject tagged"), "->", NS_DEBUG::dec<2>(dst_addr_), send_region, - "tag", tag_disp(tag_), "tx endpoint", NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); - // clang-format off + // -------------------------------------------------------------------- + // this takes a pinned memory region and sends it using inject instead of send + void inject_tagged_region( + region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, uint64_t tag_) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + // clang-format on + 
LF_DEB(com_deb<9>, + debug(NS_DEBUG::str<>("inject tagged"), "->", NS_DEBUG::dec<2>(dst_addr_), + send_region, "tag", tag_disp(tag_), "tx endpoint", + NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); + // clang-format off execute_fi_function(fi_tinject, "fi_tinject", m_tx_endpoint.get_ep(), send_region.get_address(), size, dst_addr_, tag_); } @@ -159,62 +162,65 @@ class communicator_impl : public communicator_base "tag", tag_disp(tag_), "context", NS_DEBUG::ptr(ctxt), "rx endpoint", NS_DEBUG::ptr(m_rx_endpoint.get_ep()))); - // clang-format on - constexpr uint64_t ignore = 0; - execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), recv_region.get_address(), - size, recv_region.get_local_key(), src_addr_, tag_, ignore, ctxt); - // if (l.owns_lock()) l.unlock(); - } + // clang-format on + constexpr uint64_t ignore = 0; + execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), + recv_region.get_address(), size, recv_region.get_local_key(), src_addr_, tag_, + ignore, ctxt); + // if (l.owns_lock()) l.unlock(); + } - // -------------------------------------------------------------------- - send_request send(context_impl::heap_type::pointer const& ptr, std::size_t size, rank_type dst, - oomph::tag_type tag, util::unique_function&& cb, - std::size_t* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); + // -------------------------------------------------------------------- + send_request send(context_impl::heap_type::pointer const& ptr, std::size_t size, + rank_type dst, oomph::tag_type tag, + util::unique_function&& cb, std::size_t* scheduled) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::uint64_t stag = + make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? 
ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, error(NS_DEBUG::str<>("send mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); - } -#endif - m_context->get_controller()->sends_posted_++; - - // use optimized inject if msg is very small - if (size <= m_context->get_controller()->get_tx_inject_size()) - { - inject_tagged_region(reg, size, fi_addr_t(dst), stag); - if (!has_reached_recursion_depth()) + if (size != reg.get_size()) { - auto inc = recursion(); - cb(dst, tag); - return {}; + LF_DEB(com_err, + error(NS_DEBUG::str<>("send mismatch"), "size", NS_DEBUG::hex<6>(size), + "reg size", NS_DEBUG::hex<6>(reg.get_size()))); } - else +#endif + m_context->get_controller()->sends_posted_++; + + // use optimized inject if msg is very small + if (size <= m_context->get_controller()->get_tx_inject_size()) { - // construct request which is also an operation context - auto s = - m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); - s->create_self_ref(); - while (!m_send_cb_queue.push(s.get())) {} - return {std::move(s)}; + inject_tagged_region(reg, size, fi_addr_t(dst), stag); + if (!has_reached_recursion_depth()) + { + auto inc = recursion(); + cb(dst, tag); + return {}; + } + else + { + // construct request which is also an operation context + auto s = m_req_state_factory.make( + m_context, this, scheduled, dst, tag, std::move(cb)); + s->create_self_ref(); + while (!m_send_cb_queue.push(s.get())) {} + return {std::move(s)}; + } } - } - // construct request which is also an operation context - auto s = m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); - s->create_self_ref(); + // construct request which is also an operation context + auto s = 
m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); + s->create_self_ref(); - // clang-format off + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("Send"), "thisrank", NS_DEBUG::dec<>(rank()), @@ -234,39 +240,40 @@ class communicator_impl : public communicator_base NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); } #endif - // clang-format on + // clang-format on - send_tagged_region(reg, size, fi_addr_t(dst), stag, &(s->m_operation_context)); - return {std::move(s)}; - } + send_tagged_region(reg, size, fi_addr_t(dst), stag, &(s->m_operation_context)); + return {std::move(s)}; + } - recv_request recv(context_impl::heap_type::pointer& ptr, std::size_t size, rank_type src, - oomph::tag_type tag, util::unique_function&& cb, - std::size_t* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); + recv_request recv(context_impl::heap_type::pointer& ptr, std::size_t size, rank_type src, + oomph::tag_type tag, util::unique_function&& cb, + std::size_t* scheduled) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? 
ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); - } + if (size != reg.get_size()) + { + LF_DEB(com_err, + error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), + "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + } #endif - m_context->get_controller()->recvs_posted_++; + m_context->get_controller()->recvs_posted_++; - // construct request which is also an operation context - auto s = m_req_state_factory.make(m_context, this, scheduled, src, tag, std::move(cb)); - s->create_self_ref(); + // construct request which is also an operation context + auto s = m_req_state_factory.make(m_context, this, scheduled, src, tag, std::move(cb)); + s->create_self_ref(); - // clang-format off + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("recv"), "thisrank", NS_DEBUG::dec<>(rank()), @@ -286,41 +293,42 @@ class communicator_impl : public communicator_base NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); } #endif - // clang-format on + // clang-format on - recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); - return {std::move(s)}; - } + recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); + return {std::move(s)}; + } - shared_recv_request shared_recv(context_impl::heap_type::pointer& ptr, std::size_t size, - rank_type src, oomph::tag_type tag, - util::unique_function&& cb, - std::atomic* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); + shared_recv_request shared_recv(context_impl::heap_type::pointer& ptr, std::size_t size, + rank_type src, oomph::tag_type tag, + util::unique_function&& cb, + 
std::atomic* scheduled) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); - } + if (size != reg.get_size()) + { + LF_DEB(com_err, + error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), + "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + } #endif - m_context->get_controller()->recvs_posted_++; + m_context->get_controller()->recvs_posted_++; - // construct request which is also an operation context - auto s = std::make_shared(m_context, this, scheduled, src, - tag, std::move(cb)); - s->create_self_ref(); + // construct request which is also an operation context + auto s = std::make_shared( + m_context, this, scheduled, src, tag, std::move(cb)); + s->create_self_ref(); - // clang-format off + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("shared_recv"), "thisrank", NS_DEBUG::dec<>(rank()), @@ -333,102 +341,97 @@ class communicator_impl : public communicator_base "reg size", NS_DEBUG::hex<6>(reg.get_size()), "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), "req", NS_DEBUG::ptr(s.get()))); - // clang-format on + // clang-format on - recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); - m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); - return {std::move(s)}; - } + recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); + m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), 
this); + return {std::move(s)}; + } - void progress() - { - m_context->get_controller()->poll_for_work_completions(this); - clear_callback_queues(); - } + void progress() + { + m_context->get_controller()->poll_for_work_completions(this); + clear_callback_queues(); + } - void clear_callback_queues() - { - // work through ready callbacks, which were pushed to the queue - // (by other threads) - m_send_cb_queue.consume_all( - [](oomph::detail::request_state* req) - { + void clear_callback_queues() + { + // work through ready callbacks, which were pushed to the queue + // (by other threads) + m_send_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = com_deb<9>.scope("m_send_cb_queue.consume_all", NS_DEBUG::ptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); - m_recv_cb_queue.consume_all( - [](oomph::detail::request_state* req) - { + m_recv_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = com_deb<9>.scope("m_recv_cb_queue.consume_all", NS_DEBUG::ptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); - m_context->m_recv_cb_queue.consume_all( - [](detail::shared_request_state* req) - { + m_context->m_recv_cb_queue.consume_all([](detail::shared_request_state* req) { auto ptr = req->release_self_ref(); req->invoke_cb(); }); - } + } - // Cancel is a problem with libfabric because fi_cancel is asynchronous. - // The item to be cancelled will either complete with CANCELLED status - // or will complete as usual (ie before the cancel could take effect) - // - // We can only be certain if we poll until the completion happens - // or attach a callback to the cancel notification which is not supported - // by oomph. - bool cancel_recv(detail::request_state* s) - { - // get the original message operation context - operation_context* op_ctx = &(s->m_operation_context); + // Cancel is a problem with libfabric because fi_cancel is asynchronous. 
+ // The item to be cancelled will either complete with CANCELLED status + // or will complete as usual (ie before the cancel could take effect) + // + // We can only be certain if we poll until the completion happens + // or attach a callback to the cancel notification which is not supported + // by oomph. + bool cancel_recv(detail::request_state* s) + { + // get the original message operation context + operation_context* op_ctx = &(s->m_operation_context); - // submit the cancellation request - bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); - LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Cancel"), "ok", ok, "op_ctx", NS_DEBUG::ptr(op_ctx))); + // submit the cancellation request + bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); + LF_DEB(com_deb<9>, + debug(NS_DEBUG::str<>("Cancel"), "ok", ok, "op_ctx", NS_DEBUG::ptr(op_ctx))); - // if the cancel operation failed completely, return - if (!ok) return false; + // if the cancel operation failed completely, return + if (!ok) return false; - bool found = false; - while (!found) - { - m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); - // otherwise, poll until we know if it worked - std::stack temp_stack; - detail::request_state* temp; - while (!found && m_recv_cb_cancel.pop(temp)) + bool found = false; + while (!found) { - if (temp == s) + m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); + // otherwise, poll until we know if it worked + std::stack temp_stack; + detail::request_state* temp; + while (!found && m_recv_cb_cancel.pop(temp)) { - // our recv was cancelled correctly - found = true; - LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("Cancel"), "succeeded", "op_ctx", - NS_DEBUG::ptr(op_ctx))); - auto ptr = s->release_self_ref(); - s->set_canceled(); + if (temp == s) + { + // our recv was cancelled correctly + found = true; + LF_DEB(com_deb<9>, + debug(NS_DEBUG::str<>("Cancel"), "succeeded", "op_ctx", + NS_DEBUG::ptr(op_ctx))); + 
auto ptr = s->release_self_ref(); + s->set_canceled(); + } + else + { + // a different cancel operation + temp_stack.push(temp); + } } - else + // return any weird unhandled cancels back to the queue + while (!temp_stack.empty()) { - // a different cancel operation - temp_stack.push(temp); + auto temp = temp_stack.top(); + temp_stack.pop(); + m_recv_cb_cancel.push(temp); } } - // return any weird unhandled cancels back to the queue - while (!temp_stack.empty()) - { - auto temp = temp_stack.top(); - temp_stack.pop(); - m_recv_cb_cancel.push(temp); - } + return found; } - return found; - } -}; + }; -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 5621a83b..cb7757a2 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -10,88 +10,83 @@ #include // #include - -#include - // paths relative to backend -#include -#include #include #include +#include +#include -namespace oomph -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print src_deb("__SRC__"); +namespace oomph { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print src_deb("__SRC__"); -using controller_type = libfabric::controller; + using controller_type = libfabric::controller; -context_impl::context_impl(MPI_Comm comm, bool thread_safe, - hwmalloc::heap_config const& heap_config) -: context_base(comm, thread_safe) -, m_heap{this, heap_config} -, m_recv_cb_queue(128) -, m_recv_cb_cancel(8) -{ - int rank, size; - OOMPH_CHECK_MPI_RESULT(MPI_Comm_rank(comm, &rank)); - OOMPH_CHECK_MPI_RESULT(MPI_Comm_size(comm, &size)); + context_impl::context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, + std::size_t message_pool_reserve) + : context_base(comm, thread_safe) + , m_heap{this, message_pool_never_free, message_pool_reserve} + , m_recv_cb_queue(128) + , m_recv_cb_cancel(8) + { + int rank, size; + OOMPH_CHECK_MPI_RESULT(MPI_Comm_rank(comm, &rank)); + 
OOMPH_CHECK_MPI_RESULT(MPI_Comm_size(comm, &size)); - m_ctxt_tag = reinterpret_cast(this); - OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); - LF_DEB(src_deb, debug(NS_DEBUG::str<>("Broadcast"), "rank", debug::dec<3>(rank), "context", - debug::ptr(m_ctxt_tag))); + m_ctxt_tag = reinterpret_cast(this); + OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); + LF_DEB(src_deb, + debug(NS_DEBUG::str<>("Broadcast"), "rank", debug::dec<3>(rank), "context", + debug::ptr(m_ctxt_tag))); - // TODO fix the thread safety - // problem: controller is a singleton and has problems when 2 contexts are created in the - // following order: single threaded first, then multi-threaded after - //int threads = thread_safe ? std::thread::hardware_concurrency() : 1; - //int threads = std::thread::hardware_concurrency(); - int threads = boost::thread::physical_concurrency(); - m_controller = init_libfabric_controller(this, comm, rank, size, threads); - m_domain = m_controller->get_domain(); -} + // TODO fix the thread safety + // problem: controller is a singleton and has problems when 2 contexts are created in the + // following order: single threaded first, then multi-threaded after + //int threads = thread_safe ? 
std::thread::hardware_concurrency() : 1; + //int threads = std::thread::hardware_concurrency(); + int threads = boost::thread::physical_concurrency(); + m_controller = init_libfabric_controller(this, comm, rank, size, threads); + m_domain = m_controller->get_domain(); + } -communicator_impl* -context_impl::get_communicator() -{ - auto comm = new communicator_impl{this}; - m_comms_set.insert(comm); - return comm; -} + communicator_impl* context_impl::get_communicator() + { + auto comm = new communicator_impl{this}; + m_comms_set.insert(comm); + return comm; + } -const char* -context_impl::get_transport_option(const std::string& opt) -{ - if (opt == "name") { return "libfabric"; } - else if (opt == "progress") { return libfabric_progress_string(); } - else if (opt == "endpoint") { return libfabric_endpoint_string(); } - else if (opt == "rendezvous_threshold") + char const* context_impl::get_transport_option(std::string const& opt) { - static char buffer[32]; - std::string temp = std::to_string(m_controller->rendezvous_threshold()); - strncpy(buffer, temp.c_str(), std::min(size_t(31), std::strlen(temp.c_str()))); - return buffer; + if (opt == "name") { return "libfabric"; } + else if (opt == "progress") { return libfabric_progress_string(); } + else if (opt == "endpoint") { return libfabric_endpoint_string(); } + else if (opt == "rendezvous_threshold") + { + static char buffer[32]; + std::string temp = std::to_string(m_controller->rendezvous_threshold()); + strncpy(buffer, temp.c_str(), std::min(size_t(31), std::strlen(temp.c_str()))); + return buffer; + } + else { return "unspecified"; } } - else { return "unspecified"; } -} -std::shared_ptr -context_impl::init_libfabric_controller(oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, - int size, int threads) -{ - // only allow one thread to pass, make other wait - static std::mutex m_init_mutex; - std::lock_guard lock(m_init_mutex); - static std::shared_ptr instance(nullptr); - if (!instance.get()) + 
std::shared_ptr context_impl::init_libfabric_controller( + oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, int size, int threads) { - LF_DEB(src_deb, debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), - "size", debug::dec<3>(size), "threads", debug::dec<3>(threads))); - instance.reset(new controller_type()); - instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); + // only allow one thread to pass, make other wait + static std::mutex m_init_mutex; + std::lock_guard lock(m_init_mutex); + static std::shared_ptr instance(nullptr); + if (!instance.get()) + { + LF_DEB(src_deb, + debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), "size", + debug::dec<3>(size), "threads", debug::dec<3>(threads))); + instance.reset(new controller_type()); + instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); + } + return instance; } - return instance; -} -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index a7c0c112..7a936223 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -9,148 +9,152 @@ */ #pragma once -#include #include +#include #include -#include #include #include // paths relative to backend #include <../context_base.hpp> -#include #include +#include #include -namespace oomph -{ - -static NS_DEBUG::enable_print ctx_deb("CONTEXT"); - -using controller_type = libfabric::controller; - -class context_impl : public context_base -{ - public: - using region_type = libfabric::memory_segment; - using domain_type = region_type::provider_domain; - using device_region_type = libfabric::memory_segment; - using heap_type = hwmalloc::heap; - using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; - - private: - heap_type m_heap; - domain_type* m_domain; - std::shared_ptr m_controller; - std::uintptr_t m_ctxt_tag; - - public: - // -------------------------------------------------- - // create a 
singleton ptr to a libfabric controller that - // can be shared between oomph context objects - static std::shared_ptr init_libfabric_controller(oomph::context_impl* ctx, - MPI_Comm comm, int rank, int size, int threads); - - // queue for shared recv callbacks - callback_queue m_recv_cb_queue; - // queue for canceled shared recv requests - callback_queue m_recv_cb_cancel; - - public: - context_impl(MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config); - context_impl(context_impl const&) = delete; - context_impl(context_impl&&) = delete; - - region_type make_region(void* const ptr, std::size_t size, int device_id) +namespace oomph { + + static NS_DEBUG::enable_print ctx_deb("CONTEXT"); + + using controller_type = libfabric::controller; + + class context_impl : public context_base { - if (m_controller->get_mrbind()) + public: + using region_type = libfabric::memory_segment; + using domain_type = region_type::provider_domain; + using device_region_type = libfabric::memory_segment; + using heap_type = hwmalloc::heap; + using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; + + private: + heap_type m_heap; + domain_type* m_domain; + std::shared_ptr m_controller; + std::uintptr_t m_ctxt_tag; + + public: + // -------------------------------------------------- + // create a singleton ptr to a libfabric controller that + // can be shared between oomph context objects + static std::shared_ptr init_libfabric_controller( + oomph::context_impl* ctx, MPI_Comm comm, int rank, int size, int threads); + + // queue for shared recv callbacks + callback_queue m_recv_cb_queue; + // queue for canceled shared recv requests + callback_queue m_recv_cb_cancel; + + public: + context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, + std::size_t message_pool_reserve); + context_impl(context_impl const&) = delete; + context_impl(context_impl&&) = delete; + + region_type make_region(void* const ptr, std::size_t size, int device_id) 
{ - void* endpoint = m_controller->get_rx_endpoint().get_ep(); - return libfabric::memory_segment(m_domain, ptr, size, true, endpoint, device_id); + if (m_controller->get_mrbind()) + { + void* endpoint = m_controller->get_rx_endpoint().get_ep(); + return libfabric::memory_segment(m_domain, ptr, size, true, endpoint, device_id); + } + else + { + return libfabric::memory_segment(m_domain, ptr, size, false, nullptr, device_id); + } } - else { return libfabric::memory_segment(m_domain, ptr, size, false, nullptr, device_id); } - } - auto& get_heap() noexcept { return m_heap; } + auto& get_heap() noexcept { return m_heap; } - communicator_impl* get_communicator(); + communicator_impl* get_communicator(); - // we must modify all tags to use 32bits of context ptr for uniqueness - inline std::uintptr_t get_context_tag() { return m_ctxt_tag; } + // we must modify all tags to use 32bits of context ptr for uniqueness + inline std::uintptr_t get_context_tag() { return m_ctxt_tag; } - inline controller_type* get_controller() /*const */ { return m_controller.get(); } - const char* get_transport_option(const std::string& opt); + inline controller_type* get_controller() /*const */ { return m_controller.get(); } + char const* get_transport_option(std::string const& opt); - void progress() { get_controller()->poll_for_work_completions(nullptr); } + void progress() { get_controller()->poll_for_work_completions(nullptr); } - bool cancel_recv(detail::shared_request_state* s) - { - // get the original message operation context - auto op_ctx = &(s->m_operation_context); + bool cancel_recv(detail::shared_request_state* s) + { + // get the original message operation context + auto op_ctx = &(s->m_operation_context); - // submit the cancellation request - bool ok = (fi_cancel(&(get_controller()->get_rx_endpoint().get_ep()->fid), op_ctx) == 0); + // submit the cancellation request + bool ok = + (fi_cancel(&(get_controller()->get_rx_endpoint().get_ep()->fid), op_ctx) == 0); - // if the cancel 
operation failed completely, return - if (!ok) return false; + // if the cancel operation failed completely, return + if (!ok) return false; - bool found = false; - while (!found) - { - get_controller()->poll_recv_queue(get_controller()->get_rx_endpoint().get_rx_cq(), - nullptr); - // otherwise, poll until we know if it worked - std::stack temp_stack; - detail::shared_request_state* temp; - while (!found && m_recv_cb_cancel.pop(temp)) + bool found = false; + while (!found) { - if (temp == s) + get_controller()->poll_recv_queue( + get_controller()->get_rx_endpoint().get_rx_cq(), nullptr); + // otherwise, poll until we know if it worked + std::stack temp_stack; + detail::shared_request_state* temp; + while (!found && m_recv_cb_cancel.pop(temp)) { - // our recv was cancelled correctly - found = true; - LF_DEB(oomph::ctx_deb, debug(NS_DEBUG::str<>("Cancel shared"), "succeeded", - "op_ctx", NS_DEBUG::ptr(op_ctx))); - auto ptr = s->release_self_ref(); - s->set_canceled(); + if (temp == s) + { + // our recv was cancelled correctly + found = true; + LF_DEB(oomph::ctx_deb, + debug(NS_DEBUG::str<>("Cancel shared"), "succeeded", "op_ctx", + NS_DEBUG::ptr(op_ctx))); + auto ptr = s->release_self_ref(); + s->set_canceled(); + } + else + { + // a different cancel operation + temp_stack.push(temp); + } } - else + // return any weird unhandled cancels back to the queue + while (!temp_stack.empty()) { - // a different cancel operation - temp_stack.push(temp); + auto temp = temp_stack.top(); + temp_stack.pop(); + m_recv_cb_cancel.push(temp); } } - // return any weird unhandled cancels back to the queue - while (!temp_stack.empty()) - { - auto temp = temp_stack.top(); - temp_stack.pop(); - m_recv_cb_cancel.push(temp); - } + return found; } - return found; - } - unsigned int num_tag_bits() const noexcept { return 32; } -}; + unsigned int num_tag_bits() const noexcept { return 32; } + }; -// -------------------------------------------------------------------- -template<> -inline 
oomph::libfabric::memory_segment -register_memory(oomph::context_impl& c, void* const ptr, std::size_t size) -{ - return c.make_region(ptr, size, -2); -} + // -------------------------------------------------------------------- + template <> + inline oomph::libfabric::memory_segment + register_memory(oomph::context_impl& c, void* const ptr, std::size_t size) + { + return c.make_region(ptr, size, -2); + } #if OOMPH_ENABLE_DEVICE -template<> -inline oomph::libfabric::memory_segment -register_device_memory(context_impl& c, int device_id, void* ptr, std::size_t size) -{ - return c.make_region(ptr, size, device_id); -} + template <> + inline oomph::libfabric::memory_segment register_device_memory( + context_impl& c, int device_id, void* ptr, std::size_t size) + { + return c.make_region(ptr, size, device_id); + } #endif -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 5becc148..95e3ad17 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -35,428 +35,436 @@ #include #include // -#include "oomph_libfabric_defines.hpp" +#include "controller_base.hpp" #include "fabric_error.hpp" #include "locality.hpp" #include "memory_region.hpp" +#include "oomph_libfabric_defines.hpp" #include "operation_context.hpp" -#include "controller_base.hpp" // #include // #include -namespace NS_DEBUG -{ -// cppcheck-suppress ConfigurationNotChecked +namespace NS_DEBUG { + // cppcheck-suppress ConfigurationNotChecked -using namespace oomph::debug; -template -inline /*constexpr*/ NS_DEBUG::print_threshold cnt_deb("CONTROL"); -// -static NS_DEBUG::enable_print cnt_err("CONTROL"); -} // namespace NS_DEBUG - -namespace oomph::libfabric -{ - -class controller : public controller_base -{ - public: - // -------------------------------------------------------------------- - controller() - : controller_base() - { - } + using namespace oomph::debug; + template + inline /*constexpr*/ NS_DEBUG::print_threshold 
cnt_deb("CONTROL"); + // + static NS_DEBUG::enable_print cnt_err("CONTROL"); +} // namespace NS_DEBUG - // -------------------------------------------------------------------- - void initialize_derived(std::string const&, bool, int, size_t, MPI_Comm mpi_comm) - { - // Broadcast address of all endpoints to all ranks - // and fill address vector with info - exchange_addresses(av_, mpi_comm); - } +namespace oomph::libfabric { - // -------------------------------------------------------------------- - constexpr fi_threading threadlevel_flags() + class controller : public controller_base { + public: + // -------------------------------------------------------------------- + controller() + : controller_base() + { + } + + // -------------------------------------------------------------------- + void initialize_derived(std::string const&, bool, int, size_t, MPI_Comm mpi_comm) + { + // Broadcast address of all endpoints to all ranks + // and fill address vector with info + exchange_addresses(av_, mpi_comm); + } + + // -------------------------------------------------------------------- + constexpr fi_threading threadlevel_flags() + { #if defined(HAVE_LIBFABRIC_GNI) /*|| defined(HAVE_LIBFABRIC_CXI)*/ - return FI_THREAD_ENDPOINT; + return FI_THREAD_ENDPOINT; #else - return FI_THREAD_SAFE; + return FI_THREAD_SAFE; #endif - } + } - // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() - { + // -------------------------------------------------------------------- + constexpr uint64_t caps_flags() + { #if OOMPH_ENABLE_DEVICE && !defined(HAVE_LIBFABRIC_TCP) - std::int64_t hmem_flags = FI_HMEM; + std::int64_t hmem_flags = FI_HMEM; #else - std::int64_t hmem_flags = 0; + std::int64_t hmem_flags = 0; #endif - return hmem_flags | FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | FI_SEND | - FI_TRANSMIT | FI_REMOTE_READ | FI_REMOTE_WRITE; - } - - // -------------------------------------------------------------------- - // we 
do not need to perform any special actions on init (to contact root node) - void setup_root_node_address(struct fi_info* /*info*/) {} + return hmem_flags | FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | + FI_SEND | FI_TRANSMIT | FI_REMOTE_READ | FI_REMOTE_WRITE; + } - // -------------------------------------------------------------------- - // send address to rank 0 and receive array of all localities - void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::vector localities(size * locality_defs::array_size, 0); - // - if (rank > 0) - { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending here"), iplocality(here_), - "size", locality_defs::array_size)); - /*int err = */ MPI_Send(here_.fabric_data(), locality_defs::array_size, MPI_CHAR, - 0, // dst rank - 0, // tag - comm); + // -------------------------------------------------------------------- + // we do not need to perform any special actions on init (to contact root node) + void setup_root_node_address(struct fi_info* /*info*/) {} - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving all"), "size", locality_defs::array_size)); - - MPI_Status status; - /*err = */ MPI_Recv(localities.data(), size * locality_defs::array_size, MPI_CHAR, - 0, // src rank - 0, // tag - comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("received addresses"))); - } - else + // -------------------------------------------------------------------- + // send address to rank 0 and receive array of all localities + void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); - memcpy(&localities[0], here_.fabric_data(), locality_defs::array_size); - for (int i = 1; i < size; ++i) + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::vector 
localities(size * locality_defs::array_size, 0); + // + if (rank > 0) { LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving address"), debug::dec<>(i))); + debug(debug::str<>("sending here"), iplocality(here_), "size", + locality_defs::array_size)); + /*int err = */ MPI_Send(here_.fabric_data(), locality_defs::array_size, MPI_CHAR, + 0, // dst rank + 0, // tag + comm); + + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("receiving all"), "size", locality_defs::array_size)); + MPI_Status status; - /*int err = */ MPI_Recv(&localities[i * locality_defs::array_size], - size * locality_defs::array_size, MPI_CHAR, - i, // src rank - 0, // tag + /*err = */ MPI_Recv(localities.data(), size * locality_defs::array_size, MPI_CHAR, + 0, // src rank + 0, // tag comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("received address"), debug::dec<>(i))); + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("received addresses"))); + } + else + { + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); + memcpy(&localities[0], here_.fabric_data(), locality_defs::array_size); + for (int i = 1; i < size; ++i) + { + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("receiving address"), debug::dec<>(i))); + MPI_Status status; + /*int err = */ MPI_Recv(&localities[i * locality_defs::array_size], + size * locality_defs::array_size, MPI_CHAR, + i, // src rank + 0, // tag + comm, &status); + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("received address"), debug::dec<>(i))); + } + + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending all"))); + for (int i = 1; i < size; ++i) + { + LF_DEB( + NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending to"), debug::dec<>(i))); + /*int err = */ MPI_Send(&localities[0], size * locality_defs::array_size, + MPI_CHAR, + i, // dst rank + 0, // tag + comm); + } } - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending all"))); - for (int i = 1; i < size; ++i) + // all ranks should now have a full 
localities vector + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); + for (int i = 0; i < size; ++i) { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending to"), debug::dec<>(i))); - /*int err = */ MPI_Send(&localities[0], size * locality_defs::array_size, MPI_CHAR, - i, // dst rank - 0, // tag - comm); + locality temp; + int offset = i * locality_defs::array_size; + memcpy(temp.fabric_data_writable(), &localities[offset], locality_defs::array_size); + insert_address(av, temp); } } - // all ranks should now have a full localities vector - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); - for (int i = 0; i < size; ++i) + // -------------------------------------------------------------------- + // if we did not bootstrap, then fetch the list of all localities + // and insert each one into the address vector + void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) { - locality temp; - int offset = i * locality_defs::array_size; - memcpy(temp.fabric_data_writable(), &localities[offset], locality_defs::array_size); - insert_address(av, temp); - } - } + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - // -------------------------------------------------------------------- - // if we did not bootstrap, then fetch the list of all localities - // and insert each one into the address vector - void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - - int rank, size; - MPI_Comm_rank(mpi_comm, &rank); - MPI_Comm_size(mpi_comm, &size); + int rank, size; + MPI_Comm_rank(mpi_comm, &rank); + MPI_Comm_size(mpi_comm, &size); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("initialize_localities"), size, "localities")); + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("initialize_localities"), size, "localities")); - MPI_exchange_localities(av, mpi_comm, rank, size); - 
debug_print_av_vector(size); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("Done localities"))); - } + MPI_exchange_localities(av, mpi_comm, rank, size); + debug_print_av_vector(size); + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("Done localities"))); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_tx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_tx_lock() + { #if defined(HAVE_LIBFABRIC_GNI) - return true; + return true; #elif defined(HAVE_LIBFABRIC_CXI) - // @todo : cxi provider is not yet thread safe using scalable endpoints - return false; + // @todo : cxi provider is not yet thread safe using scalable endpoints + return false; #else - return (threadlevel_flags() == FI_THREAD_SAFE || + return (threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::threadlocalTx); #endif - } + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_rx_lock() - { + // 
-------------------------------------------------------------------- + inline constexpr bool bypass_rx_lock() + { #ifdef HAVE_LIBFABRIC_GNI - return true; + return true; #else - return ( - threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::scalableTxRx); + return (threadlevel_flags() == FI_THREAD_SAFE || + endpoint_type_ == endpoint_type::scalableTxRx); #endif - } + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - int poll_send_queue(fid_cq* send_cq, void* user_data) - { + // -------------------------------------------------------------------- + int poll_send_queue(fid_cq* send_cq, void* user_data) + { #ifdef EXCESSIVE_POLLING_BACKOFF_MICRO_S - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); - if (std::chrono::duration_cast(now - send_poll_stamp).count() < - EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - send_poll_stamp = now; + std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - send_poll_stamp) + .count() < 
EXCESSIVE_POLLING_BACKOFF_MICRO_S) + return 0; + send_poll_stamp = now; #endif - int ret; - fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = try_tx_lock(); + int ret; + fi_cq_msg_entry entry[max_completions_array_limit_]; + assert(max_completions_per_poll_ <= max_completions_array_limit_); + { + auto lock = try_tx_lock(); - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } + // if we're not threadlocal and didn't get the lock, + // then another thread is polling now, just exit + if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } - static auto polling = - NS_DEBUG::cnt_deb<9>.make_timer(1, debug::str<>("poll send queue")); - LF_DEB(NS_DEBUG::cnt_deb<9>, timed(polling, NS_DEBUG::ptr(send_cq))); + static auto polling = + NS_DEBUG::cnt_deb<9>.make_timer(1, debug::str<>("poll send queue")); + LF_DEB(NS_DEBUG::cnt_deb<9>, timed(polling, NS_DEBUG::ptr(send_cq))); - // poll for completions - { - ret = fi_cq_read(send_cq, &entry[0], max_completions_per_poll_); - } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) - { - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(send_cq, &e, 0); - (void)err_sz; - - // flags might not be set correctly - if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) + // poll for completions { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_SEND with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); + ret = fi_cq_read(send_cq, &entry[0], max_completions_per_poll_); } - else if ((e.flags & FI_RMA) != 0) + // if there is an error, retrieve it + if (ret == -FI_EAVAIL) { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_RMA 
with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); + struct fi_cq_err_entry e = {}; + int err_sz = fi_cq_readerr(send_cq, &e, 0); + (void) err_sz; + + // flags might not be set correctly + if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) + { + NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " + "FI_SEND with len", + debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", + NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + } + else if ((e.flags & FI_RMA) != 0) + { + NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " + "FI_RMA with len", + debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", + NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + } + operation_context* handler = reinterpret_cast(e.op_context); + handler->handle_error(e); + return 0; } - operation_context* handler = reinterpret_cast(e.op_context); - handler->handle_error(e); - return 0; } - } - // - // exit possibly locked region and process each completion - // - if (ret > 0) - { - int processed = 0; - for (int i = 0; i < ret; ++i) + // + // exit possibly locked region and process each completion + // + if (ret > 0) { - ++sends_complete; - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("Completion"), i, debug::dec<2>(i), "txcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", debug::hex<6>(entry[i].len))); - if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) + int processed = 0; + for (int i = 0; i < ret; ++i) { + ++sends_complete; LF_DEB(NS_DEBUG::cnt_deb<9>, - 
debug(debug::str<>("Completion"), "txcq tagged send completion", - NS_DEBUG::ptr(entry[i].op_context))); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_send_completion(user_data); - } - else - { - NS_DEBUG::cnt_err.error("Received an unknown txcq completion", - debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); - std::terminate(); + debug(debug::str<>("Completion"), i, debug::dec<2>(i), "txcq flags", + fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", + debug::dec<>(entry[i].flags), ")", "context", + NS_DEBUG::ptr(entry[i].op_context), "length", + debug::hex<6>(entry[i].len))); + if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) + { + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("Completion"), "txcq tagged send completion", + NS_DEBUG::ptr(entry[i].op_context))); + + operation_context* handler = + reinterpret_cast(entry[i].op_context); + processed += handler->handle_tagged_send_completion(user_data); + } + else + { + NS_DEBUG::cnt_err.error("Received an unknown txcq completion", + debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + std::terminate(); + } } + return processed; } - return processed; - } - else if (ret == 0 || ret == -FI_EAGAIN) - { - // do nothing, we will try again on the next check + else if (ret == 0 || ret == -FI_EAGAIN) + { + // do nothing, we will try again on the next check + } + else { NS_DEBUG::cnt_err.error("unknown error in completion txcq read"); } + return 0; } - else { NS_DEBUG::cnt_err.error("unknown error in completion txcq read"); } - return 0; - } - // -------------------------------------------------------------------- - int poll_recv_queue(fid_cq* rx_cq, void* user_data) - { + // -------------------------------------------------------------------- + int poll_recv_queue(fid_cq* rx_cq, void* user_data) + { #ifdef EXCESSIVE_POLLING_BACKOFF_MICRO_S - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); 
- if (std::chrono::duration_cast(now - recv_poll_stamp).count() < - EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - recv_poll_stamp = now; + std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - recv_poll_stamp) + .count() < EXCESSIVE_POLLING_BACKOFF_MICRO_S) + return 0; + recv_poll_stamp = now; #endif - int ret; - fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = get_rx_lock(); + int ret; + fi_cq_msg_entry entry[max_completions_array_limit_]; + assert(max_completions_per_poll_ <= max_completions_array_limit_); + { + auto lock = get_rx_lock(); - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } + // if we're not threadlocal and didn't get the lock, + // then another thread is polling now, just exit + if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } - static auto polling = - NS_DEBUG::cnt_deb<2>.make_timer(1, debug::str<>("poll recv queue")); - LF_DEB(NS_DEBUG::cnt_deb<2>, timed(polling, NS_DEBUG::ptr(rx_cq))); + static auto polling = + NS_DEBUG::cnt_deb<2>.make_timer(1, debug::str<>("poll recv queue")); + LF_DEB(NS_DEBUG::cnt_deb<2>, timed(polling, NS_DEBUG::ptr(rx_cq))); - // poll for completions - { - ret = fi_cq_read(rx_cq, &entry[0], max_completions_per_poll_); - } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) - { - // read the full error status - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(rx_cq, &e, 0); - (void)err_sz; - // from the manpage 'man 3 fi_cq_readerr' - if (e.err == FI_ECANCELED) + // poll for completions { - LF_DEB(NS_DEBUG::cnt_deb<1>, - debug(debug::str<>("rxcq Cancelled"), "flags", debug::hex<6>(e.flags), - "len", debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context))); - // the request was cancelled, we can simply exit - // as 
the canceller will have doone any cleanup needed - operation_context* handler = reinterpret_cast(e.op_context); - handler->handle_cancelled(); - return 0; + ret = fi_cq_read(rx_cq, &entry[0], max_completions_per_poll_); } - else if (e.err != FI_SUCCESS) + // if there is an error, retrieve it + if (ret == -FI_EAVAIL) { - NS_DEBUG::cnt_err.error(debug::str<>("poll_recv_queue"), "error code", - debug::dec<>(-e.err), "flags", debug::hex<6>(e.flags), "len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "error msg", - fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); + // read the full error status + struct fi_cq_err_entry e = {}; + int err_sz = fi_cq_readerr(rx_cq, &e, 0); + (void) err_sz; + // from the manpage 'man 3 fi_cq_readerr' + if (e.err == FI_ECANCELED) + { + LF_DEB(NS_DEBUG::cnt_deb<1>, + debug(debug::str<>("rxcq Cancelled"), "flags", debug::hex<6>(e.flags), + "len", debug::hex<6>(e.len), "context", + NS_DEBUG::ptr(e.op_context))); + // the request was cancelled, we can simply exit + // as the canceller will have doone any cleanup needed + operation_context* handler = + reinterpret_cast(e.op_context); + handler->handle_cancelled(); + return 0; + } + else if (e.err != FI_SUCCESS) + { + NS_DEBUG::cnt_err.error(debug::str<>("poll_recv_queue"), "error code", + debug::dec<>(-e.err), "flags", debug::hex<6>(e.flags), "len", + debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), + "error msg", + fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + } + operation_context* handler = reinterpret_cast(e.op_context); + if (handler) handler->handle_error(e); + return 0; } - operation_context* handler = reinterpret_cast(e.op_context); - if (handler) handler->handle_error(e); - return 0; } - } - // - // release the lock and process each completion - // - if (ret > 0) - { - int processed = 0; - for (int i = 0; i < ret; ++i) + // + // release the lock and process each completion + // + if (ret > 0) { - 
++recvs_complete; - LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), i, "rxcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", debug::hex<6>(entry[i].len))); - if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) + int processed = 0; + for (int i = 0; i < ret; ++i) { + ++recvs_complete; LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), "rxcq tagged recv completion", - NS_DEBUG::ptr(entry[i].op_context))); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_recv_completion(user_data); - } - else - { - NS_DEBUG::cnt_err.error("Received an unknown rxcq completion", - debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); - std::terminate(); + debug(debug::str<>("Completion"), i, "rxcq flags", + fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", + debug::dec<>(entry[i].flags), ")", "context", + NS_DEBUG::ptr(entry[i].op_context), "length", + debug::hex<6>(entry[i].len))); + if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) + { + LF_DEB(NS_DEBUG::cnt_deb<2>, + debug(debug::str<>("Completion"), "rxcq tagged recv completion", + NS_DEBUG::ptr(entry[i].op_context))); + + operation_context* handler = + reinterpret_cast(entry[i].op_context); + processed += handler->handle_tagged_recv_completion(user_data); + } + else + { + NS_DEBUG::cnt_err.error("Received an unknown rxcq completion", + debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + std::terminate(); + } } + return processed; + } + else if (ret == 0 || ret == -FI_EAGAIN) + { + // do nothing, we will try again on the next check } - return processed; + else { NS_DEBUG::cnt_err.error("unknown error in completion rxcq read"); } + return 0; } - else if (ret == 0 || ret == -FI_EAGAIN) + + // Jobs started using mpi don't have this info + struct fi_info* set_src_dst_addresses(struct fi_info* info, bool 
tx) { - // do nothing, we will try again on the next check + (void) info; // unused variable warning + (void) tx; // unused variable warning + + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + struct fi_info* hints = fi_dupinfo(info); + if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); + // clear any Rx address data that might be set + // free(hints->src_addr); + // hints->src_addr = nullptr; + // hints->src_addrlen = 0; + free(hints->dest_addr); + hints->dest_addr = nullptr; + hints->dest_addrlen = 0; + return hints; } - else { NS_DEBUG::cnt_err.error("unknown error in completion rxcq read"); } - return 0; - } + }; - // Jobs started using mpi don't have this info - struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) - { - (void)info; // unused variable warning - (void)tx; // unused variable warning - - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); - struct fi_info* hints = fi_dupinfo(info); - if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); - // clear any Rx address data that might be set - // free(hints->src_addr); - // hints->src_addr = nullptr; - // hints->src_addrlen = 0; - free(hints->dest_addr); - hints->dest_addr = nullptr; - hints->dest_addrlen = 0; - return hints; - } -}; - -} // namespace oomph::libfabric +} // namespace oomph::libfabric diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index e1ce377e..a5eb1705 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -53,15 +53,13 @@ // ---------------------------------------- // auto progress (libfabric thread) or manual // ---------------------------------------- -static fi_progress -libfabric_progress_type() +static fi_progress libfabric_progress_type() { if (std::getenv("LIBFABRIC_AUTO_PROGRESS") == nullptr) return FI_PROGRESS_MANUAL; return FI_PROGRESS_AUTO; } -static const char* -libfabric_progress_string() +static char const* libfabric_progress_string() { if 
(libfabric_progress_type() == FI_PROGRESS_AUTO) return "auto"; return "manual"; @@ -93,8 +91,7 @@ enum class endpoint_type : int // ---------------------------------------- // single endpoint or separate for send/recv // ---------------------------------------- -static endpoint_type -libfabric_endpoint_type() +static endpoint_type libfabric_endpoint_type() { auto env_str = std::getenv("LIBFABRIC_ENDPOINT_TYPE"); if (env_str == nullptr) return endpoint_type::single; @@ -114,8 +111,7 @@ libfabric_endpoint_type() return endpoint_type::single; } -static const char* -libfabric_endpoint_string() +static char const* libfabric_endpoint_string() { auto lf_ep_type = libfabric_endpoint_type(); if (lf_ep_type == endpoint_type::multiple) return "multiple"; @@ -128,8 +124,7 @@ libfabric_endpoint_string() // ---------------------------------------- // number of completions to handle per poll // ---------------------------------------- -static int -libfabric_completions_per_poll() +static int libfabric_completions_per_poll() { auto env_str = std::getenv("LIBFABRIC_POLL_SIZE"); if (env_str != nullptr) @@ -148,8 +143,7 @@ libfabric_completions_per_poll() // ---------------------------------------- // Eager/Rendezvous threshold // ---------------------------------------- -static int -libfabric_rendezvous_threshold(int def_val) +static int libfabric_rendezvous_threshold(int def_val) { auto env_str = std::getenv("LIBFABRIC_RENDEZVOUS_THRESHOLD"); if (env_str != nullptr) @@ -170,9 +164,9 @@ libfabric_rendezvous_threshold(int def_val) // Needed on Cray for GNI extensions // ------------------------------------------------ #ifdef HAVE_LIBFABRIC_GNI -#include "rdma/fi_ext_gni.h" +# include "rdma/fi_ext_gni.h" //#define OOMPH_GNI_REG "none" -#define OOMPH_GNI_REG "internal" +# define OOMPH_GNI_REG "internal" //#define OOMPH_GNI_REG "udreg" static std::vector> gni_strs = { @@ -213,19 +207,18 @@ static std::vector> gni_ints = { // api 2.0, then we ask for that, but the cxi legacy library on 
daint only supports 1.15, // so drop back to that version if needed #if defined(OOMPH_LIBFABRIC_V1_API) -#define LIBFABRIC_FI_VERSION_MAJOR 1 -#define LIBFABRIC_FI_VERSION_MINOR 15 +# define LIBFABRIC_FI_VERSION_MAJOR 1 +# define LIBFABRIC_FI_VERSION_MINOR 15 #else -#define LIBFABRIC_FI_VERSION_MAJOR 2 -#define LIBFABRIC_FI_VERSION_MINOR 0 +# define LIBFABRIC_FI_VERSION_MAJOR 2 +# define LIBFABRIC_FI_VERSION_MINOR 0 #endif -namespace NS_DEBUG -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print cnb_deb("CONBASE"); -static NS_DEBUG::enable_print cnb_err("CONBASE"); -} // namespace NS_DEBUG +namespace NS_DEBUG { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print cnb_deb("CONBASE"); + static NS_DEBUG::enable_print cnb_err("CONBASE"); +} // namespace NS_DEBUG /** @brief a class to return the number of progressed callbacks */ struct progress_status @@ -237,7 +230,7 @@ struct progress_status int num_sends() const noexcept { return m_num_sends; } int num_recvs() const noexcept { return m_num_recvs; } - progress_status& operator+=(const progress_status& other) noexcept + progress_status& operator+=(progress_status const& other) noexcept { m_num_sends += other.m_num_sends; m_num_recvs += other.m_num_recvs; @@ -245,814 +238,822 @@ struct progress_status } }; -namespace NS_LIBFABRIC -{ -/// A wrapper around fi_close that reports any error -/// Because we use so many handles, we must be careful to -/// delete them all before closing resources that use them -template -void -fidclose(Handle fid, const char* msg) -{ - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("closing"), msg)); - int ret = fi_close(fid); - if (ret == -FI_EBUSY) { throw NS_LIBFABRIC::fabric_error(ret, "fi_close EBUSY"); } - else if (ret == FI_SUCCESS) { return; } - throw NS_LIBFABRIC::fabric_error(ret, "fi_close error"); -} - -/// when using thread local endpoints, we encapsulate things that -/// are needed to manage an endpoint -struct endpoint_wrapper -{ 
- private: - friend class controller; - - fid_ep* ep_ = nullptr; - fid_cq* rq_ = nullptr; - fid_cq* tq_ = nullptr; - const char* name_ = nullptr; - - public: - endpoint_wrapper() {} - endpoint_wrapper(fid_ep* ep, fid_cq* rq, fid_cq* tq, const char* name) - : ep_(ep) - , rq_(rq) - , tq_(tq) - , name_(name) +namespace NS_LIBFABRIC { + /// A wrapper around fi_close that reports any error + /// Because we use so many handles, we must be careful to + /// delete them all before closing resources that use them + template + void fidclose(Handle fid, char const* msg) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("closing"), msg)); + int ret = fi_close(fid); + if (ret == -FI_EBUSY) { throw NS_LIBFABRIC::fabric_error(ret, "fi_close EBUSY"); } + else if (ret == FI_SUCCESS) { return; } + throw NS_LIBFABRIC::fabric_error(ret, "fi_close error"); } - // to keep boost::lockfree happy, we need these copy operators - endpoint_wrapper(const endpoint_wrapper& ep) = default; - endpoint_wrapper& operator=(const endpoint_wrapper& ep) = default; - - void cleanup() + /// when using thread local endpoints, we encapsulate things that + /// are needed to manage an endpoint + struct endpoint_wrapper { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); - if (ep_) - { - fidclose(&ep_->fid, "endpoint"); - ep_ = nullptr; - } - if (rq_) + private: + friend class controller; + + fid_ep* ep_ = nullptr; + fid_cq* rq_ = nullptr; + fid_cq* tq_ = nullptr; + char const* name_ = nullptr; + + public: + endpoint_wrapper() {} + endpoint_wrapper(fid_ep* ep, fid_cq* rq, fid_cq* tq, char const* name) + : ep_(ep) + , rq_(rq) + , tq_(tq) + , name_(name) { - fidclose(&rq_->fid, "rq"); - rq_ = nullptr; - } - if (tq_) - { - fidclose(&tq_->fid, "tq"); - tq_ = nullptr; + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); } - } - - inline 
fid_ep* get_ep() { return ep_; } - inline fid_cq* get_rx_cq() { return rq_; } - inline fid_cq* get_tx_cq() { return tq_; } - inline void set_tx_cq(fid_cq* cq) { tq_ = cq; } - inline const char* get_name() { return name_; } -}; - -using region_type = NS_MEMORY::memory_handle; -using endpoint_context_pool = - boost::lockfree::queue>; - -struct stack_endpoint -{ - endpoint_wrapper endpoint_; - endpoint_context_pool* pool_; - // - stack_endpoint() - : endpoint_() - , pool_(nullptr) - { - } - // - stack_endpoint(fid_ep* ep, fid_cq* rq, fid_cq* tq, const char* name, - endpoint_context_pool* pool) - : endpoint_(ep, rq, tq, name) - , pool_(pool) - { - } - // - stack_endpoint& operator=(stack_endpoint&& other) - { - endpoint_ = std::move(other.endpoint_); - pool_ = std::exchange(other.pool_, nullptr); - return *this; - } - - ~stack_endpoint() - { - if (!pool_) return; - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "used push", "ep", NS_DEBUG::ptr(get_ep()), "tx cq", - NS_DEBUG::ptr(get_tx_cq()), "rx cq", NS_DEBUG::ptr(get_rx_cq()))); - pool_->push(endpoint_); - } - - inline fid_ep* get_ep() { return endpoint_.get_ep(); } - inline fid_cq* get_rx_cq() { return endpoint_.get_rx_cq(); } + // to keep boost::lockfree happy, we need these copy operators + endpoint_wrapper(endpoint_wrapper const& ep) = default; + endpoint_wrapper& operator=(endpoint_wrapper const& ep) = default; - inline fid_cq* get_tx_cq() { return endpoint_.get_tx_cq(); } -}; - -struct endpoints_lifetime_manager -{ - // threadlocal endpoints - static inline thread_local stack_endpoint tl_tx_; - static inline thread_local stack_endpoint tl_stx_; - static inline thread_local stack_endpoint tl_srx_; - // non threadlocal endpoints, tx/rx - endpoint_wrapper ep_tx_; - endpoint_wrapper ep_rx_; -}; - -template -class controller_base -{ - public: - typedef std::mutex mutex_type; - typedef std::lock_guard scoped_lock; - typedef std::unique_lock unique_lock; + void cleanup() + { + [[maybe_unused]] auto scp 
= + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + if (ep_) + { + fidclose(&ep_->fid, "endpoint"); + ep_ = nullptr; + } + if (rq_) + { + fidclose(&rq_->fid, "rq"); + rq_ = nullptr; + } + if (tq_) + { + fidclose(&tq_->fid, "tq"); + tq_ = nullptr; + } + } - protected: - // For threadlocal/scalable endpoints, - // we use a dedicated threadlocal endpoint wrapper - std::unique_ptr eps_; + inline fid_ep* get_ep() { return ep_; } + inline fid_cq* get_rx_cq() { return rq_; } + inline fid_cq* get_tx_cq() { return tq_; } + inline void set_tx_cq(fid_cq* cq) { tq_ = cq; } + inline char const* get_name() { return name_; } + }; + using region_type = NS_MEMORY::memory_handle; using endpoint_context_pool = boost::lockfree::queue>; - endpoint_context_pool tx_endpoints_; - endpoint_context_pool rx_endpoints_; - - struct fi_info* fabric_info_; - struct fid_fabric* fabric_; - struct fid_domain* fabric_domain_; - struct fid_pep* ep_passive_; - - struct fid_av* av_; - endpoint_type endpoint_type_; - - locality here_; - locality root_; - // used during queue creation setup and during polling - mutex_type controller_mutex_; - - // used to protect send/recv resources - alignas(64) mutex_type send_mutex_; - alignas(64) mutex_type recv_mutex_; - - std::size_t tx_inject_size_; - std::size_t tx_attr_size_; - std::size_t rx_attr_size_; - - uint32_t max_completions_per_poll_; - uint32_t msg_rendezvous_threshold_; - inline static constexpr uint32_t max_completions_array_limit_ = 256; + struct stack_endpoint + { + endpoint_wrapper endpoint_; + endpoint_context_pool* pool_; + // + stack_endpoint() + : endpoint_() + , pool_(nullptr) + { + } + // + stack_endpoint( + fid_ep* ep, fid_cq* rq, fid_cq* tq, char const* name, endpoint_context_pool* pool) + : endpoint_(ep, rq, tq, name) + , pool_(pool) + { + } + // + stack_endpoint& operator=(stack_endpoint&& other) + { + endpoint_ = std::move(other.endpoint_); + pool_ = std::exchange(other.pool_, nullptr); + return *this; + } - static 
inline thread_local std::chrono::steady_clock::time_point send_poll_stamp; - static inline thread_local std::chrono::steady_clock::time_point recv_poll_stamp; + ~stack_endpoint() + { + if (!pool_) return; + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Scalable Ep"), "used push", "ep", NS_DEBUG::ptr(get_ep()), + "tx cq", NS_DEBUG::ptr(get_tx_cq()), "rx cq", NS_DEBUG::ptr(get_rx_cq()))); + pool_->push(endpoint_); + } - // set if FI_MR_LOCAL is required (local access requires binding) - bool mrlocal = false; - // set if FI_MR_ENDPOINT is required (per endpoint memory binding) - bool mrbind = false; - // set if FI_MR_HRMEM provider requires heterogeneous memory registration - bool mrhmem = false; + inline fid_ep* get_ep() { return endpoint_.get_ep(); } - public: - bool get_mrbind() { return mrbind; } + inline fid_cq* get_rx_cq() { return endpoint_.get_rx_cq(); } - public: - NS_LIBFABRIC::simple_counter sends_posted_; - NS_LIBFABRIC::simple_counter recvs_posted_; - NS_LIBFABRIC::simple_counter sends_readied_; - NS_LIBFABRIC::simple_counter recvs_readied_; - NS_LIBFABRIC::simple_counter sends_complete; - NS_LIBFABRIC::simple_counter recvs_complete; + inline fid_cq* get_tx_cq() { return endpoint_.get_tx_cq(); } + }; - void finvoke(const char* msg, const char* err, int ret) + struct endpoints_lifetime_manager { - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>(msg))); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, err); - } + // threadlocal endpoints + static inline thread_local stack_endpoint tl_tx_; + static inline thread_local stack_endpoint tl_stx_; + static inline thread_local stack_endpoint tl_srx_; + // non threadlocal endpoints, tx/rx + endpoint_wrapper ep_tx_; + endpoint_wrapper ep_rx_; + }; - public: - // -------------------------------------------------------------------- - controller_base() - : eps_(nullptr) - , tx_endpoints_(1) - , rx_endpoints_(1) - , fabric_info_(nullptr) - , fabric_(nullptr) - , fabric_domain_(nullptr) - , ep_passive_(nullptr) - , 
av_(nullptr) - , tx_inject_size_(0) - , tx_attr_size_(0) - , rx_attr_size_(0) - , max_completions_per_poll_(1) - , msg_rendezvous_threshold_(0x4000) - , sends_posted_(0) - , recvs_posted_(0) - , sends_readied_(0) - , recvs_readied_(0) - , sends_complete(0) - , recvs_complete(0) + template + class controller_base { - } + public: + typedef std::mutex mutex_type; + typedef std::lock_guard scoped_lock; + typedef std::unique_lock unique_lock; + + protected: + // For threadlocal/scalable endpoints, + // we use a dedicated threadlocal endpoint wrapper + std::unique_ptr eps_; + + using endpoint_context_pool = + boost::lockfree::queue>; + endpoint_context_pool tx_endpoints_; + endpoint_context_pool rx_endpoints_; + + struct fi_info* fabric_info_; + struct fid_fabric* fabric_; + struct fid_domain* fabric_domain_; + struct fid_pep* ep_passive_; + + struct fid_av* av_; + endpoint_type endpoint_type_; + + locality here_; + locality root_; + + // used during queue creation setup and during polling + mutex_type controller_mutex_; + + // used to protect send/recv resources + alignas(64) mutex_type send_mutex_; + alignas(64) mutex_type recv_mutex_; + + std::size_t tx_inject_size_; + std::size_t tx_attr_size_; + std::size_t rx_attr_size_; + + uint32_t max_completions_per_poll_; + uint32_t msg_rendezvous_threshold_; + inline static constexpr uint32_t max_completions_array_limit_ = 256; + + static inline thread_local std::chrono::steady_clock::time_point send_poll_stamp; + static inline thread_local std::chrono::steady_clock::time_point recv_poll_stamp; + + // set if FI_MR_LOCAL is required (local access requires binding) + bool mrlocal = false; + // set if FI_MR_ENDPOINT is required (per endpoint memory binding) + bool mrbind = false; + // set if FI_MR_HRMEM provider requires heterogeneous memory registration + bool mrhmem = false; + + public: + bool get_mrbind() { return mrbind; } + + public: + NS_LIBFABRIC::simple_counter sends_posted_; + NS_LIBFABRIC::simple_counter recvs_posted_; 
+ NS_LIBFABRIC::simple_counter sends_readied_; + NS_LIBFABRIC::simple_counter recvs_readied_; + NS_LIBFABRIC::simple_counter sends_complete; + NS_LIBFABRIC::simple_counter recvs_complete; + + void finvoke(char const* msg, char const* err, int ret) + { + LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>(msg))); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, err); + } - // -------------------------------------------------------------------- - // clean up all resources - ~controller_base() - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - unsigned int messages_handled_ = 0; - unsigned int rma_reads_ = 0; - unsigned int recv_deletes_ = 0; + public: + // -------------------------------------------------------------------- + controller_base() + : eps_(nullptr) + , tx_endpoints_(1) + , rx_endpoints_(1) + , fabric_info_(nullptr) + , fabric_(nullptr) + , fabric_domain_(nullptr) + , ep_passive_(nullptr) + , av_(nullptr) + , tx_inject_size_(0) + , tx_attr_size_(0) + , rx_attr_size_(0) + , max_completions_per_poll_(1) + , msg_rendezvous_threshold_(0x4000) + , sends_posted_(0) + , recvs_posted_(0) + , sends_readied_(0) + , recvs_readied_(0) + , sends_complete(0) + , recvs_complete(0) + { + } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("counters"), "Received messages", debug::dec<>(messages_handled_), - "Total reads", debug::dec<>(rma_reads_), "Total deletes", - debug::dec<>(recv_deletes_), "deletes error", - debug::dec<>(messages_handled_ - recv_deletes_))); + // -------------------------------------------------------------------- + // clean up all resources + ~controller_base() + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + unsigned int messages_handled_ = 0; + unsigned int rma_reads_ = 0; + unsigned int recv_deletes_ = 0; - tx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); - rx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); + LF_DEB(NS_DEBUG::cnb_deb, + 
debug(debug::str<>("counters"), "Received messages", + debug::dec<>(messages_handled_), "Total reads", debug::dec<>(rma_reads_), + "Total deletes", debug::dec<>(recv_deletes_), "deletes error", + debug::dec<>(messages_handled_ - recv_deletes_))); - // No cleanup threadlocals : done by consume_all cleanup above - // eps_->tl_tx_.endpoint_.cleanup(); - // eps_->tl_stx_.endpoint_.cleanup(); - // eps_->tl_srx_.endpoint_.cleanup(); + tx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); + rx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); - // non threadlocal endpoints, tx/rx - eps_->ep_tx_.cleanup(); - eps_->ep_rx_.cleanup(); + // No cleanup threadlocals : done by consume_all cleanup above + // eps_->tl_tx_.endpoint_.cleanup(); + // eps_->tl_stx_.endpoint_.cleanup(); + // eps_->tl_srx_.endpoint_.cleanup(); - // Cleanup endpoints - eps_.reset(nullptr); + // non threadlocal endpoints, tx/rx + eps_->ep_tx_.cleanup(); + eps_->ep_rx_.cleanup(); - // delete adddress vector - fidclose(&av_->fid, "Address Vector"); + // Cleanup endpoints + eps_.reset(nullptr); - try - { - fidclose(&fabric_domain_->fid, "Domain"); - } - catch (fabric_error& e) - { - std::cout << "fabric domain close failed : Ensure all RMA " - "objects are freed before program termination" - << std::endl; - } - fidclose(&fabric_->fid, "Fabric"); + // delete adddress vector + fidclose(&av_->fid, "Address Vector"); - // clean up - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("freeing fabric_info"))); + try + { + fidclose(&fabric_domain_->fid, "Domain"); + } + catch (fabric_error& e) + { + std::cout << "fabric domain close failed : Ensure all RMA " + "objects are freed before program termination" + << std::endl; + } + fidclose(&fabric_->fid, "Fabric"); - fi_freeinfo(fabric_info_); - } + // clean up + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("freeing fabric_info"))); - // -------------------------------------------------------------------- - // setup an endpoint for receiving messages, - // 
usually an rx endpoint is shared by all threads - endpoint_wrapper create_rx_endpoint(struct fid_domain* domain, struct fi_info* info, - struct fid_av* av) - { - auto ep_rx = new_endpoint_active(domain, info, false); + fi_freeinfo(fabric_info_); + } - // bind address vector - bind_address_vector_to_endpoint(ep_rx, av); + // -------------------------------------------------------------------- + // setup an endpoint for receiving messages, + // usually an rx endpoint is shared by all threads + endpoint_wrapper create_rx_endpoint( + struct fid_domain* domain, struct fi_info* info, struct fid_av* av) + { + auto ep_rx = new_endpoint_active(domain, info, false); - // create a completion queue for the rx endpoint - info->rx_attr->op_flags |= FI_COMPLETION; - auto rx_cq = create_completion_queue(domain, info->rx_attr->size, "rx"); + // bind address vector + bind_address_vector_to_endpoint(ep_rx, av); - // bind CQ to endpoint - bind_queue_to_endpoint(ep_rx, rx_cq, FI_RECV, "rx"); - return endpoint_wrapper(ep_rx, rx_cq, nullptr, "rx"); - } + // create a completion queue for the rx endpoint + info->rx_attr->op_flags |= FI_COMPLETION; + auto rx_cq = create_completion_queue(domain, info->rx_attr->size, "rx"); - // -------------------------------------------------------------------- - // initialize the basic fabric/domain/name - template - void initialize(std::string const& provider, bool rootnode, int size, size_t threads, - Args&&... 
args) - { - LF_DEB(NS_DEBUG::cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // bind CQ to endpoint + bind_queue_to_endpoint(ep_rx, rx_cq, FI_RECV, "rx"); + return endpoint_wrapper(ep_rx, rx_cq, nullptr, "rx"); + } - max_completions_per_poll_ = libfabric_completions_per_poll(); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Poll completions"), debug::dec<3>(max_completions_per_poll_))); + // -------------------------------------------------------------------- + // initialize the basic fabric/domain/name + template + void initialize( + std::string const& provider, bool rootnode, int size, size_t threads, Args&&... args) + { + LF_DEB(NS_DEBUG::cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - uint32_t default_val = (threads == 1) ? 0x400 : 0x4000; - msg_rendezvous_threshold_ = libfabric_rendezvous_threshold(default_val); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Rendezvous threshold"), debug::hex<4>(msg_rendezvous_threshold_))); + max_completions_per_poll_ = libfabric_completions_per_poll(); + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("Poll completions"), debug::dec<3>(max_completions_per_poll_))); - endpoint_type_ = static_cast(libfabric_endpoint_type()); - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("Endpoints"), libfabric_endpoint_string())); + uint32_t default_val = (threads == 1) ? 
0x400 : 0x4000; + msg_rendezvous_threshold_ = libfabric_rendezvous_threshold(default_val); + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("Rendezvous threshold"), + debug::hex<4>(msg_rendezvous_threshold_))); - eps_ = std::make_unique(); + endpoint_type_ = static_cast(libfabric_endpoint_type()); + LF_DEB( + NS_DEBUG::cnb_err, debug(debug::str<>("Endpoints"), libfabric_endpoint_string())); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Threads"), debug::dec<3>(threads))); + eps_ = std::make_unique(); - open_fabric(provider, threads, rootnode); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Threads"), debug::dec<3>(threads))); - // create an address vector that will be bound to (all) endpoints - av_ = create_address_vector(fabric_info_, size, threads); + open_fabric(provider, threads, rootnode); - // we need an rx endpoint in all cases except scalable rx - if (endpoint_type_ != endpoint_type::scalableTxRx) - { - // setup an endpoint for receiving messages - // rx endpoint is typically shared by all threads - eps_->ep_rx_ = create_rx_endpoint(fabric_domain_, fabric_info_, av_); - } + // create an address vector that will be bound to (all) endpoints + av_ = create_address_vector(fabric_info_, size, threads); - if (endpoint_type_ == endpoint_type::single) - { - // always bind a tx cq to the rx endpoint for single endpoint type - auto tx_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); - eps_->ep_rx_.set_tx_cq(tx_cq); - } - else if (endpoint_type_ != endpoint_type::scalableTxRx) - { + // we need an rx endpoint in all cases except scalable rx + if (endpoint_type_ != endpoint_type::scalableTxRx) + { + // setup an endpoint for receiving messages + // rx endpoint is typically shared by all threads + eps_->ep_rx_ = create_rx_endpoint(fabric_domain_, fabric_info_, av_); + } + + if (endpoint_type_ == endpoint_type::single) + { + // always bind a tx cq to the rx endpoint for single endpoint type + auto tx_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, 
eps_->ep_rx_.get_ep()); + eps_->ep_rx_.set_tx_cq(tx_cq); + } + else if (endpoint_type_ != endpoint_type::scalableTxRx) + { #if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || \ defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_CXI) || defined(HAVE_LIBFABRIC_EFA) - // it appears that the rx endpoint cannot be enabled if it does not - // have a Tx CQ (at least when using sockets), so we create a dummy - // Tx CQ and bind it just to stop libfabric from triggering an error. - // The tx_cq won't actually be used because the user will get the real - // tx endpoint which will have the correct cq bound to it - auto dummy_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); - eps_->ep_rx_.set_tx_cq(dummy_cq); + // it appears that the rx endpoint cannot be enabled if it does not + // have a Tx CQ (at least when using sockets), so we create a dummy + // Tx CQ and bind it just to stop libfabric from triggering an error. + // The tx_cq won't actually be used because the user will get the real + // tx endpoint which will have the correct cq bound to it + auto dummy_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); + eps_->ep_rx_.set_tx_cq(dummy_cq); #endif - } + } - if (endpoint_type_ == endpoint_type::multiple) - { - // create a separate Tx endpoint for sending messages - // note that the CQ needs FI_RECV even though its a Tx cq to keep - // some providers happy as they trigger an error if an endpoint - // has no Rx cq attached (appears to be a progress related bug) - auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - - // create a completion queue for tx endpoint - fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - auto tx_cq = - create_completion_queue(fabric_domain_, fabric_info_->tx_attr->size, "tx multiple"); - - bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx multiple"); - bind_address_vector_to_endpoint(ep_tx, av_); - enable_endpoint(ep_tx, "tx 
multiple"); - - // combine endpoints and CQ into wrapper for convenience - eps_->ep_tx_ = endpoint_wrapper(ep_tx, nullptr, tx_cq, "tx multiple"); - } - else if (endpoint_type_ == endpoint_type::threadlocalTx) - { - // each thread creates a Tx endpoint on first call to get_tx_endpoint() - } - else if (endpoint_type_ == endpoint_type::scalableTx || - endpoint_type_ == endpoint_type::scalableTxRx) - { - // setup tx contexts for each possible thread - size_t threads_allocated = 0; - auto ep_sx = new_endpoint_scalable(fabric_domain_, fabric_info_, true /*Tx*/, threads, - threads_allocated); + if (endpoint_type_ == endpoint_type::multiple) + { + // create a separate Tx endpoint for sending messages + // note that the CQ needs FI_RECV even though its a Tx cq to keep + // some providers happy as they trigger an error if an endpoint + // has no Rx cq attached (appears to be a progress related bug) + auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), - "Contexts allocated", debug::dec<4>(threads_allocated))); + // create a completion queue for tx endpoint + fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); + auto tx_cq = create_completion_queue( + fabric_domain_, fabric_info_->tx_attr->size, "tx multiple"); - finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", - fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); + bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx multiple"); + bind_address_vector_to_endpoint(ep_tx, av_); + enable_endpoint(ep_tx, "tx multiple"); - // prepare the stack for insertions - tx_endpoints_.reserve(threads_allocated); - // - for (unsigned int i = 0; i < threads_allocated; i++) + // combine endpoints and CQ into wrapper for convenience + eps_->ep_tx_ = endpoint_wrapper(ep_tx, nullptr, tx_cq, "tx multiple"); + } + else if (endpoint_type_ == endpoint_type::threadlocalTx) { - [[maybe_unused]] auto scp = - 
NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + // each thread creates a Tx endpoint on first call to get_tx_endpoint() + } + else if (endpoint_type_ == endpoint_type::scalableTx || + endpoint_type_ == endpoint_type::scalableTxRx) + { + // setup tx contexts for each possible thread + size_t threads_allocated = 0; + auto ep_sx = new_endpoint_scalable( + fabric_domain_, fabric_info_, true /*Tx*/, threads, threads_allocated); + + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("scalable endpoint ok"), "Contexts allocated", + debug::dec<4>(threads_allocated))); - // For threadlocal/scalable endpoints, tx/rx resources - fid_ep* scalable_ep_tx; - fid_cq* scalable_cq_tx; + finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", + fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); - // Create a Tx context, cq, bind and enable - finvoke("create tx context", "fi_tx_context", - fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); - scalable_cq_tx = create_completion_queue(fabric_domain_, - fabric_info_->tx_attr->size, "tx scalable"); - bind_queue_to_endpoint(scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); - enable_endpoint(scalable_ep_tx, "tx scalable"); + // prepare the stack for insertions + tx_endpoints_.reserve(threads_allocated); + // + for (unsigned int i = 0; i < threads_allocated; i++) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + + // For threadlocal/scalable endpoints, tx/rx resources + fid_ep* scalable_ep_tx; + fid_cq* scalable_cq_tx; + + // Create a Tx context, cq, bind and enable + finvoke("create tx context", "fi_tx_context", + fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); + scalable_cq_tx = create_completion_queue( + fabric_domain_, fabric_info_->tx_attr->size, "tx scalable"); + bind_queue_to_endpoint( + scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); + enable_endpoint(scalable_ep_tx, "tx scalable"); + + endpoint_wrapper 
tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", + NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), + "rx cq", NS_DEBUG::ptr(tx.get_rx_cq()))); + tx_endpoints_.push(tx); + } - endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", - NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(tx.get_rx_cq()))); - tx_endpoints_.push(tx); + eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); } - eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); + // once enabled we can get the address + enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); + here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), iplocality(here_))); + + // // if we are using scalable endpoints, then setup tx/rx contexts + // // we will us a single endpoint for all Tx/Rx contexts + // if (endpoint_type_ == endpoint_type::scalableTx || + // endpoint_type_ == endpoint_type::scalableTxRx) + // { + + // // thread slots might not be same as what we asked for + // size_t threads_allocated = 0; + // auto ep_sx = new_endpoint_scalable(fabric_domain_, fabric_info_, true /*Tx*/, threads, + // threads_allocated); + // if (!ep_sx) + // throw NS_LIBFABRIC::fabric_error(FI_EOTHER, "fi_scalable endpoint creation failed"); + + // LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), + // "Contexts allocated", debug::dec<4>(threads_allocated))); + + // // prepare the stack for insertions + // tx_endpoints_.reserve(threads_allocated); + // rx_endpoints_.reserve(threads_allocated); + // // + // for (unsigned int i = 0; i < threads_allocated; i++) + // { + // [[maybe_unused]] auto scp = + // NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), 
"scalable", debug::dec<4>(i)); + + // // For threadlocal/scalable endpoints, tx/rx resources + // fid_ep* scalable_ep_tx; + // fid_cq* scalable_cq_tx; + //// fid_ep* scalable_ep_rx; + //// fid_cq* scalable_cq_rx; + + // // Tx context setup + // finvoke("create tx context", "fi_tx_context", + // fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); + + // scalable_cq_tx = create_completion_queue(fabric_domain_, + // fabric_info_->tx_attr->size, "tx scalable"); + + // bind_queue_to_endpoint(scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); + + // enable_endpoint(scalable_ep_tx, "tx scalable"); + + // endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); + // LF_DEB(NS_DEBUG::cnb_deb, + // trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", + // NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", + // NS_DEBUG::ptr(tx.get_rx_cq()))); + // tx_endpoints_.push(tx); + + // // Rx contexts + //// finvoke("create rx context", "fi_rx_context", + //// fi_rx_context(ep_sx, i, NULL, &scalable_ep_rx, NULL)); + + //// scalable_cq_rx = + //// create_completion_queue(fabric_domain_, fabric_info_->rx_attr->size, "rx"); + + //// bind_queue_to_endpoint(scalable_ep_rx, scalable_cq_rx, FI_RECV, "rx scalable"); + + //// enable_endpoint(scalable_ep_rx, "rx scalable"); + + //// endpoint_wrapper rx(scalable_ep_rx, scalable_cq_rx, nullptr, "rx scalable"); + //// LF_DEB(NS_DEBUG::cnb_deb, + //// trace(debug::str<>("Scalable Ep"), "initial rx push", "ep", + //// NS_DEBUG::ptr(rx.get_ep()), "tx cq", NS_DEBUG::ptr(rx.get_tx_cq()), "rx cq", + //// NS_DEBUG::ptr(rx.get_rx_cq()))); + //// rx_endpoints_.push(rx); + // } + + // finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", + // fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); + + // eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); + + return static_cast(this)->initialize_derived( + provider, rootnode, size, threads, std::forward(args)...); } - // once 
enabled we can get the address - enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); - here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), iplocality(here_))); - - // // if we are using scalable endpoints, then setup tx/rx contexts - // // we will us a single endpoint for all Tx/Rx contexts - // if (endpoint_type_ == endpoint_type::scalableTx || - // endpoint_type_ == endpoint_type::scalableTxRx) - // { - - // // thread slots might not be same as what we asked for - // size_t threads_allocated = 0; - // auto ep_sx = new_endpoint_scalable(fabric_domain_, fabric_info_, true /*Tx*/, threads, - // threads_allocated); - // if (!ep_sx) - // throw NS_LIBFABRIC::fabric_error(FI_EOTHER, "fi_scalable endpoint creation failed"); - - // LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), - // "Contexts allocated", debug::dec<4>(threads_allocated))); - - // // prepare the stack for insertions - // tx_endpoints_.reserve(threads_allocated); - // rx_endpoints_.reserve(threads_allocated); - // // - // for (unsigned int i = 0; i < threads_allocated; i++) - // { - // [[maybe_unused]] auto scp = - // NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); - - // // For threadlocal/scalable endpoints, tx/rx resources - // fid_ep* scalable_ep_tx; - // fid_cq* scalable_cq_tx; - //// fid_ep* scalable_ep_rx; - //// fid_cq* scalable_cq_rx; - - // // Tx context setup - // finvoke("create tx context", "fi_tx_context", - // fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); - - // scalable_cq_tx = create_completion_queue(fabric_domain_, - // fabric_info_->tx_attr->size, "tx scalable"); - - // bind_queue_to_endpoint(scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); - - // enable_endpoint(scalable_ep_tx, "tx scalable"); - - // endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - // LF_DEB(NS_DEBUG::cnb_deb, - // trace(debug::str<>("Scalable Ep"), 
"initial tx push", "ep", - // NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", - // NS_DEBUG::ptr(tx.get_rx_cq()))); - // tx_endpoints_.push(tx); - - // // Rx contexts - //// finvoke("create rx context", "fi_rx_context", - //// fi_rx_context(ep_sx, i, NULL, &scalable_ep_rx, NULL)); - - //// scalable_cq_rx = - //// create_completion_queue(fabric_domain_, fabric_info_->rx_attr->size, "rx"); - - //// bind_queue_to_endpoint(scalable_ep_rx, scalable_cq_rx, FI_RECV, "rx scalable"); - - //// enable_endpoint(scalable_ep_rx, "rx scalable"); - - //// endpoint_wrapper rx(scalable_ep_rx, scalable_cq_rx, nullptr, "rx scalable"); - //// LF_DEB(NS_DEBUG::cnb_deb, - //// trace(debug::str<>("Scalable Ep"), "initial rx push", "ep", - //// NS_DEBUG::ptr(rx.get_ep()), "tx cq", NS_DEBUG::ptr(rx.get_tx_cq()), "rx cq", - //// NS_DEBUG::ptr(rx.get_rx_cq()))); - //// rx_endpoints_.push(rx); - // } - - // finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", - // fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); - - // eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); - - return static_cast(this)->initialize_derived(provider, rootnode, size, threads, - std::forward(args)...); - } - - // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() { return static_cast(this)->caps_flags(); } + // -------------------------------------------------------------------- + constexpr uint64_t caps_flags() { return static_cast(this)->caps_flags(); } - // -------------------------------------------------------------------- - constexpr fi_threading threadlevel_flags() - { - return static_cast(this)->threadlevel_flags(); - } + // -------------------------------------------------------------------- + constexpr fi_threading threadlevel_flags() + { + return static_cast(this)->threadlevel_flags(); + } - // -------------------------------------------------------------------- - constexpr std::int64_t 
memory_registration_mode_flags() - { - std::int64_t base_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + // -------------------------------------------------------------------- + constexpr std::int64_t memory_registration_mode_flags() + { + std::int64_t base_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; #if OOMPH_ENABLE_DEVICE - base_flags = base_flags | FI_MR_HMEM; + base_flags = base_flags | FI_MR_HMEM; #endif - base_flags = base_flags | FI_MR_LOCAL; + base_flags = base_flags | FI_MR_LOCAL; #if defined(HAVE_LIBFABRIC_CXI) - return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; + return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; #elif defined(HAVE_LIBFABRIC_EFA) - return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; + return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; #else - return base_flags; + return base_flags; #endif - } - - // -------------------------------------------------------------------- - uint32_t rendezvous_threshold() { return msg_rendezvous_threshold_; } - // -------------------------------------------------------------------- - // initialize the basic fabric/domain/name - void open_fabric(std::string const& provider, int threads, bool rootnode) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + } - struct fi_info* fabric_hints_ = fi_allocinfo(); - if (!fabric_hints_) + // -------------------------------------------------------------------- + uint32_t rendezvous_threshold() { return msg_rendezvous_threshold_; } + // -------------------------------------------------------------------- + // initialize the basic fabric/domain/name + void open_fabric(std::string const& provider, int threads, bool rootnode) { - throw NS_LIBFABRIC::fabric_error(-1, "Failed to allocate fabric hints"); - } + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Here locality"), iplocality(here_))); + struct fi_info* 
fabric_hints_ = fi_allocinfo(); + if (!fabric_hints_) + { + throw NS_LIBFABRIC::fabric_error(-1, "Failed to allocate fabric hints"); + } + + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Here locality"), iplocality(here_))); #if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || defined(HAVE_LIBFABRIC_VERBS) - fabric_hints_->addr_format = FI_SOCKADDR_IN; + fabric_hints_->addr_format = FI_SOCKADDR_IN; #elif defined(HAVE_LIBFABRIC_EFA) - fabric_hints_->addr_format = FI_ADDR_EFA; + fabric_hints_->addr_format = FI_ADDR_EFA; #endif - fabric_hints_->caps = caps_flags(); + fabric_hints_->caps = caps_flags(); - fabric_hints_->mode = FI_CONTEXT /*| FI_MR_LOCAL*/; - if (provider.c_str() == std::string("tcp")) - { - fabric_hints_->fabric_attr->prov_name = - strdup(std::string(provider + ";ofi_rxm").c_str()); - } - else if (provider.c_str() == std::string("verbs")) - { - fabric_hints_->fabric_attr->prov_name = - strdup(std::string(provider + ";ofi_rxm").c_str()); - } - else { fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); + fabric_hints_->mode = FI_CONTEXT /*| FI_MR_LOCAL*/; + if (provider.c_str() == std::string("tcp")) + { + fabric_hints_->fabric_attr->prov_name = + strdup(std::string(provider + ";ofi_rxm").c_str()); + } + else if (provider.c_str() == std::string("verbs")) + { + fabric_hints_->fabric_attr->prov_name = + strdup(std::string(provider + ";ofi_rxm").c_str()); + } + else { fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); - fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); + fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); - // Enable/Disable the use of progress threads - auto progress = libfabric_progress_type(); - 
fabric_hints_->domain_attr->control_progress = progress; - fabric_hints_->domain_attr->data_progress = progress; - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("progress"), libfabric_progress_string())); + // Enable/Disable the use of progress threads + auto progress = libfabric_progress_type(); + fabric_hints_->domain_attr->control_progress = progress; + fabric_hints_->domain_attr->data_progress = progress; + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("progress"), libfabric_progress_string())); - if (threads > 1) - { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); - // Enable thread safe mode (Does not work with psm2 provider) - // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; - //fabric_hints_->domain_attr->threading = FI_THREAD_FID; - fabric_hints_->domain_attr->threading = threadlevel_flags(); - } - else - { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_DOMAIN"))); - // we serialize everything - fabric_hints_->domain_attr->threading = FI_THREAD_DOMAIN; - } + if (threads > 1) + { + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); + // Enable thread safe mode (Does not work with psm2 provider) + // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; + //fabric_hints_->domain_attr->threading = FI_THREAD_FID; + fabric_hints_->domain_attr->threading = threadlevel_flags(); + } + else + { + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_DOMAIN"))); + // we serialize everything + fabric_hints_->domain_attr->threading = FI_THREAD_DOMAIN; + } - // Enable resource management - fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; + // Enable resource management + fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric endpoint"), "RDM")); - fabric_hints_->ep_attr->type = FI_EP_RDM; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric endpoint"), "RDM")); + fabric_hints_->ep_attr->type = FI_EP_RDM; - uint64_t flags = 0; - 
LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("get fabric info"), "FI_VERSION", - debug::dec(LIBFABRIC_FI_VERSION_MAJOR), debug::dec(LIBFABRIC_FI_VERSION_MINOR))); + uint64_t flags = 0; + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("get fabric info"), "FI_VERSION", + debug::dec(LIBFABRIC_FI_VERSION_MAJOR), + debug::dec(LIBFABRIC_FI_VERSION_MINOR))); - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), - nullptr, nullptr, flags, fabric_hints_, &fabric_info_); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); + int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + nullptr, nullptr, flags, fabric_hints_, &fabric_info_); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); - if (rootnode) - { - LF_DEB(NS_DEBUG::cnb_err, - trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); - } + if (rootnode) + { + LF_DEB(NS_DEBUG::cnb_err, + trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); + } - bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); + bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); - mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_LOCAL"), mrlocal)); + mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_LOCAL"), mrlocal)); - mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ENDPOINT"), mrbind)); + mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ENDPOINT"), mrbind)); - /* Check if 
provider requires heterogeneous memory registration */ - mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_HMEM"), mrhmem)); + /* Check if provider requires heterogeneous memory registration */ + mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_HMEM"), mrhmem)); - bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); + bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating fi_fabric"))); - ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating fi_fabric"))); + ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); - // Allocate a domain. - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Allocating domain"))); - ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_domain"); + // Allocate a domain. 
+ LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Allocating domain"))); + ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_domain"); #if defined(HAVE_LIBFABRIC_GNI) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "GNI memory registration block"); - - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI String values")); - // Dump out all vars for debug purposes - for (auto& gni_data : gni_strs) { - _set_check_domain_op_value(gni_data.first, 0, gni_data.second.c_str(), - false); - } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI Int values")); - for (auto& gni_data : gni_ints) - { - _set_check_domain_op_value(gni_data.first, 0, gni_data.second.c_str(), - false); - } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"))); - - // -------------------------- - // GNI_MR_CACHE - // set GNI mem reg to be either none, internal or udreg - // - _set_check_domain_op_value(GNI_MR_CACHE, const_cast(OOMPH_GNI_REG), - "GNI_MR_CACHE"); - - // -------------------------- - // GNI_MR_UDREG_REG_LIMIT - // Experiments showed default value of 2048 too high if - // launching multiple clients on one node - // - int32_t udreg_limit = 0x0800; // 0x0400 = 1024, 0x0800 = 2048 - _set_check_domain_op_value(GNI_MR_UDREG_REG_LIMIT, udreg_limit, - "GNI_MR_UDREG_REG_LIMIT"); - - // -------------------------- - // GNI_MR_CACHE_LAZY_DEREG - // Enable lazy deregistration in MR cache - // - int32_t enable = 1; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); - _set_check_domain_op_value(GNI_MR_CACHE_LAZY_DEREG, enable, - "GNI_MR_CACHE_LAZY_DEREG"); + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "GNI memory registration block"); - // -------------------------- - // GNI_MSG_RENDEZVOUS_THRESHOLD (c.f. 
GNI_RMA_RDMA_THRESHOLD) - // - int32_t thresh = msg_rendezvous_threshold_; - _set_check_domain_op_value(GNI_MSG_RENDEZVOUS_THRESHOLD, thresh, - "GNI_MSG_RENDEZVOUS_THRESHOLD"); - } + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI String values")); + // Dump out all vars for debug purposes + for (auto& gni_data : gni_strs) + { + _set_check_domain_op_value( + gni_data.first, 0, gni_data.second.c_str(), false); + } + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI Int values")); + for (auto& gni_data : gni_ints) + { + _set_check_domain_op_value( + gni_data.first, 0, gni_data.second.c_str(), false); + } + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"))); + + // -------------------------- + // GNI_MR_CACHE + // set GNI mem reg to be either none, internal or udreg + // + _set_check_domain_op_value( + GNI_MR_CACHE, const_cast(OOMPH_GNI_REG), "GNI_MR_CACHE"); + + // -------------------------- + // GNI_MR_UDREG_REG_LIMIT + // Experiments showed default value of 2048 too high if + // launching multiple clients on one node + // + int32_t udreg_limit = 0x0800; // 0x0400 = 1024, 0x0800 = 2048 + _set_check_domain_op_value( + GNI_MR_UDREG_REG_LIMIT, udreg_limit, "GNI_MR_UDREG_REG_LIMIT"); + + // -------------------------- + // GNI_MR_CACHE_LAZY_DEREG + // Enable lazy deregistration in MR cache + // + int32_t enable = 1; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); + _set_check_domain_op_value( + GNI_MR_CACHE_LAZY_DEREG, enable, "GNI_MR_CACHE_LAZY_DEREG"); + + // -------------------------- + // GNI_MSG_RENDEZVOUS_THRESHOLD (c.f. 
GNI_RMA_RDMA_THRESHOLD) + // + int32_t thresh = msg_rendezvous_threshold_; + _set_check_domain_op_value( + GNI_MSG_RENDEZVOUS_THRESHOLD, thresh, "GNI_MSG_RENDEZVOUS_THRESHOLD"); + } #endif - tx_inject_size_ = fabric_info_->tx_attr->inject_size; + tx_inject_size_ = fabric_info_->tx_attr->inject_size; - // the number of preposted receives, and sender queue depth - // is set by querying the tx/tx attr sizes - tx_attr_size_ = std::min(size_t(512), fabric_info_->tx_attr->size / 2); - rx_attr_size_ = std::min(size_t(512), fabric_info_->rx_attr->size / 2); - fi_freeinfo(fabric_hints_); - } + // the number of preposted receives, and sender queue depth + // is set by querying the tx/tx attr sizes + tx_attr_size_ = std::min(size_t(512), fabric_info_->tx_attr->size / 2); + rx_attr_size_ = std::min(size_t(512), fabric_info_->rx_attr->size / 2); + fi_freeinfo(fabric_hints_); + } - // -------------------------------------------------------------------- - struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) - { - return static_cast(this)->set_src_dst_addresses(info, tx); - } + // -------------------------------------------------------------------- + struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) + { + return static_cast(this)->set_src_dst_addresses(info, tx); + } #ifdef HAVE_LIBFABRIC_GNI - // -------------------------------------------------------------------- - // Special GNI extensions to disable memory registration cache - - // if set is false, the old value is returned and nothing is set - template - int _set_check_domain_op_value(int op, T value, const char* info, bool set = true) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - static struct fi_gni_ops_domain* gni_domain_ops = nullptr; - int ret = 0; + // -------------------------------------------------------------------- + // Special GNI extensions to disable memory registration cache - if (gni_domain_ops == nullptr) + // if set is false, 
the old value is returned and nothing is set + template + int _set_check_domain_op_value(int op, T value, char const* info, bool set = true) { - ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, (void**)&gni_domain_ops, - nullptr); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), - NS_DEBUG::ptr(gni_domain_ops))); - } + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + static struct fi_gni_ops_domain* gni_domain_ops = nullptr; + int ret = 0; - // if open was ok and set flag is present, then set value - if (ret == 0 && set) - { - ret = gni_domain_ops->set_val(&fabric_domain_->fid, (dom_ops_val_t)(op), - reinterpret_cast(&value)); + if (gni_domain_ops == nullptr) + { + ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, + (void**) &gni_domain_ops, nullptr); + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), + NS_DEBUG::ptr(gni_domain_ops))); + } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni set ops val"), value, (ret == 0 ? "OK" : "FAIL"))); - } + // if open was ok and set flag is present, then set value + if (ret == 0 && set) + { + ret = gni_domain_ops->set_val( + &fabric_domain_->fid, (dom_ops_val_t) (op), reinterpret_cast(&value)); - // Get the value (so we can check that the value we set is now returned) - T new_value; - ret = gni_domain_ops->get_val(&fabric_domain_->fid, (dom_ops_val_t)(op), &new_value); - if constexpr (std::is_integral::value) - { - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), - info, debug::hex<8>(new_value))); - } - else - { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, new_value)); - } - // - if (ret) throw NS_LIBFABRIC::fabric_error(ret, std::string("setting ") + info); + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("gni set ops val"), value, (ret == 0 ? 
"OK" : "FAIL"))); + } - return ret; - } + // Get the value (so we can check that the value we set is now returned) + T new_value; + ret = gni_domain_ops->get_val(&fabric_domain_->fid, (dom_ops_val_t) (op), &new_value); + if constexpr (std::is_integral::value) + { + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, + debug::hex<8>(new_value))); + } + else + { + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, new_value)); + } + // + if (ret) throw NS_LIBFABRIC::fabric_error(ret, std::string("setting ") + info); + + return ret; + } #endif - // -------------------------------------------------------------------- - struct fid_ep* new_endpoint_active(struct fid_domain* domain, struct fi_info* info, bool tx) - { - // don't allow multiple threads to call endpoint create at the same time - scoped_lock lock(controller_mutex_); + // -------------------------------------------------------------------- + struct fid_ep* new_endpoint_active(struct fid_domain* domain, struct fi_info* info, bool tx) + { + // don't allow multiple threads to call endpoint create at the same time + scoped_lock lock(controller_mutex_); - // make sure src_addr/dst_addr are set accordingly - // and we do not create two endpoint with the same src address - struct fi_info* hints = set_src_dst_addresses(info, tx); + // make sure src_addr/dst_addr are set accordingly + // and we do not create two endpoint with the same src address + struct fi_info* hints = set_src_dst_addresses(info, tx); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); - struct fid_ep* ep; - int ret = 
fi_endpoint(domain, hints, &ep, nullptr); - if (ret) - { - throw NS_LIBFABRIC::fabric_error(ret, "fi_endpoint (too many threadlocal " - "endpoints?)"); + struct fid_ep* ep; + int ret = fi_endpoint(domain, hints, &ep, nullptr); + if (ret) + { + throw NS_LIBFABRIC::fabric_error(ret, + "fi_endpoint (too many threadlocal " + "endpoints?)"); + } + fi_freeinfo(hints); + LF_DEB( + NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_active"), NS_DEBUG::ptr(ep))); + return ep; } - fi_freeinfo(hints); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_active"), NS_DEBUG::ptr(ep))); - return ep; - } - // -------------------------------------------------------------------- - struct fid_ep* new_endpoint_scalable(struct fid_domain* domain, struct fi_info* info, bool tx, - size_t threads, size_t& threads_allocated) - { - // don't allow multiple threads to call endpoint create at the same time - scoped_lock lock(controller_mutex_); + // -------------------------------------------------------------------- + struct fid_ep* new_endpoint_scalable(struct fid_domain* domain, struct fi_info* info, + bool tx, size_t threads, size_t& threads_allocated) + { + // don't allow multiple threads to call endpoint create at the same time + scoped_lock lock(controller_mutex_); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); - struct fi_info* hints = fi_dupinfo(info); - if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + struct fi_info* hints = fi_dupinfo(info); + if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); - int flags = 0; - struct fi_info* new_hints = nullptr; - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), - nullptr, nullptr, flags, hints, &new_hints); - if (ret) 
throw NS_LIBFABRIC::fabric_error(ret, "fi_getinfo"); + int flags = 0; + struct fi_info* new_hints = nullptr; + int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + nullptr, nullptr, flags, hints, &new_hints); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_getinfo"); - // Check the optimal number of TX/RX contexts supported by the provider - size_t context_count = 0; - if (tx) { context_count = std::min(new_hints->domain_attr->tx_ctx_cnt, threads); } - else { context_count = std::min(new_hints->domain_attr->rx_ctx_cnt, threads); } + // Check the optimal number of TX/RX contexts supported by the provider + size_t context_count = 0; + if (tx) { context_count = std::min(new_hints->domain_attr->tx_ctx_cnt, threads); } + else { context_count = std::min(new_hints->domain_attr->rx_ctx_cnt, threads); } - // clang-format off + // clang-format off LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint"), "Tx", tx, @@ -1060,440 +1061,451 @@ class controller_base "tx_ctx_cnt", debug::dec<3>(new_hints->domain_attr->tx_ctx_cnt), "rx_ctx_cnt", debug::dec<3>(new_hints->domain_attr->rx_ctx_cnt), "context_count", debug::dec<3>(context_count))); - // clang-format on - - threads_allocated = context_count; - new_hints->ep_attr->tx_ctx_cnt = context_count; - new_hints->ep_attr->rx_ctx_cnt = context_count; - - struct fid_ep* ep; - ret = fi_scalable_ep(domain, new_hints, &ep, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_scalable_ep"); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_scalable"), NS_DEBUG::ptr(ep))); - fi_freeinfo(hints); - return ep; - } - - // -------------------------------------------------------------------- - endpoint_wrapper& get_rx_endpoint() - { - static auto rx = NS_DEBUG::cnb_deb.make_timer(1, debug::str<>("get_rx_endpoint")); - LF_DEB(NS_DEBUG::cnb_deb, timed(rx)); + // clang-format on + + threads_allocated = context_count; + new_hints->ep_attr->tx_ctx_cnt = context_count; + 
new_hints->ep_attr->rx_ctx_cnt = context_count; + + struct fid_ep* ep; + ret = fi_scalable_ep(domain, new_hints, &ep, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_scalable_ep"); + LF_DEB( + NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_scalable"), NS_DEBUG::ptr(ep))); + fi_freeinfo(hints); + return ep; + } - if (endpoint_type_ == endpoint_type::scalableTxRx) + // -------------------------------------------------------------------- + endpoint_wrapper& get_rx_endpoint() { - if (eps_->tl_srx_.get_ep() == nullptr) + static auto rx = NS_DEBUG::cnb_deb.make_timer(1, debug::str<>("get_rx_endpoint")); + LF_DEB(NS_DEBUG::cnb_deb, timed(rx)); + + if (endpoint_type_ == endpoint_type::scalableTxRx) { - endpoint_wrapper ep; - bool ok = rx_endpoints_.pop(ep); - if (!ok) + if (eps_->tl_srx_.get_ep() == nullptr) { - // clang-format off + endpoint_wrapper ep; + bool ok = rx_endpoints_.pop(ep); + if (!ok) + { + // clang-format off LF_DEB(NS_DEBUG::cnb_deb, error(debug::str<>("Scalable Ep"), "pop rx", "ep", NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); - // clang-format on - throw std::runtime_error("rx endpoint wrapper pop fail"); + // clang-format on + throw std::runtime_error("rx endpoint wrapper pop fail"); + } + eps_->tl_srx_ = stack_endpoint( + ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &rx_endpoints_); + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Scalable Ep"), "pop rx", "ep", + NS_DEBUG::ptr(eps_->tl_srx_.get_ep()), "tx cq", + NS_DEBUG::ptr(eps_->tl_srx_.get_tx_cq()), "rx cq", + NS_DEBUG::ptr(eps_->tl_srx_.get_rx_cq()))); } - eps_->tl_srx_ = stack_endpoint(ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), - ep.get_name(), &rx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("Scalable Ep"), "pop rx", "ep", - NS_DEBUG::ptr(eps_->tl_srx_.get_ep()), "tx cq", - NS_DEBUG::ptr(eps_->tl_srx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_srx_.get_rx_cq()))); + return 
eps_->tl_srx_.endpoint_; } - return eps_->tl_srx_.endpoint_; + // otherwise just return the normal Rx endpoint + return eps_->ep_rx_; } - // otherwise just return the normal Rx endpoint - return eps_->ep_rx_; - } - // -------------------------------------------------------------------- - endpoint_wrapper& get_tx_endpoint() - { - if (endpoint_type_ == endpoint_type::threadlocalTx) + // -------------------------------------------------------------------- + endpoint_wrapper& get_tx_endpoint() { - if (eps_->tl_tx_.get_ep() == nullptr) + if (endpoint_type_ == endpoint_type::threadlocalTx) { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, "threadlocal"); - - // create a completion queue for tx endpoint - fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - auto tx_cq = create_completion_queue(fabric_domain_, fabric_info_->tx_attr->size, - "tx threadlocal"); - - // setup an endpoint for sending messages - // note that the CQ needs FI_RECV even though its a Tx cq to keep - // some providers happy as they trigger an error if an endpoint - // has no Rx cq attached (progress bug) - auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx threadlocal"); - bind_address_vector_to_endpoint(ep_tx, av_); - enable_endpoint(ep_tx, "tx threadlocal"); - - // set threadlocal endpoint wrapper - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Threadlocal Ep"), "create Tx", "ep", NS_DEBUG::ptr(ep_tx), - "tx cq", NS_DEBUG::ptr(tx_cq), "rx cq", NS_DEBUG::ptr(nullptr))); - // for cleaning up at termination - endpoint_wrapper ep(ep_tx, nullptr, tx_cq, "tx threadlocal"); - tx_endpoints_.push(ep); - eps_->tl_tx_ = stack_endpoint(ep_tx, nullptr, tx_cq, "threadlocal", nullptr); + if (eps_->tl_tx_.get_ep() == nullptr) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, "threadlocal"); + + // create a completion queue for 
tx endpoint + fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); + auto tx_cq = create_completion_queue( + fabric_domain_, fabric_info_->tx_attr->size, "tx threadlocal"); + + // setup an endpoint for sending messages + // note that the CQ needs FI_RECV even though its a Tx cq to keep + // some providers happy as they trigger an error if an endpoint + // has no Rx cq attached (progress bug) + auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); + bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx threadlocal"); + bind_address_vector_to_endpoint(ep_tx, av_); + enable_endpoint(ep_tx, "tx threadlocal"); + + // set threadlocal endpoint wrapper + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Threadlocal Ep"), "create Tx", "ep", + NS_DEBUG::ptr(ep_tx), "tx cq", NS_DEBUG::ptr(tx_cq), "rx cq", + NS_DEBUG::ptr(nullptr))); + // for cleaning up at termination + endpoint_wrapper ep(ep_tx, nullptr, tx_cq, "tx threadlocal"); + tx_endpoints_.push(ep); + eps_->tl_tx_ = stack_endpoint(ep_tx, nullptr, tx_cq, "threadlocal", nullptr); + } + return eps_->tl_tx_.endpoint_; } - return eps_->tl_tx_.endpoint_; - } - else if (endpoint_type_ == endpoint_type::scalableTx || - endpoint_type_ == endpoint_type::scalableTxRx) - { - if (eps_->tl_stx_.get_ep() == nullptr) + else if (endpoint_type_ == endpoint_type::scalableTx || + endpoint_type_ == endpoint_type::scalableTxRx) { - endpoint_wrapper ep; - bool ok = tx_endpoints_.pop(ep); - if (!ok) + if (eps_->tl_stx_.get_ep() == nullptr) { + endpoint_wrapper ep; + bool ok = tx_endpoints_.pop(ep); + if (!ok) + { + LF_DEB(NS_DEBUG::cnb_deb, + error(debug::str<>("Scalable Ep"), "pop tx", "ep", + NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), + "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); + throw std::runtime_error("tx endpoint wrapper pop fail"); + } + eps_->tl_stx_ = stack_endpoint( + ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &tx_endpoints_); 
LF_DEB(NS_DEBUG::cnb_deb, - error(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), - "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); - throw std::runtime_error("tx endpoint wrapper pop fail"); + trace(debug::str<>("Scalable Ep"), "pop tx", "ep", + NS_DEBUG::ptr(eps_->tl_stx_.get_ep()), "tx cq", + NS_DEBUG::ptr(eps_->tl_stx_.get_tx_cq()), "rx cq", + NS_DEBUG::ptr(eps_->tl_stx_.get_rx_cq()))); } - eps_->tl_stx_ = stack_endpoint(ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), - ep.get_name(), &tx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(eps_->tl_stx_.get_ep()), "tx cq", - NS_DEBUG::ptr(eps_->tl_stx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_stx_.get_rx_cq()))); + return eps_->tl_stx_.endpoint_; } - return eps_->tl_stx_.endpoint_; + else if (endpoint_type_ == endpoint_type::multiple) { return eps_->ep_tx_; } + // single : shared tx/rx endpoint + return eps_->ep_rx_; } - else if (endpoint_type_ == endpoint_type::multiple) { return eps_->ep_tx_; } - // single : shared tx/rx endpoint - return eps_->ep_rx_; - } - - // -------------------------------------------------------------------- - void bind_address_vector_to_endpoint(struct fid_ep* endpoint, struct fid_av* av) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Binding AV"), "to", NS_DEBUG::ptr(endpoint))); - int ret = fi_ep_bind(endpoint, &av->fid, 0); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind address_vector"); - } - - // -------------------------------------------------------------------- - void bind_queue_to_endpoint(struct fid_ep* endpoint, struct fid_cq*& cq, uint32_t cqtype, - const char* type) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Binding CQ"), "to", NS_DEBUG::ptr(endpoint), 
type)); - int ret = fi_ep_bind(endpoint, &cq->fid, cqtype); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind cq"); - } + // -------------------------------------------------------------------- + void bind_address_vector_to_endpoint(struct fid_ep* endpoint, struct fid_av* av) + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - // -------------------------------------------------------------------- - fid_cq* bind_tx_queue_to_rx_endpoint(struct fi_info* info, struct fid_ep* ep) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - info->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - fid_cq* tx_cq = create_completion_queue(fabric_domain_, info->tx_attr->size, "tx->rx"); - // shared send/recv endpoint - bind send cq to the recv endpoint - bind_queue_to_endpoint(ep, tx_cq, FI_TRANSMIT, "tx->rx bug fix"); - return tx_cq; - } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Binding AV"), "to", NS_DEBUG::ptr(endpoint))); + int ret = fi_ep_bind(endpoint, &av->fid, 0); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind address_vector"); + } - // -------------------------------------------------------------------- - void enable_endpoint(struct fid_ep* endpoint, const char* type) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + // -------------------------------------------------------------------- + void bind_queue_to_endpoint( + struct fid_ep* endpoint, struct fid_cq*& cq, uint32_t cqtype, char const* type) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Enabling endpoint"), NS_DEBUG::ptr(endpoint))); - int ret = fi_enable(endpoint); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_enable"); - } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Binding CQ"), "to", NS_DEBUG::ptr(endpoint), type)); + int ret = 
fi_ep_bind(endpoint, &cq->fid, cqtype); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind cq"); + } - // -------------------------------------------------------------------- - locality get_endpoint_address(struct fid* id) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + fid_cq* bind_tx_queue_to_rx_endpoint(struct fi_info* info, struct fid_ep* ep) + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + info->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); + fid_cq* tx_cq = create_completion_queue(fabric_domain_, info->tx_attr->size, "tx->rx"); + // shared send/recv endpoint - bind send cq to the recv endpoint + bind_queue_to_endpoint(ep, tx_cq, FI_TRANSMIT, "tx->rx bug fix"); + return tx_cq; + } - locality::locality_data local_addr; - std::size_t addrlen = locality_defs::array_size; - int ret = fi_getname(id, local_addr.data(), &addrlen); - if (ret || (addrlen > locality_defs::array_size)) + // -------------------------------------------------------------------- + void enable_endpoint(struct fid_ep* endpoint, char const* type) { - std::string err = - std::to_string(addrlen) + "=" + std::to_string(locality_defs::array_size); - NS_LIBFABRIC::fabric_error(ret, "fi_getname - size error or other problem " + err); + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Enabling endpoint"), NS_DEBUG::ptr(endpoint))); + int ret = fi_enable(endpoint); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_enable"); } - // optimized out when debug logging is false - if constexpr (NS_DEBUG::cnb_deb.is_enabled()) + // -------------------------------------------------------------------- + locality get_endpoint_address(struct fid* id) { - std::stringstream temp1; - for (std::size_t i = 0; i < locality_defs::array_length; ++i) + 
[[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + + locality::locality_data local_addr; + std::size_t addrlen = locality_defs::array_size; + int ret = fi_getname(id, local_addr.data(), &addrlen); + if (ret || (addrlen > locality_defs::array_size)) { - temp1 << debug::ipaddr(&local_addr[i]) << " - "; + std::string err = + std::to_string(addrlen) + "=" + std::to_string(locality_defs::array_size); + NS_LIBFABRIC::fabric_error(ret, "fi_getname - size error or other problem " + err); } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), "size", - debug::dec<>(addrlen), " : ", temp1.str().c_str())); - std::stringstream temp2; - for (std::size_t i = 0; i < locality_defs::array_length; ++i) + // optimized out when debug logging is false + if constexpr (NS_DEBUG::cnb_deb.is_enabled()) { - temp2 << debug::hex<8>(local_addr[i]) << " - "; + std::stringstream temp1; + for (std::size_t i = 0; i < locality_defs::array_length; ++i) + { + temp1 << debug::ipaddr(&local_addr[i]) << " - "; + } + + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("raw address data"), "size", debug::dec<>(addrlen), " : ", + temp1.str().c_str())); + std::stringstream temp2; + for (std::size_t i = 0; i < locality_defs::array_length; ++i) + { + temp2 << debug::hex<8>(local_addr[i]) << " - "; + } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("raw address data"), temp2.str().c_str())); } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), temp2.str().c_str())); + return locality(local_addr); } - return locality(local_addr); - } - // -------------------------------------------------------------------- - fid_pep* create_passive_endpoint(struct fid_fabric* fabric, struct fi_info* info) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + fid_pep* create_passive_endpoint(struct fid_fabric* fabric, struct fi_info* info) + { + 
[[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - struct fid_pep* ep; - int ret = fi_passive_ep(fabric, info, &ep, nullptr); - if (ret) { throw NS_LIBFABRIC::fabric_error(ret, "Failed to create fi_passive_ep"); } - return ep; - } + struct fid_pep* ep; + int ret = fi_passive_ep(fabric, info, &ep, nullptr); + if (ret) { throw NS_LIBFABRIC::fabric_error(ret, "Failed to create fi_passive_ep"); } + return ep; + } - // -------------------------------------------------------------------- - inline const locality& here() const { return here_; } + // -------------------------------------------------------------------- + inline locality const& here() const { return here_; } - // -------------------------------------------------------------------- - inline const fi_addr_t& fi_address() const { return here_.fi_address(); } + // -------------------------------------------------------------------- + inline fi_addr_t const& fi_address() const { return here_.fi_address(); } - // -------------------------------------------------------------------- - inline void setHere(const locality& val) { here_ = val; } + // -------------------------------------------------------------------- + inline void setHere(locality const& val) { here_ = val; } - // -------------------------------------------------------------------- - inline const locality& root() const { return root_; } + // -------------------------------------------------------------------- + inline locality const& root() const { return root_; } - // -------------------------------------------------------------------- - inline struct fid_domain* get_domain() const { return fabric_domain_; } + // -------------------------------------------------------------------- + inline struct fid_domain* get_domain() const { return fabric_domain_; } - // -------------------------------------------------------------------- - inline std::size_t get_rma_protocol_size() { return 65536; } + // 
-------------------------------------------------------------------- + inline std::size_t get_rma_protocol_size() { return 65536; } #ifdef DISABLE_FI_INJECT - // -------------------------------------------------------------------- - inline std::size_t get_tx_inject_size() { return 0; } + // -------------------------------------------------------------------- + inline std::size_t get_tx_inject_size() { return 0; } #else - // -------------------------------------------------------------------- - inline std::size_t get_tx_inject_size() { return tx_inject_size_; } + // -------------------------------------------------------------------- + inline std::size_t get_tx_inject_size() { return tx_inject_size_; } #endif - // -------------------------------------------------------------------- - inline std::size_t get_tx_size() { return tx_attr_size_; } + // -------------------------------------------------------------------- + inline std::size_t get_tx_size() { return tx_attr_size_; } - // -------------------------------------------------------------------- - inline std::size_t get_rx_size() { return rx_attr_size_; } + // -------------------------------------------------------------------- + inline std::size_t get_rx_size() { return rx_attr_size_; } - // -------------------------------------------------------------------- - // returns true when all connections have been disconnected and none are active - inline bool isTerminated() - { - return false; - //return (qp_endpoint_map_.size() == 0); - } + // -------------------------------------------------------------------- + // returns true when all connections have been disconnected and none are active + inline bool isTerminated() + { + return false; + //return (qp_endpoint_map_.size() == 0); + } - // -------------------------------------------------------------------- - void debug_print_av_vector(std::size_t N) - { - locality addr; - std::size_t addrlen = locality_defs::array_size; - for (std::size_t i = 0; i < N; ++i) + // 
-------------------------------------------------------------------- + void debug_print_av_vector(std::size_t N) { - int ret = fi_av_lookup(av_, fi_addr_t(i), addr.fabric_data_writable(), &addrlen); - addr.set_fi_address(fi_addr_t(i)); - if ((ret == 0) && (addrlen == locality_defs::array_size)) - { - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); - } - else + locality addr; + std::size_t addrlen = locality_defs::array_size; + for (std::size_t i = 0; i < N; ++i) { - LF_DEB(NS_DEBUG::cnb_err, - error(debug::str<>("address length"), debug::dec<3>(addrlen), - debug::dec<3>(locality_defs::array_size))); - throw std::runtime_error("debug_print_av_vector : address vector " - "traversal failure"); + int ret = fi_av_lookup(av_, fi_addr_t(i), addr.fabric_data_writable(), &addrlen); + addr.set_fi_address(fi_addr_t(i)); + if ((ret == 0) && (addrlen == locality_defs::array_size)) + { + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); + } + else + { + LF_DEB(NS_DEBUG::cnb_err, + error(debug::str<>("address length"), debug::dec<3>(addrlen), + debug::dec<3>(locality_defs::array_size))); + throw std::runtime_error("debug_print_av_vector : address vector " + "traversal failure"); + } } } - } - // -------------------------------------------------------------------- - inline constexpr bool bypass_tx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_tx_lock() + { #if defined(HAVE_LIBFABRIC_GNI) - return true; + return true; #elif defined(HAVE_LIBFABRIC_CXI) - // @todo : cxi provider is not yet thread safe using scalable endpoints - return false; + // @todo : cxi provider is not yet thread safe using scalable endpoints + return false; #else - return (threadlevel_flags() == FI_THREAD_SAFE || + return (threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::threadlocalTx); #endif - } + } - // 
-------------------------------------------------------------------- - inline controller_base::unique_lock get_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_rx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_rx_lock() + { #ifdef HAVE_LIBFABRIC_GNI - return true; + return true; #else - return ( - threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::scalableTxRx); + return (threadlevel_flags() == FI_THREAD_SAFE || + endpoint_type_ == endpoint_type::scalableTxRx); #endif - } + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_rx_lock() - { - if 
(bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - progress_status poll_for_work_completions(void* user_data) - { - progress_status p{0, 0}; - bool retry = false; - do { - // sends - uint32_t nsend = static_cast(this)->poll_send_queue( - get_tx_endpoint().get_tx_cq(), user_data); - p.m_num_sends += nsend; - retry = (nsend == max_completions_per_poll_); - // recvs - uint32_t nrecv = static_cast(this)->poll_recv_queue( - get_rx_endpoint().get_rx_cq(), user_data); - p.m_num_recvs += nrecv; - retry |= (nrecv == max_completions_per_poll_); - } while (retry); - return p; - } + // -------------------------------------------------------------------- + progress_status poll_for_work_completions(void* user_data) + { + progress_status p{0, 0}; + bool retry = false; + do { + // sends + uint32_t nsend = static_cast(this)->poll_send_queue( + get_tx_endpoint().get_tx_cq(), user_data); + p.m_num_sends += nsend; + retry = (nsend == max_completions_per_poll_); + // recvs + uint32_t nrecv = static_cast(this)->poll_recv_queue( + get_rx_endpoint().get_rx_cq(), user_data); + p.m_num_recvs += nrecv; + retry |= (nrecv == max_completions_per_poll_); + } while (retry); + return p; + } - // -------------------------------------------------------------------- - inline int poll_send_queue(fid_cq* tx_cq, void* user_data) - { - return static_cast(this)->poll_send_queue(tx_cq, user_data); - } + // -------------------------------------------------------------------- + inline int poll_send_queue(fid_cq* tx_cq, void* user_data) + { + return static_cast(this)->poll_send_queue(tx_cq, user_data); + } - // 
-------------------------------------------------------------------- - inline int poll_recv_queue(fid_cq* rx_cq, void* user_data) - { - return static_cast(this)->poll_recv_queue(rx_cq, user_data); - } + // -------------------------------------------------------------------- + inline int poll_recv_queue(fid_cq* rx_cq, void* user_data) + { + return static_cast(this)->poll_recv_queue(rx_cq, user_data); + } - // -------------------------------------------------------------------- - struct fid_cq* create_completion_queue(struct fid_domain* domain, size_t size, const char* type) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); - - struct fid_cq* cq; - fi_cq_attr cq_attr = {}; - cq_attr.format = FI_CQ_FORMAT_MSG; - cq_attr.wait_obj = FI_WAIT_NONE; - cq_attr.wait_cond = FI_CQ_COND_NONE; - cq_attr.size = size; - cq_attr.flags = 0 /*FI_COMPLETION*/; - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("CQ size"), debug::dec<4>(size))); - // open completion queue on fabric domain and set context to null - int ret = fi_cq_open(domain, &cq_attr, &cq, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_cq_open"); - return cq; - } + // -------------------------------------------------------------------- + struct fid_cq* create_completion_queue( + struct fid_domain* domain, size_t size, char const* type) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + + struct fid_cq* cq; + fi_cq_attr cq_attr = {}; + cq_attr.format = FI_CQ_FORMAT_MSG; + cq_attr.wait_obj = FI_WAIT_NONE; + cq_attr.wait_cond = FI_CQ_COND_NONE; + cq_attr.size = size; + cq_attr.flags = 0 /*FI_COMPLETION*/; + LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("CQ size"), debug::dec<4>(size))); + // open completion queue on fabric domain and set context to null + int ret = fi_cq_open(domain, &cq_attr, &cq, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_cq_open"); + return cq; + } - // 
-------------------------------------------------------------------- - fid_av* create_address_vector(struct fi_info* info, int N, int num_rx_contexts) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + fid_av* create_address_vector(struct fi_info* info, int N, int num_rx_contexts) + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - fid_av* av; - fi_av_attr av_attr = {fi_av_type(0), 0, 0, 0, nullptr, nullptr, 0}; + fid_av* av; + fi_av_attr av_attr = {fi_av_type(0), 0, 0, 0, nullptr, nullptr, 0}; - // number of addresses expected - av_attr.count = N; + // number of addresses expected + av_attr.count = N; - // number of receive contexts used - int rx_ctx_bits = 0; + // number of receive contexts used + int rx_ctx_bits = 0; #ifdef RX_CONTEXTS_SUPPORT - while (num_rx_contexts >> ++rx_ctx_bits) - ; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("rx_ctx_bits"), rx_ctx_bits)); + while (num_rx_contexts >> ++rx_ctx_bits); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("rx_ctx_bits"), rx_ctx_bits)); #endif - av_attr.rx_ctx_bits = rx_ctx_bits; - // if contexts is nonzero, then we are using a single scalable endpoint - av_attr.ep_per_node = (num_rx_contexts > 0) ? 2 : 0; - - if (info->domain_attr->av_type != FI_AV_UNSPEC) - { - av_attr.type = info->domain_attr->av_type; - } - else - { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("map FI_AV_TABLE"))); - av_attr.type = FI_AV_TABLE; - } + av_attr.rx_ctx_bits = rx_ctx_bits; + // if contexts is nonzero, then we are using a single scalable endpoint + av_attr.ep_per_node = (num_rx_contexts > 0) ? 
2 : 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating AV"))); - int ret = fi_av_open(fabric_domain_, &av_attr, &av, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_av_open"); - return av; - } + if (info->domain_attr->av_type != FI_AV_UNSPEC) + { + av_attr.type = info->domain_attr->av_type; + } + else + { + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("map FI_AV_TABLE"))); + av_attr.type = FI_AV_TABLE; + } - // -------------------------------------------------------------------- - locality insert_address(const locality& address) { return insert_address(av_, address); } + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating AV"))); + int ret = fi_av_open(fabric_domain_, &av_attr, &av, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_av_open"); + return av; + } - // -------------------------------------------------------------------- - locality insert_address(fid_av* av, const locality& address) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + locality insert_address(locality const& address) { return insert_address(av_, address); } - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("inserting AV"), iplocality(address), NS_DEBUG::ptr(av))); - fi_addr_t fi_addr = 0xffffffff; - int ret = fi_av_insert(av, address.fabric_data(), 1, &fi_addr, 0, nullptr); - if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } - else if (ret == 0) + // -------------------------------------------------------------------- + locality insert_address(fid_av* av, locality const& address) { - NS_DEBUG::cnb_deb.error("fi_av_insert called with existing address"); - NS_LIBFABRIC::fabric_error(ret, "fi_av_insert did not return 1"); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("inserting AV"), iplocality(address), NS_DEBUG::ptr(av))); + 
fi_addr_t fi_addr = 0xffff'ffff; + int ret = fi_av_insert(av, address.fabric_data(), 1, &fi_addr, 0, nullptr); + if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } + else if (ret == 0) + { + NS_DEBUG::cnb_deb.error("fi_av_insert called with existing address"); + NS_LIBFABRIC::fabric_error(ret, "fi_av_insert did not return 1"); + } + // address was generated correctly, now update the locality with the fi_addr + locality new_locality(address, fi_addr); + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), + iplocality(new_locality), "fi_addr", debug::hex<4>(fi_addr))); + return new_locality; } - // address was generated correctly, now update the locality with the fi_addr - locality new_locality(address, fi_addr); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), - iplocality(new_locality), "fi_addr", debug::hex<4>(fi_addr))); - return new_locality; - } -}; + }; -} // namespace NS_LIBFABRIC +} // namespace NS_LIBFABRIC diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index 0f2db4c1..325975a7 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -10,43 +10,41 @@ #pragma once #include -#include #include +#include // #include // #include "oomph_libfabric_defines.hpp" -namespace NS_DEBUG -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print err_deb("ERROR__"); -} // namespace NS_DEBUG +namespace NS_DEBUG { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print err_deb("ERROR__"); +} // namespace NS_DEBUG -namespace NS_LIBFABRIC -{ +namespace NS_LIBFABRIC { -class fabric_error : public std::runtime_error -{ - public: - // -------------------------------------------------------------------- - fabric_error(int err, const std::string& msg) - : std::runtime_error(std::string(fi_strerror(-err)) + msg) - , error_(err) + class fabric_error : public std::runtime_error { - 
NS_DEBUG::err_deb.error(msg, ":", fi_strerror(-err)); - std::terminate(); - } + public: + // -------------------------------------------------------------------- + fabric_error(int err, std::string const& msg) + : std::runtime_error(std::string(fi_strerror(-err)) + msg) + , error_(err) + { + NS_DEBUG::err_deb.error(msg, ":", fi_strerror(-err)); + std::terminate(); + } - fabric_error(int err) - : std::runtime_error(fi_strerror(-err)) - , error_(-err) - { - NS_DEBUG::err_deb.error(what()); - std::terminate(); - } + fabric_error(int err) + : std::runtime_error(fi_strerror(-err)) + , error_(-err) + { + NS_DEBUG::err_deb.error(what()); + std::terminate(); + } - int error_; -}; + int error_; + }; -} // namespace NS_LIBFABRIC +} // namespace NS_LIBFABRIC diff --git a/src/libfabric/libfabric_defines_template.hpp b/src/libfabric/libfabric_defines_template.hpp index 64c04944..efd2bb67 100644 --- a/src/libfabric/libfabric_defines_template.hpp +++ b/src/libfabric/libfabric_defines_template.hpp @@ -14,26 +14,26 @@ // some namespaces for the lib and for debugging are setup correctly #define NS_LIBFABRIC oomph::libfabric -#define NS_MEMORY oomph::libfabric -#define NS_DEBUG oomph::debug +#define NS_MEMORY oomph::libfabric +#define NS_DEBUG oomph::debug #ifndef LF_DEB -#define LF_DEB(printer, Expr) \ - if constexpr (printer.is_enabled()) { printer.Expr; }; +# define LF_DEB(printer, Expr) \ + if constexpr (printer.is_enabled()) { printer.Expr; }; #endif #define LFSOURCE_DIR "@OOMPH_SRC_LIBFABRIC_DIR@" -#define LFPRINT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/print.hpp" -#define LFCOUNT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/simple_counter.hpp" +#define LFPRINT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/print.hpp" +#define LFCOUNT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/simple_counter.hpp" // oomph has a debug print helper file in the main source tree #if __has_include(LFPRINT_HPP) -#include LFPRINT_HPP -#define has_debug 1 +# include LFPRINT_HPP +# define has_debug 1 #endif #if __has_include(LFCOUNT_HPP) -#include 
LFCOUNT_HPP +# include LFCOUNT_HPP #endif #endif diff --git a/src/libfabric/locality.cpp b/src/libfabric/locality.cpp index 487912f5..ff23eeb5 100644 --- a/src/libfabric/locality.cpp +++ b/src/libfabric/locality.cpp @@ -10,27 +10,22 @@ #include -namespace oomph -{ -namespace libfabric -{ +namespace oomph { namespace libfabric { -// ------------------------------------------------------------------ -// format as ip address, port, libfabric address -// ------------------------------------------------------------------ -iplocality::iplocality(const locality& l) -: data(l) -{ -} + // ------------------------------------------------------------------ + // format as ip address, port, libfabric address + // ------------------------------------------------------------------ + iplocality::iplocality(locality const& l) + : data(l) + { + } -std::ostream& -operator<<(std::ostream& os, const iplocality& p) -{ - os << std::dec << NS_DEBUG::ipaddr(p.data.fabric_data()) << " - " - << NS_DEBUG::ipaddr(p.data.ip_address()) << ":" << NS_DEBUG::dec<>(p.data.port()) << " (" - << NS_DEBUG::dec<>(p.data.fi_address()) << ") "; - return os; -} + std::ostream& operator<<(std::ostream& os, iplocality const& p) + { + os << std::dec << NS_DEBUG::ipaddr(p.data.fabric_data()) << " - " + << NS_DEBUG::ipaddr(p.data.ip_address()) << ":" << NS_DEBUG::dec<>(p.data.port()) << " (" + << NS_DEBUG::dec<>(p.data.fi_address()) << ") "; + return os; + } -} // namespace libfabric -} // namespace oomph +}} // namespace oomph::libfabric diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 74f6b290..84f5ddc2 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -15,243 +15,238 @@ #include #include // -#include -#include #include +#include +#include // #include "oomph_libfabric_defines.hpp" // Different providers use different address formats that we must accommodate // in our locality object. 
#ifdef HAVE_LIBFABRIC_GNI -#define HAVE_LIBFABRIC_LOCALITY_SIZE 48 +# define HAVE_LIBFABRIC_LOCALITY_SIZE 48 #endif #ifdef HAVE_LIBFABRIC_CXI -#ifdef HAVE_LIBFABRIC_CXI_1_15 -#define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(int) -#else -#define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(long int) -#endif +# ifdef HAVE_LIBFABRIC_CXI_1_15 +# define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(int) +# else +# define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(long int) +# endif #endif #ifdef HAVE_LIBFABRIC_EFA -#define HAVE_LIBFABRIC_LOCALITY_SIZE 32 +# define HAVE_LIBFABRIC_LOCALITY_SIZE 32 #endif #if defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_TCP) || \ defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_PSM2) -#define HAVE_LIBFABRIC_LOCALITY_SIZE 16 -#define HAVE_LIBFABRIC_LOCALITY_SOCKADDR +# define HAVE_LIBFABRIC_LOCALITY_SIZE 16 +# define HAVE_LIBFABRIC_LOCALITY_SOCKADDR #endif -namespace oomph -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print loc_deb("LOCALTY"); -} // namespace oomph - -namespace oomph -{ -namespace libfabric -{ - -struct locality; - -// ------------------------------------------------------------------ -// format as ip address, port, libfabric address -// ------------------------------------------------------------------ -struct iplocality -{ - const locality& data; - iplocality(const locality& a); - friend std::ostream& operator<<(std::ostream& os, const iplocality& p); -}; - -// -------------------------------------------------------------------- -// Locality, in this structure we store the information required by -// libfabric to make a connection to another node. -// With libfabric 1.4.x the array contains the fabric ip address stored -// as the second uint32_t in the array. 
For this reason we use an -// array of uint32_t rather than uint8_t/char so we can easily access -// the ip for debug/validation purposes -// -------------------------------------------------------------------- -namespace locality_defs -{ -// the number of 32bit ints stored in our array -const uint32_t array_size = HAVE_LIBFABRIC_LOCALITY_SIZE; -const uint32_t array_length = HAVE_LIBFABRIC_LOCALITY_SIZE / 4; -} // namespace locality_defs - -struct locality -{ - // array type of our locality data - typedef std::array locality_data; - - static const char* type() { return "libfabric"; } - - explicit locality(const locality_data& in_data) - { - std::memcpy(&data_[0], &in_data[0], locality_defs::array_size); - fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("expl constructing"), iplocality((*this)))); - } - - locality() - { - std::memset(&data_[0], 0x00, locality_defs::array_size); - fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), iplocality((*this)))); - } - - locality(const locality& other) - : data_(other.data_) - , fi_address_(other.fi_address_) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), iplocality((*this)))); - } - - locality(const locality& other, fi_addr_t addr) - : data_(other.data_) - , fi_address_(addr) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), iplocality((*this)))); - } - - locality(locality&& other) - : data_(std::move(other.data_)) - , fi_address_(other.fi_address_) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), iplocality((*this)))); - } - - // provided to support sockets mode bootstrap - explicit locality(const std::string& address, const std::string& portnum) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), address, ":", portnum)); - // - struct sockaddr_in socket_data; - memset(&socket_data, 0, sizeof(socket_data)); - socket_data.sin_family = AF_INET; - socket_data.sin_port = htons(std::stol(portnum)); - inet_pton(AF_INET, 
address.c_str(), &(socket_data.sin_addr)); - // - std::memcpy(&data_[0], &socket_data, locality_defs::array_size); - fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), iplocality((*this)))); - } - - // some condition marking this locality as valid - explicit inline operator bool() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("bool operator"), iplocality((*this)))); - return (ip_address() != 0); - } - - inline bool valid() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("valid operator"), iplocality((*this)))); - return (ip_address() != 0); - } - - locality& operator=(const locality& other) - { - data_ = other.data_; - fi_address_ = other.fi_address_; - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("copy operator"), iplocality(*this), iplocality(other))); - return *this; - } - - bool operator==(const locality& other) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality operator"), iplocality(*this), iplocality(other))); - return std::memcmp(&data_, &other.data_, locality_defs::array_size) == 0; - } - - bool less_than(const locality& other) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("less operator"), iplocality(*this), iplocality(other))); - if (ip_address() < other.ip_address()) return true; - if (ip_address() == other.ip_address()) return port() < other.port(); - return false; - } - - const uint32_t& ip_address() const - { +namespace oomph { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print loc_deb("LOCALTY"); +} // namespace oomph + +namespace oomph { namespace libfabric { + + struct locality; + + // ------------------------------------------------------------------ + // format as ip address, port, libfabric address + // ------------------------------------------------------------------ + struct iplocality + { + locality const& data; + iplocality(locality const& a); + friend std::ostream& operator<<(std::ostream& os, iplocality const& p); + }; + + // 
-------------------------------------------------------------------- + // Locality, in this structure we store the information required by + // libfabric to make a connection to another node. + // With libfabric 1.4.x the array contains the fabric ip address stored + // as the second uint32_t in the array. For this reason we use an + // array of uint32_t rather than uint8_t/char so we can easily access + // the ip for debug/validation purposes + // -------------------------------------------------------------------- + namespace locality_defs { + // the number of 32bit ints stored in our array + uint32_t const array_size = HAVE_LIBFABRIC_LOCALITY_SIZE; + uint32_t const array_length = HAVE_LIBFABRIC_LOCALITY_SIZE / 4; + } // namespace locality_defs + + struct locality + { + // array type of our locality data + typedef std::array locality_data; + + static char const* type() { return "libfabric"; } + + explicit locality(locality_data const& in_data) + { + std::memcpy(&data_[0], &in_data[0], locality_defs::array_size); + fi_address_ = 0; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("expl constructing"), iplocality((*this)))); + } + + locality() + { + std::memset(&data_[0], 0x00, locality_defs::array_size); + fi_address_ = 0; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), iplocality((*this)))); + } + + locality(locality const& other) + : data_(other.data_) + , fi_address_(other.fi_address_) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), iplocality((*this)))); + } + + locality(locality const& other, fi_addr_t addr) + : data_(other.data_) + , fi_address_(addr) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), iplocality((*this)))); + } + + locality(locality&& other) + : data_(std::move(other.data_)) + , fi_address_(other.fi_address_) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), iplocality((*this)))); + } + + // provided to support sockets mode bootstrap + explicit locality(std::string const& address, std::string 
const& portnum) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), address, ":", portnum)); + // + struct sockaddr_in socket_data; + memset(&socket_data, 0, sizeof(socket_data)); + socket_data.sin_family = AF_INET; + socket_data.sin_port = htons(std::stol(portnum)); + inet_pton(AF_INET, address.c_str(), &(socket_data.sin_addr)); + // + std::memcpy(&data_[0], &socket_data, locality_defs::array_size); + fi_address_ = 0; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), iplocality((*this)))); + } + + // some condition marking this locality as valid + explicit inline operator bool() const + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("bool operator"), iplocality((*this)))); + return (ip_address() != 0); + } + + inline bool valid() const + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("valid operator"), iplocality((*this)))); + return (ip_address() != 0); + } + + locality& operator=(locality const& other) + { + data_ = other.data_; + fi_address_ = other.fi_address_; + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("copy operator"), iplocality(*this), iplocality(other))); + return *this; + } + + bool operator==(locality const& other) + { + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("equality operator"), iplocality(*this), iplocality(other))); + return std::memcmp(&data_, &other.data_, locality_defs::array_size) == 0; + } + + bool less_than(locality const& other) + { + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("less operator"), iplocality(*this), iplocality(other))); + if (ip_address() < other.ip_address()) return true; + if (ip_address() == other.ip_address()) return port() < other.port(); + return false; + } + + uint32_t const& ip_address() const + { #if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(data_.data())->sin_addr.s_addr; + return reinterpret_cast(data_.data())->sin_addr.s_addr; #elif defined(HAVE_LIBFABRIC_GNI) - return data_[0]; + return data_[0]; #elif defined(HAVE_LIBFABRIC_CXI) - return data_[0]; + return data_[0]; #elif 
defined(HAVE_LIBFABRIC_EFA) - return data_[0]; + return data_[0]; #else - throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); + throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif - } + } - static const uint32_t& ip_address(const locality_data& data) - { + static uint32_t const& ip_address(locality_data const& data) + { #if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(&data)->sin_addr.s_addr; + return reinterpret_cast(&data)->sin_addr.s_addr; #elif defined(HAVE_LIBFABRIC_GNI) - return data[0]; + return data[0]; #elif defined(HAVE_LIBFABRIC_CXI) - return data[0]; + return data[0]; #elif defined(HAVE_LIBFABRIC_EFA) - return data[0]; + return data[0]; #else - throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); + throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif - } - - inline const fi_addr_t& fi_address() const { return fi_address_; } - - inline void set_fi_address(fi_addr_t fi_addr) { fi_address_ = fi_addr; } - - inline uint16_t port() const - { - uint16_t port = 256 * reinterpret_cast(data_.data())[2] + - reinterpret_cast(data_.data())[3]; - return port; - } - - inline const void* fabric_data() const { return data_.data(); } - - inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } - - private: - friend bool operator==(locality const& lhs, locality const& rhs) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality friend"), iplocality(lhs), iplocality(rhs))); - return ((lhs.data_ == rhs.data_) && (lhs.fi_address_ == rhs.fi_address_)); - } - - friend bool operator<(locality const& lhs, locality const& rhs) - { - const uint32_t& a1 = lhs.ip_address(); - const uint32_t& a2 = rhs.ip_address(); - const fi_addr_t& f1 = lhs.fi_address(); - const fi_addr_t& f2 = rhs.fi_address(); - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("less friend"), iplocality(lhs), iplocality(rhs))); - return (a1 < a2) || (a1 == a2 && f1 < f2); - } - - friend 
std::ostream& operator<<(std::ostream& os, locality const& loc) - { - for (uint32_t i = 0; i < locality_defs::array_length; ++i) { os << loc.data_[i]; } - return os; - } - - private: - locality_data data_; - fi_addr_t fi_address_; -}; - -} // namespace libfabric -} // namespace oomph + } + + inline fi_addr_t const& fi_address() const { return fi_address_; } + + inline void set_fi_address(fi_addr_t fi_addr) { fi_address_ = fi_addr; } + + inline uint16_t port() const + { + uint16_t port = 256 * reinterpret_cast(data_.data())[2] + + reinterpret_cast(data_.data())[3]; + return port; + } + + inline void const* fabric_data() const { return data_.data(); } + + inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } + + private: + friend bool operator==(locality const& lhs, locality const& rhs) + { + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("equality friend"), iplocality(lhs), iplocality(rhs))); + return ((lhs.data_ == rhs.data_) && (lhs.fi_address_ == rhs.fi_address_)); + } + + friend bool operator<(locality const& lhs, locality const& rhs) + { + uint32_t const& a1 = lhs.ip_address(); + uint32_t const& a2 = rhs.ip_address(); + fi_addr_t const& f1 = lhs.fi_address(); + fi_addr_t const& f2 = rhs.fi_address(); + LF_DEB( + loc_deb, trace(NS_DEBUG::str<>("less friend"), iplocality(lhs), iplocality(rhs))); + return (a1 < a2) || (a1 == a2 && f1 < f2); + } + + friend std::ostream& operator<<(std::ostream& os, locality const& loc) + { + for (uint32_t i = 0; i < locality_defs::array_length; ++i) { os << loc.data_[i]; } + return os; + } + + private: + locality_data data_; + fi_addr_t fi_address_; + }; + +}} // namespace oomph::libfabric diff --git a/src/libfabric/memory_region.hpp b/src/libfabric/memory_region.hpp index 0cd5c4a7..f1eb5326 100644 --- a/src/libfabric/memory_region.hpp +++ b/src/libfabric/memory_region.hpp @@ -18,20 +18,19 @@ #include #include -#include "oomph_libfabric_defines.hpp" #include "fabric_error.hpp" +#include 
"oomph_libfabric_defines.hpp" #ifdef OOMPH_ENABLE_DEVICE -#include +# include #endif // ------------------------------------------------------------------ -namespace NS_MEMORY -{ +namespace NS_MEMORY { -static NS_DEBUG::enable_print mrn_deb("REGION_"); + static NS_DEBUG::enable_print mrn_deb("REGION_"); -/* + /* struct fi_mr_attr { union { const struct iovec *mr_iov; @@ -60,342 +59,356 @@ struct fi_mr_attr { */ -// This is the only part of the code that actually -// calls libfabric functions -struct region_provider -{ - // The internal memory region handle - using provider_region = struct fid_mr; - using provider_domain = struct fid_domain; - - // register region - static inline int fi_register_memory(provider_domain* pd, int device_id, const void* buf, - size_t len, uint64_t access_flags, uint64_t offset, uint64_t request_key, - struct fid_mr** mr) + // This is the only part of the code that actually + // calls libfabric functions + struct region_provider { - [[maybe_unused]] auto scp = - NS_MEMORY::mrn_deb.scope(__func__, NS_DEBUG::ptr(buf), NS_DEBUG::dec<>(len), device_id); - // - struct iovec addresses = {/*.iov_base = */ const_cast(buf), /*.iov_len = */ len}; - fi_mr_attr attr = { - /*.mr_iov = */ &addresses, - /*.iov_count = */ 1, - /*.access = */ access_flags, - /*.offset = */ offset, - /*.requested_key = */ request_key, - /*.context = */ nullptr, - /*.auth_key_size = */ 0, - /*.auth_key = */ nullptr, - /*.iface = */ FI_HMEM_SYSTEM, - /*.device = */ {0}, + // The internal memory region handle + using provider_region = struct fid_mr; + using provider_domain = struct fid_domain; + + // register region + static inline int fi_register_memory(provider_domain* pd, int device_id, void const* buf, + size_t len, uint64_t access_flags, uint64_t offset, uint64_t request_key, + struct fid_mr** mr) + { + [[maybe_unused]] auto scp = NS_MEMORY::mrn_deb.scope( + __func__, NS_DEBUG::ptr(buf), NS_DEBUG::dec<>(len), device_id); + // + struct iovec addresses = {/*.iov_base = */ 
const_cast(buf), /*.iov_len = */ len}; + fi_mr_attr attr = { + /*.mr_iov = */ &addresses, + /*.iov_count = */ 1, + /*.access = */ access_flags, + /*.offset = */ offset, + /*.requested_key = */ request_key, + /*.context = */ nullptr, + /*.auth_key_size = */ 0, + /*.auth_key = */ nullptr, + /*.iface = */ FI_HMEM_SYSTEM, + /*.device = */ {0}, #if (FI_MAJOR_VERSION > 1) || ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION > 17) - /*.hmem_data = */ nullptr, + /*.hmem_data = */ nullptr, #endif #if (FI_MAJOR_VERSION >= 2) - /*page_size = */ static_cast(sysconf(_SC_PAGESIZE)), - /*base_mr = */ nullptr, - /*sub_mr_cnt = */ 0, - }; + /*page_size = */ static_cast(sysconf(_SC_PAGESIZE)), + /*base_mr = */ nullptr, + /*sub_mr_cnt = */ 0, + }; #else - }; + }; #endif - if (device_id >= 0) - { + if (device_id >= 0) + { #ifdef OOMPH_ENABLE_DEVICE - attr.device.cuda = device_id; - int handle = hwmalloc::get_device_id(); - attr.device.cuda = handle; -#if defined(OOMPH_DEVICE_CUDA) - attr.iface = FI_HMEM_CUDA; - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("CUDA"), "set device id", device_id, handle)); -#elif defined(OOMPH_DEVICE_HIP) - attr.iface = FI_HMEM_ROCR; - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("HIP"), "set device id", device_id, handle)); -#endif + attr.device.cuda = device_id; + int handle = hwmalloc::get_device_id(); + attr.device.cuda = handle; +# if defined(OOMPH_DEVICE_CUDA) + attr.iface = FI_HMEM_CUDA; + LF_DEB(NS_MEMORY::mrn_deb, + trace(NS_DEBUG::str<>("CUDA"), "set device id", device_id, handle)); +# elif defined(OOMPH_DEVICE_HIP) + attr.iface = FI_HMEM_ROCR; + LF_DEB(NS_MEMORY::mrn_deb, + trace(NS_DEBUG::str<>("HIP"), "set device id", device_id, handle)); +# endif #endif + } + uint64_t flags = 0; + int ret = fi_mr_regattr(pd, &attr, flags, mr); + if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "register_memory"); } + return ret; } - uint64_t flags = 0; - int ret = fi_mr_regattr(pd, &attr, flags, mr); - if (ret) { throw 
NS_LIBFABRIC::fabric_error(int(ret), "register_memory"); } - return ret; - } - // unregister region - static inline int unregister_memory(provider_region* region) { return fi_close(®ion->fid); } - - // Default registration flags for this provider - static inline constexpr int access_flags() - { - return FI_READ | FI_WRITE | FI_RECV | FI_SEND /*| FI_REMOTE_READ | FI_REMOTE_WRITE*/; - } + // unregister region + static inline int unregister_memory(provider_region* region) + { + return fi_close(®ion->fid); + } - // Get the local descriptor of the memory region. - static inline void* get_local_key(provider_region* const region) { return fi_mr_desc(region); } + // Default registration flags for this provider + static inline constexpr int access_flags() + { + return FI_READ | FI_WRITE | FI_RECV | FI_SEND /*| FI_REMOTE_READ | FI_REMOTE_WRITE*/; + } - // Get the remote key of the memory region. - static inline uint64_t get_remote_key(provider_region* const region) - { - return fi_mr_key(region); - } -}; + // Get the local descriptor of the memory region. + static inline void* get_local_key(provider_region* const region) + { + return fi_mr_desc(region); + } -// -------------------------------------------------------------------- -// This is a handle to a small chunk of memory that has been registered -// as part of a much larger allocation (a memory_segment) -struct memory_handle -{ - // -------------------------------------------------------------------- - using provider_region = region_provider::provider_region; + // Get the remote key of the memory region. 
+ static inline uint64_t get_remote_key(provider_region* const region) + { + return fi_mr_key(region); + } + }; // -------------------------------------------------------------------- - // Default constructor creates unusable handle(region) - memory_handle() - : address_{nullptr} - , region_{nullptr} - , size_{0} - , used_space_{0} - { - } - memory_handle(memory_handle const&) noexcept = default; - memory_handle& operator=(memory_handle const&) noexcept = default; - - memory_handle(provider_region* region, unsigned char* addr, - std::size_t size /*, uint32_t flags*/) noexcept - : address_{addr} - , region_{region} - , size_{uint32_t(size)} - , used_space_{0} + // This is a handle to a small chunk of memory that has been registered + // as part of a much larger allocation (a memory_segment) + struct memory_handle { - // LF_DEB(NS_MEMORY::mrn_deb, - // trace(NS_DEBUG::str<>("memory_handle"), *this)); - } + // -------------------------------------------------------------------- + using provider_region = region_provider::provider_region; + + // -------------------------------------------------------------------- + // Default constructor creates unusable handle(region) + memory_handle() + : address_{nullptr} + , region_{nullptr} + , size_{0} + , used_space_{0} + { + } + memory_handle(memory_handle const&) noexcept = default; + memory_handle& operator=(memory_handle const&) noexcept = default; + + memory_handle(provider_region* region, unsigned char* addr, + std::size_t size /*, uint32_t flags*/) noexcept + : address_{addr} + , region_{region} + , size_{uint32_t(size)} + , used_space_{0} + { + // LF_DEB(NS_MEMORY::mrn_deb, + // trace(NS_DEBUG::str<>("memory_handle"), *this)); + } - // -------------------------------------------------------------------- - // move constructor, clear other region so that it is not unregistered twice - memory_handle(memory_handle&& other) noexcept - : address_{other.address_} - , region_{std::exchange(other.region_, nullptr)} - , 
size_{other.size_} - , used_space_{other.used_space_} - { - } + // -------------------------------------------------------------------- + // move constructor, clear other region so that it is not unregistered twice + memory_handle(memory_handle&& other) noexcept + : address_{other.address_} + , region_{std::exchange(other.region_, nullptr)} + , size_{other.size_} + , used_space_{other.used_space_} + { + } - // -------------------------------------------------------------------- - // move assignment, clear other region so that it is not unregistered twice - memory_handle& operator=(memory_handle&& other) noexcept - { - address_ = other.address_; - region_ = std::exchange(other.region_, nullptr); - size_ = other.size_; - used_space_ = other.used_space_; - return *this; - } + // -------------------------------------------------------------------- + // move assignment, clear other region so that it is not unregistered twice + memory_handle& operator=(memory_handle&& other) noexcept + { + address_ = other.address_; + region_ = std::exchange(other.region_, nullptr); + size_ = other.size_; + used_space_ = other.used_space_; + return *this; + } - // -------------------------------------------------------------------- - // Return the address of this memory region block. - inline unsigned char* get_address(void) const { return address_; } + // -------------------------------------------------------------------- + // Return the address of this memory region block. + inline unsigned char* get_address(void) const { return address_; } - // -------------------------------------------------------------------- - // Get the local descriptor of the memory region. - inline void* get_local_key(void) const { return region_provider::get_local_key(region_); } + // -------------------------------------------------------------------- + // Get the local descriptor of the memory region. 
+ inline void* get_local_key(void) const { return region_provider::get_local_key(region_); } - // -------------------------------------------------------------------- - // Get the remote key of the memory region. - inline uint64_t get_remote_key(void) const { return region_provider::get_remote_key(region_); } + // -------------------------------------------------------------------- + // Get the remote key of the memory region. + inline uint64_t get_remote_key(void) const + { + return region_provider::get_remote_key(region_); + } - // -------------------------------------------------------------------- - // Get the size of the memory chunk usable by this memory region, - // this may be smaller than the value returned by get_length - // if the region is a sub region (partial region) within another block - inline uint64_t get_size(void) const { return size_; } + // -------------------------------------------------------------------- + // Get the size of the memory chunk usable by this memory region, + // this may be smaller than the value returned by get_length + // if the region is a sub region (partial region) within another block + inline uint64_t get_size(void) const { return size_; } - // -------------------------------------------------------------------- - // Get the size used by a message in the memory region. - inline uint32_t get_message_length(void) const { return used_space_; } + // -------------------------------------------------------------------- + // Get the size used by a message in the memory region. + inline uint32_t get_message_length(void) const { return used_space_; } - // -------------------------------------------------------------------- - // Set the size used by a message in the memory region. - inline void set_message_length(uint32_t length) { used_space_ = length; } + // -------------------------------------------------------------------- + // Set the size used by a message in the memory region. 
+ inline void set_message_length(uint32_t length) { used_space_ = length; } - // -------------------------------------------------------------------- - void release_region() noexcept { region_ = nullptr; } + // -------------------------------------------------------------------- + void release_region() noexcept { region_ = nullptr; } - // -------------------------------------------------------------------- - // return the underlying libfabric region handle - inline provider_region* get_region() const { return region_; } + // -------------------------------------------------------------------- + // return the underlying libfabric region handle + inline provider_region* get_region() const { return region_; } - // -------------------------------------------------------------------- - // Deregister the memory region. - // returns 0 when successful, -1 otherwise - int deregister(void) const - { - if (region_ /*&& !get_user_region()*/) + // -------------------------------------------------------------------- + // Deregister the memory region. 
+ // returns 0 when successful, -1 otherwise + int deregister(void) const { - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("release"), region_)); - // - if (region_provider::unregister_memory(region_)) + if (region_ /*&& !get_user_region()*/) { - LF_DEB(NS_MEMORY::mrn_deb, error("fi_close mr failed")); - return -1; + LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("release"), region_)); + // + if (region_provider::unregister_memory(region_)) + { + LF_DEB(NS_MEMORY::mrn_deb, error("fi_close mr failed")); + return -1; + } + else + { + LF_DEB( + NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("de-Registered region"), *this)); + } + region_ = nullptr; } - else - { - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("de-Registered region"), *this)); - } - region_ = nullptr; + return 0; } - return 0; - } - // -------------------------------------------------------------------- - friend std::ostream& operator<<(std::ostream& os, memory_handle const& region) - { - (void)region; + // -------------------------------------------------------------------- + friend std::ostream& operator<<(std::ostream& os, memory_handle const& region) + { + (void) region; #if 1 || has_debug - os << "region " - << NS_DEBUG::ptr(®ion) - //<< " fi_region " << NS_DEBUG::ptr(region.region_) - << " address " << NS_DEBUG::ptr(region.address_) << " size " - << NS_DEBUG::hex<6>(region.size_) - //<< " used_space " << NS_DEBUG::hex<6>(region.used_space_/*size_*/) - << " loc key " - << NS_DEBUG::ptr( - region.region_ ? region_provider::get_local_key(region.region_) : nullptr) - << " rem key " - << NS_DEBUG::ptr(region.region_ ? 
region_provider::get_remote_key(region.region_) : 0); - ///// clang-format off - ///// clang-format on + os << "region " + << NS_DEBUG::ptr(®ion) + //<< " fi_region " << NS_DEBUG::ptr(region.region_) + << " address " << NS_DEBUG::ptr(region.address_) << " size " + << NS_DEBUG::hex<6>(region.size_) + //<< " used_space " << NS_DEBUG::hex<6>(region.used_space_/*size_*/) + << " loc key " + << NS_DEBUG::ptr( + region.region_ ? region_provider::get_local_key(region.region_) : nullptr) + << " rem key " + << NS_DEBUG::ptr( + region.region_ ? region_provider::get_remote_key(region.region_) : 0); + ///// clang-format off + ///// clang-format on #endif - return os; - } - - protected: - // This gives the start address of this region. - // This is the address that should be used for data storage - unsigned char* address_; + return os; + } - // The hardware level handle to the region (as returned from libfabric fi_mr_reg) - mutable provider_region* region_; + protected: + // This gives the start address of this region. + // This is the address that should be used for data storage + unsigned char* address_; - // The (maximum available) size of the memory buffer - uint32_t size_; + // The hardware level handle to the region (as returned from libfabric fi_mr_reg) + mutable provider_region* region_; - // Space used by a message in the memory region. - // This may be smaller/less than the size available if more space - // was allocated than it turns out was needed - mutable uint32_t used_space_; -}; + // The (maximum available) size of the memory buffer + uint32_t size_; -// -------------------------------------------------------------------- -// a memory segment is a pinned block of memory that has been specialized -// by a particular region provider. Each provider (infiniband, libfabric, -// other) has a different definition for the object and the protection -// domain used to limit access. 
-// -------------------------------------------------------------------- -struct memory_segment : public memory_handle -{ - using provider_domain = region_provider::provider_domain; - using provider_region = region_provider::provider_region; - using handle_type = memory_handle; + // Space used by a message in the memory region. + // This may be smaller/less than the size available if more space + // was allocated than it turns out was needed + mutable uint32_t used_space_; + }; // -------------------------------------------------------------------- - memory_segment(provider_region* region, unsigned char* address, unsigned char* base_address, - uint64_t size) - : memory_handle(region, address, size) - , base_addr_(base_address) - { - } - + // a memory segment is a pinned block of memory that has been specialized + // by a particular region provider. Each provider (infiniband, libfabric, + // other) has a different definition for the object and the protection + // domain used to limit access. 
// -------------------------------------------------------------------- - // move constructor, clear other region - memory_segment(memory_segment&& other) noexcept - : memory_handle(std::move(other)) - , base_addr_{std::exchange(other.base_addr_, nullptr)} + struct memory_segment : public memory_handle { - } + using provider_domain = region_provider::provider_domain; + using provider_region = region_provider::provider_region; + using handle_type = memory_handle; + + // -------------------------------------------------------------------- + memory_segment(provider_region* region, unsigned char* address, unsigned char* base_address, + uint64_t size) + : memory_handle(region, address, size) + , base_addr_(base_address) + { + } - // -------------------------------------------------------------------- - // move assignment, clear other region - memory_segment& operator=(memory_segment&& other) noexcept - { - memory_handle(std::move(other)); - region_ = std::exchange(other.region_, nullptr); - return *this; - } + // -------------------------------------------------------------------- + // move constructor, clear other region + memory_segment(memory_segment&& other) noexcept + : memory_handle(std::move(other)) + , base_addr_{std::exchange(other.base_addr_, nullptr)} + { + } - // -------------------------------------------------------------------- - // construct a memory region object by registering an existing address buffer - // we do not cache local/remote keys here because memory segments are only - // used by the heap to store chunks and the user will always receive - // a memory_handle - which does have keys cached - memory_segment(provider_domain* pd, const void* buffer, const uint64_t length, bool bind_mr, - void* ep, int device_id) - { - // an rma key counter to keep some providers (CXI) happy - static std::atomic key = 0; - // - address_ = static_cast(const_cast(buffer)); - size_ = length; - used_space_ = length; - region_ = nullptr; - // - base_addr_ = 
memory_handle::address_; - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("memory_segment"), *this, device_id)); - - int ret = region_provider::fi_register_memory(pd, device_id, buffer, length, - region_provider::access_flags(), 0, key++, &(region_)); - if (!ret) + // -------------------------------------------------------------------- + // move assignment, clear other region + memory_segment& operator=(memory_segment&& other) noexcept { - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("Registered region"), "device", device_id, *this)); + memory_handle(std::move(other)); + region_ = std::exchange(other.region_, nullptr); + return *this; } - if (bind_mr) + // -------------------------------------------------------------------- + // construct a memory region object by registering an existing address buffer + // we do not cache local/remote keys here because memory segments are only + // used by the heap to store chunks and the user will always receive + // a memory_handle - which does have keys cached + memory_segment(provider_domain* pd, void const* buffer, uint64_t const length, bool bind_mr, + void* ep, int device_id) { - ret = fi_mr_bind(region_, (struct fid*)ep, 0); - if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_bind"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Bound region"), *this)); } + // an rma key counter to keep some providers (CXI) happy + static std::atomic key = 0; + // + address_ = static_cast(const_cast(buffer)); + size_ = length; + used_space_ = length; + region_ = nullptr; + // + base_addr_ = memory_handle::address_; + LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("memory_segment"), *this, device_id)); - ret = fi_mr_enable(region_); - if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_enable"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Enabled region"), *this)); } + int ret = region_provider::fi_register_memory(pd, device_id, buffer, length, + region_provider::access_flags(), 
0, key++, &(region_)); + if (!ret) + { + LF_DEB(NS_MEMORY::mrn_deb, + trace(NS_DEBUG::str<>("Registered region"), "device", device_id, *this)); + } + + if (bind_mr) + { + ret = fi_mr_bind(region_, (struct fid*) ep, 0); + if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_bind"); } + else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Bound region"), *this)); } + + ret = fi_mr_enable(region_); + if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_enable"); } + else + { + LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Enabled region"), *this)); + } + } } - } - // -------------------------------------------------------------------- - // destroy the region and memory according to flag settings - ~memory_segment() { deregister(); } + // -------------------------------------------------------------------- + // destroy the region and memory according to flag settings + ~memory_segment() { deregister(); } - handle_type get_handle(std::size_t offset, std::size_t size) const noexcept - { - return memory_handle(region_, base_addr_ + offset, size); - } + handle_type get_handle(std::size_t offset, std::size_t size) const noexcept + { + return memory_handle(region_, base_addr_ + offset, size); + } - // -------------------------------------------------------------------- - // Get the address of the base memory region. - // This is the address of the memory allocated from the system - inline unsigned char* get_base_address(void) const { return base_addr_; } + // -------------------------------------------------------------------- + // Get the address of the base memory region. 
+ // This is the address of the memory allocated from the system + inline unsigned char* get_base_address(void) const { return base_addr_; } - // -------------------------------------------------------------------- - friend std::ostream& operator<<(std::ostream& os, memory_segment const& region) - { - (void)region; + // -------------------------------------------------------------------- + friend std::ostream& operator<<(std::ostream& os, memory_segment const& region) + { + (void) region; #if has_debug - // clang-format off + // clang-format off os << *static_cast(®ion) << " base address " << NS_DEBUG::ptr(region.base_addr_); - // clang-format on + // clang-format on #endif - return os; - } + return os; + } - public: - // this is the base address of the memory registered by this segment - // individual memory_handles are offset from this address - unsigned char* base_addr_; -}; + public: + // this is the base address of the memory registered by this segment + // individual memory_handles are offset from this address + unsigned char* base_addr_; + }; -} // namespace NS_MEMORY +} // namespace NS_MEMORY diff --git a/src/libfabric/operation_context.cpp b/src/libfabric/operation_context.cpp index ce5081dd..8c8d277f 100644 --- a/src/libfabric/operation_context.cpp +++ b/src/libfabric/operation_context.cpp @@ -8,49 +8,52 @@ * SPDX-License-Identifier: BSD-3-Clause */ // paths relative to backend -#include -#include #include #include +#include +#include -namespace oomph::libfabric -{ -void -operation_context::handle_cancelled() -{ - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); - // enqueue the cancelled/callback - if (std::holds_alternative(m_req)) - { - // regular (non-shared) recv - auto s = std::get(m_req); - while (!(s->m_comm->m_recv_cb_cancel.push(s))) {} - } - else if (std::holds_alternative(m_req)) +namespace oomph::libfabric { + void operation_context::handle_cancelled() { - // shared recv - auto s = std::get(m_req); - while 
(!(s->m_ctxt->m_recv_cb_cancel.push(s))) {} + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + // enqueue the cancelled/callback + if (std::holds_alternative(m_req)) + { + // regular (non-shared) recv + auto s = std::get(m_req); + while (!(s->m_comm->m_recv_cb_cancel.push(s))) {} + } + else if (std::holds_alternative(m_req)) + { + // shared recv + auto s = std::get(m_req); + while (!(s->m_ctxt->m_recv_cb_cancel.push(s))) {} + } + else { throw std::runtime_error("Request state invalid in handle_cancelled"); } } - else { throw std::runtime_error("Request state invalid in handle_cancelled"); } -} -int -operation_context::handle_tagged_recv_completion_impl(void* user_data) -{ - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); - if (std::holds_alternative(m_req)) + int operation_context::handle_tagged_recv_completion_impl(void* user_data) { - // regular (non-shared) recv - auto s = std::get(m_req); - //if (std::this_thread::get_id() == thread_id_) - if (reinterpret_cast(user_data) == s->m_comm) + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + if (std::holds_alternative(m_req)) { - if (!s->m_comm->has_reached_recursion_depth()) + // regular (non-shared) recv + auto s = std::get(m_req); + //if (std::this_thread::get_id() == thread_id_) + if (reinterpret_cast(user_data) == s->m_comm) { - auto inc = s->m_comm->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); + if (!s->m_comm->has_reached_recursion_depth()) + { + auto inc = s->m_comm->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); + } + else + { + // enqueue the callback + while (!(s->m_comm->m_recv_cb_queue.push(s))) {} + } } else { @@ -58,82 +61,76 @@ operation_context::handle_tagged_recv_completion_impl(void* user_data) while (!(s->m_comm->m_recv_cb_queue.push(s))) {} } } - else - { - // enqueue the callback - while (!(s->m_comm->m_recv_cb_queue.push(s))) {} - } - } - else if 
(std::holds_alternative(m_req)) - { - // shared recv - auto s = std::get(m_req); - if (!s->m_comm->m_context->has_reached_recursion_depth()) + else if (std::holds_alternative(m_req)) { - auto inc = s->m_comm->m_context->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); - } - else - { - // enqueue the callback - while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} - } - } - else - { - detail::request_state** req = reinterpret_cast(&m_req); - LF_DEB(NS_MEMORY::opctx_deb<9>, - error(NS_DEBUG::str<>("invalid request_state"), this, "request", NS_DEBUG::ptr(req))); - throw std::runtime_error("Request state invalid in handle_tagged_recv"); - } - return 1; -} - -int -operation_context::handle_tagged_send_completion_impl(void* user_data) -{ - if (std::holds_alternative(m_req)) - { - // regular (non-shared) recv - auto s = std::get(m_req); - if (reinterpret_cast(user_data) == s->m_comm) - { - if (!s->m_comm->has_reached_recursion_depth()) + // shared recv + auto s = std::get(m_req); + if (!s->m_comm->m_context->has_reached_recursion_depth()) { - auto inc = s->m_comm->recursion(); + auto inc = s->m_comm->m_context->recursion(); auto ptr = s->release_self_ref(); s->invoke_cb(); } else { // enqueue the callback - while (!(s->m_comm->m_send_cb_queue.push(s))) {} + while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} } } else { - // enqueue the callback - while (!(s->m_comm->m_send_cb_queue.push(s))) {} + detail::request_state** req = reinterpret_cast(&m_req); + LF_DEB(NS_MEMORY::opctx_deb<9>, + error( + NS_DEBUG::str<>("invalid request_state"), this, "request", NS_DEBUG::ptr(req))); + throw std::runtime_error("Request state invalid in handle_tagged_recv"); } + return 1; } - else if (std::holds_alternative(m_req)) + + int operation_context::handle_tagged_send_completion_impl(void* user_data) { - // shared recv - auto s = std::get(m_req); - if (!s->m_comm->m_context->has_reached_recursion_depth()) + if (std::holds_alternative(m_req)) { - auto inc = 
s->m_comm->m_context->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); + // regular (non-shared) recv + auto s = std::get(m_req); + if (reinterpret_cast(user_data) == s->m_comm) + { + if (!s->m_comm->has_reached_recursion_depth()) + { + auto inc = s->m_comm->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); + } + else + { + // enqueue the callback + while (!(s->m_comm->m_send_cb_queue.push(s))) {} + } + } + else + { + // enqueue the callback + while (!(s->m_comm->m_send_cb_queue.push(s))) {} + } } - else + else if (std::holds_alternative(m_req)) { - // enqueue the callback - while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} + // shared recv + auto s = std::get(m_req); + if (!s->m_comm->m_context->has_reached_recursion_depth()) + { + auto inc = s->m_comm->m_context->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); + } + else + { + // enqueue the callback + while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} + } } + else { throw std::runtime_error("Request state invalid in handle_tagged_send"); } + return 1; } - else { throw std::runtime_error("Request state invalid in handle_tagged_send"); } - return 1; -} -} // namespace oomph::libfabric +} // namespace oomph::libfabric diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index ad106e6a..0f6b5103 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -15,39 +15,38 @@ // #include "operation_context_base.hpp" // -namespace oomph::libfabric -{ - -template -inline /*constexpr*/ NS_DEBUG::print_threshold opctx_deb("OP__CXT"); - -// This struct holds the ready state of a future -// we must also store the context used in libfabric, in case -// a request is cancelled - fi_cancel(...) 
needs it -struct operation_context : public operation_context_base -{ - std::variant m_req; - - template - operation_context(RequestState* req) - : operation_context_base() - , m_req{req} - { - [[maybe_unused]] auto scp = - opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__, "request", req); - } - - // -------------------------------------------------------------------- - // When a completion returns FI_ECANCELED, this is called - void handle_cancelled(); +namespace oomph::libfabric { - // -------------------------------------------------------------------- - // Called when a tagged recv completes - int handle_tagged_recv_completion_impl(void* user_data); + template + inline /*constexpr*/ NS_DEBUG::print_threshold opctx_deb("OP__CXT"); - // -------------------------------------------------------------------- - // Called when a tagged send completes - int handle_tagged_send_completion_impl(void* user_data); -}; - -} // namespace oomph::libfabric + // This struct holds the ready state of a future + // we must also store the context used in libfabric, in case + // a request is cancelled - fi_cancel(...) 
needs it + struct operation_context : public operation_context_base + { + std::variant m_req; + + template + operation_context(RequestState* req) + : operation_context_base() + , m_req{req} + { + [[maybe_unused]] auto scp = + opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__, "request", req); + } + + // -------------------------------------------------------------------- + // When a completion returns FI_ECANCELED, this is called + void handle_cancelled(); + + // -------------------------------------------------------------------- + // Called when a tagged recv completes + int handle_tagged_recv_completion_impl(void* user_data); + + // -------------------------------------------------------------------- + // Called when a tagged send completes + int handle_tagged_send_completion_impl(void* user_data); + }; + +} // namespace oomph::libfabric diff --git a/src/libfabric/operation_context_base.hpp b/src/libfabric/operation_context_base.hpp index e5156f99..5de5c386 100644 --- a/src/libfabric/operation_context_base.hpp +++ b/src/libfabric/operation_context_base.hpp @@ -12,85 +12,84 @@ #include #include "oomph_libfabric_defines.hpp" -namespace NS_LIBFABRIC -{ +namespace NS_LIBFABRIC { -class controller; + class controller; -static NS_DEBUG::enable_print ctx_bas("CTXBASE"); + static NS_DEBUG::enable_print ctx_bas("CTXBASE"); -// This struct holds the ready state of a future -// we must also store the context used in libfabric, in case -// a request is cancelled - fi_cancel(...) needs it -template -struct operation_context_base -{ - private: - // libfabric requires some space for it's internal bookkeeping - // so the first member of this struct must be fi_context - fi_context context_reserved_space; - - public: - operation_context_base() - : context_reserved_space() + // This struct holds the ready state of a future + // we must also store the context used in libfabric, in case + // a request is cancelled - fi_cancel(...) 
needs it + template + struct operation_context_base { - [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::ptr(this), __func__); - } + private: + // libfabric requires some space for it's internal bookkeeping + // so the first member of this struct must be fi_context + fi_context context_reserved_space; - // error - void handle_error(struct fi_cq_err_entry& err) - { - static_cast(this)->handle_error_impl(err); - } - void handle_error_impl(struct fi_cq_err_entry& /*err*/) { std::terminate(); } + public: + operation_context_base() + : context_reserved_space() + { + [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::ptr(this), __func__); + } - void handle_cancelled() { static_cast(this)->handle_cancelled_impl(); } - void handle_cancelled_impl() { std::terminate(); } + // error + void handle_error(struct fi_cq_err_entry& err) + { + static_cast(this)->handle_error_impl(err); + } + void handle_error_impl(struct fi_cq_err_entry& /*err*/) { std::terminate(); } - // send - int handle_send_completion() - { - return static_cast(this)->handle_send_completion_impl(); - } - int handle_send_completion_impl() { return 0; } + void handle_cancelled() { static_cast(this)->handle_cancelled_impl(); } + void handle_cancelled_impl() { std::terminate(); } - // tagged send - int handle_tagged_send_completion(void* user_data) - { - return static_cast(this)->handle_tagged_send_completion_impl(user_data); - } - int handle_tagged_send_completion_impl(void* /*user_data*/) { return 0; } + // send + int handle_send_completion() + { + return static_cast(this)->handle_send_completion_impl(); + } + int handle_send_completion_impl() { return 0; } - // recv - int handle_recv_completion(std::uint64_t len) - { - return static_cast(this)->handle_recv_completion_impl(len); - } - int handle_recv_completion_impl(std::uint64_t /*len*/) { return 0; } + // tagged send + int handle_tagged_send_completion(void* user_data) + { + return static_cast(this)->handle_tagged_send_completion_impl(user_data); + } + int 
handle_tagged_send_completion_impl(void* /*user_data*/) { return 0; } - // tagged recv - int handle_tagged_recv_completion(void* user_data) - { - return static_cast(this)->handle_tagged_recv_completion_impl(user_data); - } - int handle_tagged_recv_completion_impl(bool /*threadlocal*/) { return 0; } + // recv + int handle_recv_completion(std::uint64_t len) + { + return static_cast(this)->handle_recv_completion_impl(len); + } + int handle_recv_completion_impl(std::uint64_t /*len*/) { return 0; } - void handle_rma_read_completion() - { - static_cast(this)->handle_rma_read_completion_impl(); - } - void handle_rma_read_completion_impl() {} + // tagged recv + int handle_tagged_recv_completion(void* user_data) + { + return static_cast(this)->handle_tagged_recv_completion_impl(user_data); + } + int handle_tagged_recv_completion_impl(bool /*threadlocal*/) { return 0; } - // unknown sender = new connection - int handle_new_connection(controller* ctrl, std::uint64_t len) - { - return static_cast(this)->handle_new_connection_impl(ctrl, len); - } - int handle_new_connection_impl(controller*, std::uint64_t) { return 0; } -}; + void handle_rma_read_completion() + { + static_cast(this)->handle_rma_read_completion_impl(); + } + void handle_rma_read_completion_impl() {} -// provided so that a pointer can be cast to this and the operation_context_type queried -struct unspecialized_context : public operation_context_base -{ -}; -} // namespace NS_LIBFABRIC + // unknown sender = new connection + int handle_new_connection(controller* ctrl, std::uint64_t len) + { + return static_cast(this)->handle_new_connection_impl(ctrl, len); + } + int handle_new_connection_impl(controller*, std::uint64_t) { return 0; } + }; + + // provided so that a pointer can be cast to this and the operation_context_type queried + struct unspecialized_context : public operation_context_base + { + }; +} // namespace NS_LIBFABRIC diff --git a/src/libfabric/print.hpp b/src/libfabric/print.hpp index cf8de408..73c37c41 
100644 --- a/src/libfabric/print.hpp +++ b/src/libfabric/print.hpp @@ -27,12 +27,12 @@ #include // #if defined(__linux) || defined(linux) || defined(__linux__) -#include -#include +# include +# include #elif defined(__APPLE__) -#include -#include -#define environ (*_NSGetEnviron()) +# include +# include +# define environ (*_NSGetEnviron()) #else extern char** environ; #endif @@ -78,665 +78,648 @@ extern char** environ; // ------------------------------------------------------------ /// \cond NODETAIL -namespace NS_DEBUG -{ - -// ------------------------------------------------------------------ -// format as zero padded int -// ------------------------------------------------------------------ -namespace detail -{ - -template -struct dec -{ - constexpr dec(T const& v) - : data_(v) - { - } +namespace NS_DEBUG { - T const& data_; + // ------------------------------------------------------------------ + // format as zero padded int + // ------------------------------------------------------------------ + namespace detail { - friend std::ostream& operator<<(std::ostream& os, dec const& d) - { - os << std::right << std::setfill('0') << std::setw(N) << std::noshowbase << std::dec - << d.data_; - return os; - } -}; -} // namespace detail - -template -constexpr detail::dec -dec(T const& v) -{ - return detail::dec(v); -} - -// ------------------------------------------------------------------ -// format as pointer -// ------------------------------------------------------------------ -struct ptr -{ - ptr(void const* v) - : data_(v) - { - } - ptr(std::uintptr_t const v) - : data_(reinterpret_cast(v)) - { - } - void const* data_; - friend std::ostream& operator<<(std::ostream& os, ptr const& d) - { - os << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase - << std::hex << reinterpret_cast(d.data_); - return os; - } -}; - -// ------------------------------------------------------------------ -// format as zero padded hex -// 
------------------------------------------------------------------ -namespace detail -{ - -template -struct hex; - -template -struct hex::value>::type> -{ - constexpr hex(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, const hex& d) - { - os << std::right << "0x" << std::setfill('0') << std::setw(N) << std::noshowbase << std::hex - << d.data_; - return os; - } -}; + template + struct dec + { + constexpr dec(T const& v) + : data_(v) + { + } -template -struct hex::value>::type> -{ - constexpr hex(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, const hex& d) - { - os << std::right << std::setw(N) << std::noshowbase << std::hex << d.data_; - return os; - } -}; -} // namespace detail - -template -constexpr detail::hex -hex(T const& v) -{ - return detail::hex(v); -} - -// ------------------------------------------------------------------ -// format as binary bits -// ------------------------------------------------------------------ -namespace detail -{ - -template -struct bin -{ - constexpr bin(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, const bin& d) + T const& data_; + + friend std::ostream& operator<<(std::ostream& os, dec const& d) + { + os << std::right << std::setfill('0') << std::setw(N) << std::noshowbase << std::dec + << d.data_; + return os; + } + }; + } // namespace detail + + template + constexpr detail::dec dec(T const& v) { - os << std::bitset(d.data_); - return os; + return detail::dec(v); } -}; -} // namespace detail - -template -constexpr detail::bin -bin(T const& v) -{ - return detail::bin(v); -} - -// ------------------------------------------------------------------ -// format as padded string -// ------------------------------------------------------------------ -template -struct str -{ - constexpr str(char const* v) - : data_(v) + + // 
------------------------------------------------------------------ + // format as pointer + // ------------------------------------------------------------------ + struct ptr { - } + ptr(void const* v) + : data_(v) + { + } + ptr(std::uintptr_t const v) + : data_(reinterpret_cast(v)) + { + } + void const* data_; + friend std::ostream& operator<<(std::ostream& os, ptr const& d) + { + os << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase + << std::hex << reinterpret_cast(d.data_); + return os; + } + }; + + // ------------------------------------------------------------------ + // format as zero padded hex + // ------------------------------------------------------------------ + namespace detail { - char const* data_; + template + struct hex; - friend std::ostream& operator<<(std::ostream& os, str const& d) + template + struct hex::value>::type> + { + constexpr hex(T const& v) + : data_(v) + { + } + T const& data_; + friend std::ostream& operator<<(std::ostream& os, hex const& d) + { + os << std::right << "0x" << std::setfill('0') << std::setw(N) << std::noshowbase + << std::hex << d.data_; + return os; + } + }; + + template + struct hex::value>::type> + { + constexpr hex(T const& v) + : data_(v) + { + } + T const& data_; + friend std::ostream& operator<<(std::ostream& os, hex const& d) + { + os << std::right << std::setw(N) << std::noshowbase << std::hex << d.data_; + return os; + } + }; + } // namespace detail + + template + constexpr detail::hex hex(T const& v) { - os << std::left << std::setfill(' ') << std::setw(N) << d.data_; - return os; + return detail::hex(v); } -}; - -// ------------------------------------------------------------------ -// format as ip address -// ------------------------------------------------------------------ -struct ipaddr -{ - ipaddr(const void* a) - : data_(reinterpret_cast(a)) - , ipdata_(0) + + // ------------------------------------------------------------------ + // format as binary bits + // 
------------------------------------------------------------------ + namespace detail { + + template + struct bin + { + constexpr bin(T const& v) + : data_(v) + { + } + T const& data_; + friend std::ostream& operator<<(std::ostream& os, bin const& d) + { + os << std::bitset(d.data_); + return os; + } + }; + } // namespace detail + + template + constexpr detail::bin bin(T const& v) { + return detail::bin(v); } - ipaddr(const uint32_t a) - : data_(reinterpret_cast(&ipdata_)) - , ipdata_(a) + + // ------------------------------------------------------------------ + // format as padded string + // ------------------------------------------------------------------ + template + struct str { - } - const uint8_t* data_; - const uint32_t ipdata_; + constexpr str(char const* v) + : data_(v) + { + } + + char const* data_; + + friend std::ostream& operator<<(std::ostream& os, str const& d) + { + os << std::left << std::setfill(' ') << std::setw(N) << d.data_; + return os; + } + }; - friend std::ostream& operator<<(std::ostream& os, ipaddr const& p) + // ------------------------------------------------------------------ + // format as ip address + // ------------------------------------------------------------------ + struct ipaddr { - os << std::dec << int(p.data_[0]) << "." << int(p.data_[1]) << "." << int(p.data_[2]) << "." 
- << int(p.data_[3]); - return os; - } -}; - -// ------------------------------------------------------------------ -// helper fuction for printing CRC32 -// ------------------------------------------------------------------ -inline uint32_t -crc32(const void* address, size_t length) -{ - boost::crc_32_type result; - result.process_bytes(address, length); - return result.checksum(); -} - -// ------------------------------------------------------------------ -// helper fuction for printing short memory dump and crc32 -// useful for debugging corruptions in buffers during -// rma or other transfers -// ------------------------------------------------------------------ -struct mem_crc32 -{ - mem_crc32(const void* a, std::size_t len, const char* txt) - : addr_(reinterpret_cast(a)) - , len_(len) - , txt_(txt) + ipaddr(void const* a) + : data_(reinterpret_cast(a)) + , ipdata_(0) + { + } + ipaddr(uint32_t const a) + : data_(reinterpret_cast(&ipdata_)) + , ipdata_(a) + { + } + uint8_t const* data_; + uint32_t const ipdata_; + + friend std::ostream& operator<<(std::ostream& os, ipaddr const& p) + { + os << std::dec << int(p.data_[0]) << "." << int(p.data_[1]) << "." << int(p.data_[2]) + << "." 
<< int(p.data_[3]); + return os; + } + }; + + // ------------------------------------------------------------------ + // helper fuction for printing CRC32 + // ------------------------------------------------------------------ + inline uint32_t crc32(void const* address, size_t length) { + boost::crc_32_type result; + result.process_bytes(address, length); + return result.checksum(); } - const std::uint8_t* addr_; - const std::size_t len_; - const char* txt_; - friend std::ostream& operator<<(std::ostream& os, mem_crc32 const& p) + + // ------------------------------------------------------------------ + // helper fuction for printing short memory dump and crc32 + // useful for debugging corruptions in buffers during + // rma or other transfers + // ------------------------------------------------------------------ + struct mem_crc32 { - const std::uint8_t* byte = static_cast(p.addr_); - os << "Memory:"; - os << " address " << ptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) - << " CRC32:" << hex<8, std::size_t>(crc32(p.addr_, p.len_)) << "\n"; - size_t i = 0; - while (i < std::min(size_t(128), p.len_)) - { - os << "0x"; - for (int j = 7; j >= 0; j--) + mem_crc32(void const* a, std::size_t len, char const* txt) + : addr_(reinterpret_cast(a)) + , len_(len) + , txt_(txt) + { + } + std::uint8_t const* addr_; + std::size_t const len_; + char const* txt_; + friend std::ostream& operator<<(std::ostream& os, mem_crc32 const& p) + { + std::uint8_t const* byte = static_cast(p.addr_); + os << "Memory:"; + os << " address " << ptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) + << " CRC32:" << hex<8, std::size_t>(crc32(p.addr_, p.len_)) << "\n"; + size_t i = 0; + while (i < std::min(size_t(128), p.len_)) { - os << std::hex << std::setfill('0') << std::setw(2) - << (((i + j) > p.len_) ? (int)0 : (int)byte[i + j]); + os << "0x"; + for (int j = 7; j >= 0; j--) + { + os << std::hex << std::setfill('0') << std::setw(2) + << (((i + j) > p.len_) ? 
(int) 0 : (int) byte[i + j]); + } + i += 8; + if (i % 32 == 0) + os << std::endl; + else + os << " "; } - i += 8; - if (i % 32 == 0) os << std::endl; - else - os << " "; + os << ": " << p.txt_; + return os; } - os << ": " << p.txt_; - return os; - } -}; - -namespace detail -{ - -template -void -tuple_print(std::ostream& os, TupleType const& t, std::index_sequence) -{ - (..., (os << (I == 0 ? "" : " ") << std::get(t))); -} - -template -void -tuple_print(std::ostream& os, const std::tuple& t) -{ - tuple_print(os, t, std::make_index_sequence()); -} -} // namespace detail - -namespace detail -{ - -// ------------------------------------------------------------------ -// helper class for printing thread ID -// ------------------------------------------------------------------ -struct current_thread_print_helper -{ -}; - -inline std::ostream& -operator<<(std::ostream& os, current_thread_print_helper const&) -{ - os << hex<12, std::thread::id>(std::this_thread::get_id()) + }; + + namespace detail { + + template + void tuple_print(std::ostream& os, TupleType const& t, std::index_sequence) + { + (..., (os << (I == 0 ? 
"" : " ") << std::get(t))); + } + + template + void tuple_print(std::ostream& os, std::tuple const& t) + { + tuple_print(os, t, std::make_index_sequence()); + } + } // namespace detail + + namespace detail { + + // ------------------------------------------------------------------ + // helper class for printing thread ID + // ------------------------------------------------------------------ + struct current_thread_print_helper + { + }; + + inline std::ostream& operator<<(std::ostream& os, current_thread_print_helper const&) + { + os << hex<12, std::thread::id>(std::this_thread::get_id()) #ifdef DEBUGGING_PRINT_LINUX - << " cpu " << debug::dec<3, int>(sched_getcpu()) << " "; + << " cpu " << debug::dec<3, int>(sched_getcpu()) << " "; #else - << " cpu " - << "--- "; + << " cpu " + << "--- "; #endif - return os; -} - -// ------------------------------------------------------------------ -// helper class for printing time since start -// ------------------------------------------------------------------ -struct hostname_print_helper -{ - const char* get_hostname() const - { - static bool initialized = false; - static char hostname_[20]; - if (!initialized) - { - initialized = true; - gethostname(hostname_, std::size_t(12)); - std::string temp = "(" + std::to_string(guess_rank()) + ")"; - std::strcat(hostname_, temp.c_str()); + return os; } - return hostname_; - } - int guess_rank() const - { - std::vector env_strings{"_RANK=", "_NODEID="}; - for (char** current = environ; *current; current++) + // ------------------------------------------------------------------ + // helper class for printing time since start + // ------------------------------------------------------------------ + struct hostname_print_helper { - auto e = std::string(*current); - for (auto s : env_strings) + char const* get_hostname() const + { + static bool initialized = false; + static char hostname_[20]; + if (!initialized) + { + initialized = true; + gethostname(hostname_, std::size_t(12)); + 
std::string temp = "(" + std::to_string(guess_rank()) + ")"; + std::strcat(hostname_, temp.c_str()); + } + return hostname_; + } + + int guess_rank() const { - auto pos = e.find(s); - if (pos != std::string::npos) + std::vector env_strings{"_RANK=", "_NODEID="}; + for (char** current = environ; *current; current++) { - //std::cout << "Got a rank string : " << e << std::endl; - return std::stoi(e.substr(pos + s.size(), 5)); + auto e = std::string(*current); + for (auto s : env_strings) + { + auto pos = e.find(s); + if (pos != std::string::npos) + { + //std::cout << "Got a rank string : " << e << std::endl; + return std::stoi(e.substr(pos + s.size(), 5)); + } + } } + return -1; } + }; + + inline std::ostream& operator<<(std::ostream& os, hostname_print_helper const& h) + { + os << debug::str<13>(h.get_hostname()) << " "; + return os; } - return -1; - } -}; - -inline std::ostream& -operator<<(std::ostream& os, hostname_print_helper const& h) -{ - os << debug::str<13>(h.get_hostname()) << " "; - return os; -} - -// ------------------------------------------------------------------ -// helper class for printing time since start -// ------------------------------------------------------------------ -struct current_time_print_helper -{ -}; - -inline std::ostream& -operator<<(std::ostream& os, current_time_print_helper const&) -{ - using namespace std::chrono; - static steady_clock::time_point log_t_start = steady_clock::now(); - // - auto now = steady_clock::now(); - auto nowt = duration_cast(now - log_t_start).count(); - // - os << debug::dec<10>(nowt) << " "; - return os; -} - -template -void -display(char const* prefix, Args const&... 
args) -{ - // using a temp stream object with a single copy to cout at the end - // prevents multiple threads from injecting overlapping text - std::stringstream tempstream; - tempstream << prefix << detail::current_time_print_helper() - << detail::current_thread_print_helper() << detail::hostname_print_helper(); - ((tempstream << args << " "), ...); - tempstream << "\n"; - std::cout << tempstream.str() << std::flush; -} - -template -void -debug(Args const&... args) -{ - display(" ", args...); -} - -template -void -warning(Args const&... args) -{ - display(" ", args...); -} - -template -void -error(Args const&... args) -{ - display(" ", args...); -} - -template -void -scope(Args const&... args) -{ - display(" ", args...); -} - -template -void -trace(Args const&... args) -{ - display(" ", args...); -} - -template -void -timed(Args const&... args) -{ - display(" ", args...); -} -} // namespace detail - -template -struct scoped_var -{ - // capture tuple elements by reference - no temp vars in constructor please - char const* prefix_; - std::tuple const message_; - std::string buffered_msg; - - // - scoped_var(char const* p, Args const&... args) - : prefix_(p) - , message_(args...) - { - std::stringstream tempstream; - detail::tuple_print(tempstream, message_); - buffered_msg = tempstream.str(); - detail::display(" ", prefix_, debug::str<>(">> enter <<"), tempstream.str()); - } - ~scoped_var() { detail::display(" ", prefix_, debug::str<>("<< leave >>"), buffered_msg); } -}; - -template -struct timed_var -{ - mutable std::chrono::steady_clock::time_point time_start_; - double const delay_; - std::tuple const message_; - // - timed_var(double const& delay, Args const&... args) - : time_start_(std::chrono::steady_clock::now()) - , delay_(delay) - , message_(args...) 
- { - } + // ------------------------------------------------------------------ + // helper class for printing time since start + // ------------------------------------------------------------------ + struct current_time_print_helper + { + }; - bool elapsed(std::chrono::steady_clock::time_point const& now) const - { - double elapsed_ = - std::chrono::duration_cast>(now - time_start_).count(); + inline std::ostream& operator<<(std::ostream& os, current_time_print_helper const&) + { + using namespace std::chrono; + static steady_clock::time_point log_t_start = steady_clock::now(); + // + auto now = steady_clock::now(); + auto nowt = duration_cast(now - log_t_start).count(); + // + os << debug::dec<10>(nowt) << " "; + return os; + } - if (elapsed_ > delay_) + template + void display(char const* prefix, Args const&... args) { - time_start_ = now; - return true; + // using a temp stream object with a single copy to cout at the end + // prevents multiple threads from injecting overlapping text + std::stringstream tempstream; + tempstream << prefix << detail::current_time_print_helper() + << detail::current_thread_print_helper() << detail::hostname_print_helper(); + ((tempstream << args << " "), ...); + tempstream << "\n"; + std::cout << tempstream.str() << std::flush; } - return false; - } - friend std::ostream& operator<<(std::ostream& os, timed_var const& ti) - { - detail::tuple_print(os, ti.message_); - return os; - } -}; + template + void debug(Args const&... args) + { + display(" ", args...); + } -/////////////////////////////////////////////////////////////////////////// -template -struct enable_print; + template + void warning(Args const&... args) + { + display(" ", args...); + } -// when false, debug statements should produce no code -template<> -struct enable_print -{ - constexpr enable_print(const char*) {} + template + void error(Args const&... 
args) + { + display(" ", args...); + } - constexpr bool is_enabled() const { return false; } + template + void scope(Args const&... args) + { + display(" ", args...); + } - template - constexpr void debug(Args const&...) const - { - } + template + void trace(Args const&... args) + { + display(" ", args...); + } - template - constexpr void warning(Args const&...) const - { - } + template + void timed(Args const&... args) + { + display(" ", args...); + } + } // namespace detail - template - constexpr void trace(Args const&...) const + template + struct scoped_var { - } + // capture tuple elements by reference - no temp vars in constructor please + char const* prefix_; + std::tuple const message_; + std::string buffered_msg; - template - constexpr void error(Args const&...) const - { - } + // + scoped_var(char const* p, Args const&... args) + : prefix_(p) + , message_(args...) + { + std::stringstream tempstream; + detail::tuple_print(tempstream, message_); + buffered_msg = tempstream.str(); + detail::display(" ", prefix_, debug::str<>(">> enter <<"), tempstream.str()); + } - template - constexpr void timed(Args const&...) const - { - } + ~scoped_var() + { + detail::display(" ", prefix_, debug::str<>("<< leave >>"), buffered_msg); + } + }; + + template + struct timed_var + { + mutable std::chrono::steady_clock::time_point time_start_; + double const delay_; + std::tuple const message_; + // + timed_var(double const& delay, Args const&... args) + : time_start_(std::chrono::steady_clock::now()) + , delay_(delay) + , message_(args...) 
+ { + } - template - constexpr void array(std::string const&, std::vector const&) const - { - } + bool elapsed(std::chrono::steady_clock::time_point const& now) const + { + double elapsed_ = + std::chrono::duration_cast>(now - time_start_) + .count(); - template - constexpr void array(std::string const&, std::array const&) const - { - } + if (elapsed_ > delay_) + { + time_start_ = now; + return true; + } + return false; + } - template - constexpr void array(std::string const&, Iter, Iter) const - { - } + friend std::ostream& operator<<(std::ostream& os, timed_var const& ti) + { + detail::tuple_print(os, ti.message_); + return os; + } + }; - template - constexpr bool scope(Args const&...) - { - return true; - } + /////////////////////////////////////////////////////////////////////////// + template + struct enable_print; - template - constexpr bool declare_variable(Args const&...) const + // when false, debug statements should produce no code + template <> + struct enable_print { - return true; - } + constexpr enable_print(char const*) {} - template - constexpr void set(T&, V const&) - { - } + constexpr bool is_enabled() const { return false; } - // @todo, return void so that timers have zero footprint when disabled - template - constexpr int make_timer(const double, Args const&...) const - { - return 0; - } + template + constexpr void debug(Args const&...) const + { + } - template - constexpr bool eval(Expr const&) - { - return true; - } -}; - -// when true, debug statements produce valid output -template<> -struct enable_print -{ - private: - char const* prefix_; - - public: - constexpr enable_print() - : prefix_("") - { - } + template + constexpr void warning(Args const&...) const + { + } - constexpr enable_print(const char* p) - : prefix_(p) - { - } + template + constexpr void trace(Args const&...) const + { + } - constexpr bool is_enabled() const { return true; } + template + constexpr void error(Args const&...) 
const + { + } - template - constexpr void debug(Args const&... args) const - { - detail::debug(prefix_, args...); - } + template + constexpr void timed(Args const&...) const + { + } - template - constexpr void warning(Args const&... args) const - { - detail::warning(prefix_, args...); - } + template + constexpr void array(std::string const&, std::vector const&) const + { + } - template - constexpr void trace(Args const&... args) const - { - detail::trace(prefix_, args...); - } + template + constexpr void array(std::string const&, std::array const&) const + { + } - template - constexpr void error(Args const&... args) const - { - detail::error(prefix_, args...); - } + template + constexpr void array(std::string const&, Iter, Iter) const + { + } - template - scoped_var scope(Args const&... args) - { - return scoped_var(prefix_, args...); - } + template + constexpr bool scope(Args const&...) + { + return true; + } - template - void timed(timed_var const& init, Args const&... args) const - { - auto now = std::chrono::steady_clock::now(); - if (init.elapsed(now)) { detail::timed(prefix_, init, args...); } - } + template + constexpr bool declare_variable(Args const&...) const + { + return true; + } - template - void array(std::string const& name, std::vector const& v) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; - std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); - std::cout << "\n"; - } + template + constexpr void set(T&, V const&) + { + } - template - void array(std::string const& name, const std::array& v) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; - std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); - std::cout << "\n"; - } + // @todo, return void so that timers have zero footprint when disabled + template + constexpr int make_timer(double const, Args const&...) 
const + { + return 0; + } - template - void array(std::string const& name, Iter begin, Iter end) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(std::distance(begin, end)) - << "} : "; - std::copy(begin, end, - std::ostream_iterator::value_type>(std::cout, - ", ")); - std::cout << std::endl; - } + template + constexpr bool eval(Expr const&) + { + return true; + } + }; - template - T declare_variable(Args const&... args) const + // when true, debug statements produce valid output + template <> + struct enable_print { - return T(args...); - } + private: + char const* prefix_; - template - void set(T& var, V const& val) - { - var = val; - } + public: + constexpr enable_print() + : prefix_("") + { + } + + constexpr enable_print(char const* p) + : prefix_(p) + { + } + + constexpr bool is_enabled() const { return true; } + + template + constexpr void debug(Args const&... args) const + { + detail::debug(prefix_, args...); + } + + template + constexpr void warning(Args const&... args) const + { + detail::warning(prefix_, args...); + } + + template + constexpr void trace(Args const&... args) const + { + detail::trace(prefix_, args...); + } - template - timed_var make_timer(const double delay, const Args... args) const + template + constexpr void error(Args const&... args) const + { + detail::error(prefix_, args...); + } + + template + scoped_var scope(Args const&... args) + { + return scoped_var(prefix_, args...); + } + + template + void timed(timed_var const& init, Args const&... 
args) const + { + auto now = std::chrono::steady_clock::now(); + if (init.elapsed(now)) { detail::timed(prefix_, init, args...); } + } + + template + void array(std::string const& name, std::vector const& v) const + { + std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; + std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); + std::cout << "\n"; + } + + template + void array(std::string const& name, std::array const& v) const + { + std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; + std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); + std::cout << "\n"; + } + + template + void array(std::string const& name, Iter begin, Iter end) const + { + std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(std::distance(begin, end)) + << "} : "; + std::copy(begin, end, + std::ostream_iterator::value_type>( + std::cout, ", ")); + std::cout << std::endl; + } + + template + T declare_variable(Args const&... args) const + { + return T(args...); + } + + template + void set(T& var, V const& val) + { + var = val; + } + + template + timed_var make_timer(double const delay, Args const... 
args) const + { + return timed_var(delay, args...); + } + + template + auto eval(Expr const& e) + { + return e(); + } + }; + + // ------------------------------------------------------------------ + // helper for N>M true/false + // ------------------------------------------------------------------ + template + struct check_level : std::integral_constant { - return timed_var(delay, args...); - } + }; - template - auto eval(Expr const& e) + template + struct print_threshold : enable_print::value> { - return e(); - } -}; - -// ------------------------------------------------------------------ -// helper for N>M true/false -// ------------------------------------------------------------------ -template -struct check_level : std::integral_constant -{ -}; - -template -struct print_threshold : enable_print::value> -{ - using base_type = enable_print::value>; - // inherit constructor - using base_type::base_type; -}; - -} // namespace NS_DEBUG + using base_type = enable_print::value>; + // inherit constructor + using base_type::base_type; + }; + +} // namespace NS_DEBUG /// \endcond diff --git a/src/libfabric/request_state.hpp b/src/libfabric/request_state.hpp index d00e0367..58f15dd5 100644 --- a/src/libfabric/request_state.hpp +++ b/src/libfabric/request_state.hpp @@ -13,90 +13,88 @@ #include "../request_state_base.hpp" #include "./operation_context.hpp" -namespace oomph -{ -namespace detail -{ - -struct request_state -: public util::enable_shared_from_this -, public request_state_base -{ - using base = request_state_base; - using shared_ptr_t = util::unsafe_shared_ptr; - using operation_context = libfabric::operation_context; - - operation_context m_operation_context; - util::unsafe_shared_ptr m_self_ptr; - - request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, std::size_t* scheduled, - rank_type rank, tag_type tag, cb_type&& cb) - : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} - , m_operation_context{this} - { - } - - void progress(); - - 
bool cancel(); - - void create_self_ref() - { - // create a self-reference cycle!! - // this is useful if we only keep a raw pointer around internally, which still is supposed - // to keep the object alive - m_self_ptr = shared_from_this(); - } - - shared_ptr_t release_self_ref() noexcept - { - assert(((bool)m_self_ptr) && "doesn't own a self-reference!"); - return std::move(m_self_ptr); - } -}; - -struct shared_request_state -: public std::enable_shared_from_this -, public request_state_base -{ - using base = request_state_base; - using shared_ptr_t = std::shared_ptr; - using operation_context = libfabric::operation_context; - - operation_context m_operation_context; - std::shared_ptr m_self_ptr; - - shared_request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, - std::atomic* scheduled, rank_type rank, tag_type tag, cb_type&& cb) - : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} - , m_operation_context{this} - { - [[maybe_unused]] auto scp = libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - } +namespace oomph { namespace detail { - ~shared_request_state() + struct request_state + : public util::enable_shared_from_this + , public request_state_base { - [[maybe_unused]] auto scp = libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - } - - void progress(); - - bool cancel(); - - void create_self_ref() + using base = request_state_base; + using shared_ptr_t = util::unsafe_shared_ptr; + using operation_context = libfabric::operation_context; + + operation_context m_operation_context; + util::unsafe_shared_ptr m_self_ptr; + + request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, + std::size_t* scheduled, rank_type rank, tag_type tag, cb_type&& cb) + : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} + , m_operation_context{this} + { + } + + void progress(); + + bool cancel(); + + void create_self_ref() + { + // create a self-reference cycle!! 
+ // this is useful if we only keep a raw pointer around internally, which still is supposed + // to keep the object alive + m_self_ptr = shared_from_this(); + } + + shared_ptr_t release_self_ref() noexcept + { + assert(((bool) m_self_ptr) && "doesn't own a self-reference!"); + return std::move(m_self_ptr); + } + }; + + struct shared_request_state + : public std::enable_shared_from_this + , public request_state_base { - // create a self-reference cycle!! - // this is useful if we only keep a raw pointer around internally, which still is supposed - // to keep the object alive - m_self_ptr = shared_from_this(); - } - - shared_ptr_t release_self_ref() noexcept - { - assert(((bool)m_self_ptr) && "doesn't own a self-reference!"); - return std::move(m_self_ptr); - } -}; - -} // namespace detail -} // namespace oomph + using base = request_state_base; + using shared_ptr_t = std::shared_ptr; + using operation_context = libfabric::operation_context; + + operation_context m_operation_context; + std::shared_ptr m_self_ptr; + + shared_request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, + std::atomic* scheduled, rank_type rank, tag_type tag, cb_type&& cb) + : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} + , m_operation_context{this} + { + [[maybe_unused]] auto scp = + libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + } + + ~shared_request_state() + { + [[maybe_unused]] auto scp = + libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + } + + void progress(); + + bool cancel(); + + void create_self_ref() + { + // create a self-reference cycle!! 
+ // this is useful if we only keep a raw pointer around internally, which still is supposed + // to keep the object alive + m_self_ptr = shared_from_this(); + } + + shared_ptr_t release_self_ref() noexcept + { + assert(((bool) m_self_ptr) && "doesn't own a self-reference!"); + return std::move(m_self_ptr); + } + }; + +}} // namespace oomph::detail diff --git a/src/libfabric/simple_counter.hpp b/src/libfabric/simple_counter.hpp index f44eac92..26ecf8d5 100644 --- a/src/libfabric/simple_counter.hpp +++ b/src/libfabric/simple_counter.hpp @@ -12,13 +12,13 @@ #include "oomph_libfabric_defines.hpp" // #include -#include #include +#include #ifdef OOMPH_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS -#define PERFORMANCE_COUNTER_ENABLED true +# define PERFORMANCE_COUNTER_ENABLED true #else -#define PERFORMANCE_COUNTER_ENABLED false +# define PERFORMANCE_COUNTER_ENABLED false #endif // @@ -29,90 +29,86 @@ // the performance counter that will simply do nothing when disabled - but // still allow code that uses the counters in arithmetic to compile. 
// -namespace oomph -{ -namespace libfabric -{ -template::value>> -struct simple_counter -{ -}; - -// -------------------------------------------------------------------- -// specialization for performance counters Enabled -// we provide an atomic that can be incremented or added/subtracted to -template -struct simple_counter -{ - simple_counter() - : value_{T()} +namespace oomph { namespace libfabric { + template ::value>> + struct simple_counter { - } + }; - simple_counter(const T& init) - : value_{init} + // -------------------------------------------------------------------- + // specialization for performance counters Enabled + // we provide an atomic that can be incremented or added/subtracted to + template + struct simple_counter { - } + simple_counter() + : value_{T()} + { + } - inline operator T() const { return value_; } + simple_counter(T const& init) + : value_{init} + { + } - inline T operator=(const T& x) { return value_ = x; } + inline operator T() const { return value_; } - inline T operator++() { return ++value_; } + inline T operator=(T const& x) { return value_ = x; } - inline T operator++(int x) { return (value_ += x); } + inline T operator++() { return ++value_; } - inline T operator+=(const T& rhs) { return (value_ += rhs); } + inline T operator++(int x) { return (value_ += x); } - inline T operator--() { return --value_; } + inline T operator+=(T const& rhs) { return (value_ += rhs); } - inline T operator--(int x) { return (value_ -= x); } + inline T operator--() { return --value_; } - inline T operator-=(const T& rhs) { return (value_ -= rhs); } + inline T operator--(int x) { return (value_ -= x); } - friend std::ostream& operator<<(std::ostream& os, const simple_counter& x) - { - os << x.value_; - return os; - } + inline T operator-=(T const& rhs) { return (value_ -= rhs); } - std::atomic value_; -}; + friend std::ostream& operator<<(std::ostream& os, simple_counter const& x) + { + os << x.value_; + return os; + } -// 
-------------------------------------------------------------------- -// specialization for performance counters Disabled -// just return dummy values so that arithmetic operations compile ok -template -struct simple_counter -{ - simple_counter() {} + std::atomic value_; + }; - simple_counter(const T&) {} + // -------------------------------------------------------------------- + // specialization for performance counters Disabled + // just return dummy values so that arithmetic operations compile ok + template + struct simple_counter + { + simple_counter() {} - inline operator T() const { return 0; } + simple_counter(T const&) {} - // inline bool operator==(const T&) { return true; } + inline operator T() const { return 0; } - inline T operator=(const T&) { return 0; } + // inline bool operator==(const T&) { return true; } - inline T operator++() { return 0; } + inline T operator=(T const&) { return 0; } - inline T operator++(int) { return 0; } + inline T operator++() { return 0; } - inline T operator+=(const T&) { return 0; } + inline T operator++(int) { return 0; } - inline T operator--() { return 0; } + inline T operator+=(T const&) { return 0; } - inline T operator--(int) { return 0; } + inline T operator--() { return 0; } - inline T operator-=(const T&) { return 0; } + inline T operator--(int) { return 0; } - friend std::ostream& operator<<(std::ostream& os, const simple_counter&) - { - os << "undefined"; - return os; - } -}; -} // namespace libfabric -} // namespace oomph + inline T operator-=(T const&) { return 0; } + + friend std::ostream& operator<<(std::ostream& os, simple_counter const&) + { + os << "undefined"; + return os; + } + }; +}} // namespace oomph::libfabric From e1715510c25928f69144468bf05acc853b6903a7 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 11:30:23 +0200 Subject: [PATCH 04/35] disable clang-format for cmake generated code, fix missing include --- cmake/config.hpp.in | 3 +++ cmake/oomph_defs.hpp.in | 2 ++ 
include/oomph/detail/communicator_helper.hpp | 1 + 3 files changed, 6 insertions(+) diff --git a/cmake/config.hpp.in b/cmake/config.hpp.in index 458b038a..e9fcf5e4 100644 --- a/cmake/config.hpp.in +++ b/cmake/config.hpp.in @@ -26,9 +26,12 @@ #cmakedefine01 OOMPH_USE_FAST_PIMPL #cmakedefine01 OOMPH_ENABLE_BARRIER + +// clang-format off #define OOMPH_RECURSION_DEPTH @OOMPH_RECURSION_DEPTH@ #define OOMPH_VERSION @OOMPH_VERSION_NUMERIC@ #define OOMPH_VERSION_MAJOR @OOMPH_VERSION_MAJOR@ #define OOMPH_VERSION_MINOR @OOMPH_VERSION_MINOR@ #define OOMPH_VERSION_PATCH @OOMPH_VERSION_PATCH@ +// clang-format on diff --git a/cmake/oomph_defs.hpp.in b/cmake/oomph_defs.hpp.in index 70ae8732..a52a943f 100644 --- a/cmake/oomph_defs.hpp.in +++ b/cmake/oomph_defs.hpp.in @@ -15,7 +15,9 @@ namespace oomph { namespace fort { + // clang-format off using fp_type = @OOMPH_FORTRAN_FP@; + // clang-format on typedef enum { OomphBarrierGlobal=1, OomphBarrierThread=2, diff --git a/include/oomph/detail/communicator_helper.hpp b/include/oomph/detail/communicator_helper.hpp index 44f6d828..8335c6eb 100644 --- a/include/oomph/detail/communicator_helper.hpp +++ b/include/oomph/detail/communicator_helper.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include //#include From 66f7fd6eac1dc589667ffb2260ebb5ad8326623b Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 13:51:34 +0200 Subject: [PATCH 05/35] Add shm provider support to libfabric transport layer The shm addressing method uses a string type, we add a convenience function to convert an fi_addr type to string to help with displaying addresses from any provider Add a simple libfabric utility checker that displays fi_info so that when testing for the first time on a new machine one can try to view the capabilities/options exposed by the provider selected --- CMakeLists.txt | 9 ++--- cmake/oomph_libfabric.cmake | 9 ++--- src/libfabric/CMakeLists.txt | 21 +++++++++++ src/libfabric/context.cpp | 18 +++++----- 
src/libfabric/context.hpp | 6 ++-- src/libfabric/controller.hpp | 2 +- src/libfabric/controller_base.hpp | 50 +++++++++++++++----------- src/libfabric/fabric_error.hpp | 2 +- src/libfabric/locality.hpp | 22 +++++++++++- src/libfabric/test/check_libfabric.cpp | 29 +++++++++++++++ 10 files changed, 123 insertions(+), 45 deletions(-) create mode 100644 src/libfabric/test/check_libfabric.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 90a582d1..3db53422 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,6 @@ cmake_minimum_required(VERSION 3.17) # CMake version is set at 3.17 because of find_package(CUDAToolkit) -if (NOT ${CMAKE_VERSION} VERSION_LESS 3.27) - # new in 3.27: additionally use uppercase _ROOT - # environment and CMake variables for find_package - cmake_policy(SET CMP0144 NEW) -endif() - set(OOMPH_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") list(APPEND CMAKE_MODULE_PATH "${OOMPH_MODULE_PATH}") @@ -28,6 +22,7 @@ endfunction() set_policy(CMP0074 NEW) # find_package uses XXX_ROOT vars using PackageName set_policy(CMP0144 NEW) # find_package allows XXX_ROOT vars using PACKAGENAME Uppercase +set_policy(CMP0167 NEW) # find_package uses new boost config (boost 1.70 onwards) # --------------------------------------------------------------------- # CMake setup, C++ version, build type, modules, etc @@ -92,7 +87,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/oomph/config.hpp @ONLY) install(FILES ${PROJECT_BINARY_DIR}/include/oomph/config.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/oomph) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_config.inc.in +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_config.inc.in ${CMAKE_CURRENT_BINARY_DIR}/include/oomph/cmake_config.inc) # --------------------------------------------------------------------- diff --git a/cmake/oomph_libfabric.cmake b/cmake/oomph_libfabric.cmake index 758f3f4d..18c90369 100644 --- a/cmake/oomph_libfabric.cmake 
+++ b/cmake/oomph_libfabric.cmake @@ -95,7 +95,7 @@ if (OOMPH_WITH_LIBFABRIC) set(OOMPH_LIBFABRIC_PROVIDER "tcp" CACHE STRING "The provider (cxi(Cray Slingshot)/efa(Amazon Elastic)/gni(Cray Gemini)/psm2(Intel Omni-Path)/tcp/verbs(Infiniband))") set_property(CACHE OOMPH_LIBFABRIC_PROVIDER PROPERTY STRINGS - "cxi" "efa" "gni" "psm2" "tcp" "verbs") + "cxi" "efa" "gni" "psm2" "tcp" "verbs" "shm") oomph_libfabric_add_config_define_namespace( DEFINE HAVE_LIBFABRIC_PROVIDER @@ -141,6 +141,10 @@ if (OOMPH_WITH_LIBFABRIC) oomph_libfabric_add_config_define_namespace( DEFINE HAVE_LIBFABRIC_PSM2 NAMESPACE libfabric) + elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "shm") + oomph_libfabric_add_config_define_namespace( + DEFINE HAVE_LIBFABRIC_SHM + NAMESPACE libfabric) endif() #------------------------------------------------------------------------------ @@ -171,6 +175,3 @@ if (OOMPH_WITH_LIBFABRIC) ) target_include_directories(oomph_libfabric PRIVATE "${PROJECT_BINARY_DIR}/src/libfabric") endif() - - - diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index c82e387d..fa99a413 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -20,3 +20,24 @@ target_sources(oomph_libfabric PRIVATE ${oomph_sources_libfabric}) target_sources(oomph_libfabric PRIVATE context.cpp) target_sources(oomph_libfabric PRIVATE operation_context.cpp) target_sources(oomph_libfabric PRIVATE locality.cpp) + +# if we are using GPU, then the libfabric library was probably built with +# gpu support, and we should link to cuda to prevent link errors +if (HWMALLOC_ENABLE_DEVICE) + include(CheckLanguage) + check_language(CUDA) + + if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + else() + message(STATUS "No CUDA support") + return() + endif() + + find_package(CUDAToolkit) + target_link_libraries(oomph_libfabric PRIVATE CUDA::cudart) +endif() + +add_executable(check_libfabric test/check_libfabric.cpp) +target_link_libraries(check_libfabric PUBLIC oomph_libfabric) 
+target_include_directories(check_libfabric PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index cb7757a2..68112e9e 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -23,7 +23,7 @@ namespace oomph { using controller_type = libfabric::controller; context_impl::context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve) + std::size_t message_pool_reserve, bool debug) : context_base(comm, thread_safe) , m_heap{this, message_pool_never_free, message_pool_reserve} , m_recv_cb_queue(128) @@ -40,12 +40,12 @@ namespace oomph { debug::ptr(m_ctxt_tag))); // TODO fix the thread safety - // problem: controller is a singleton and has problems when 2 contexts are created in the - // following order: single threaded first, then multi-threaded after - //int threads = thread_safe ? std::thread::hardware_concurrency() : 1; - //int threads = std::thread::hardware_concurrency(); + // problem: controller is a singleton and has problems when 2 contexts are created + // in the following order: single threaded first, then multi-threaded after + // int threads = thread_safe ? 
std::thread::hardware_concurrency() : 1; + // int threads = std::thread::hardware_concurrency(); int threads = boost::thread::physical_concurrency(); - m_controller = init_libfabric_controller(this, comm, rank, size, threads); + m_controller = init_libfabric_controller(this, comm, rank, size, threads, debug); m_domain = m_controller->get_domain(); } @@ -65,14 +65,15 @@ namespace oomph { { static char buffer[32]; std::string temp = std::to_string(m_controller->rendezvous_threshold()); - strncpy(buffer, temp.c_str(), std::min(size_t(31), std::strlen(temp.c_str()))); + if (temp.size() > 31) throw std::runtime_error("Bad string option check, fix please"); + strcpy(buffer, temp.c_str()); return buffer; } else { return "unspecified"; } } std::shared_ptr context_impl::init_libfabric_controller( - oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, int size, int threads) + oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, int size, int threads, bool debug) { // only allow one thread to pass, make other wait static std::mutex m_init_mutex; @@ -84,6 +85,7 @@ namespace oomph { debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), "size", debug::dec<3>(size), "threads", debug::dec<3>(threads))); instance.reset(new controller_type()); + if (debug) instance->enable_debug(); instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); } return instance; diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index 7a936223..e8e71837 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -49,8 +49,8 @@ namespace oomph { // -------------------------------------------------- // create a singleton ptr to a libfabric controller that // can be shared between oomph context objects - static std::shared_ptr init_libfabric_controller( - oomph::context_impl* ctx, MPI_Comm comm, int rank, int size, int threads); + static std::shared_ptr init_libfabric_controller(oomph::context_impl* ctx, + MPI_Comm comm, int rank, int 
size, int threads, bool debug = false); // queue for shared recv callbacks callback_queue m_recv_cb_queue; @@ -59,7 +59,7 @@ namespace oomph { public: context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve); + std::size_t message_pool_reserve, bool debug = false); context_impl(context_impl const&) = delete; context_impl(context_impl&&) = delete; diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 95e3ad17..4b711ea9 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -53,7 +53,7 @@ namespace NS_DEBUG { template inline /*constexpr*/ NS_DEBUG::print_threshold cnt_deb("CONTROL"); // - static NS_DEBUG::enable_print cnt_err("CONTROL"); + static NS_DEBUG::enable_print cnt_err("CONTROL"); } // namespace NS_DEBUG namespace oomph::libfabric { diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index a5eb1705..5e7bd133 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -45,8 +45,8 @@ #include "memory_region.hpp" #include "operation_context_base.hpp" -//#define DISABLE_FI_INJECT -//#define EXCESSIVE_POLLING_BACKOFF_MICRO_S 50 +// #define DISABLE_FI_INJECT +// #define EXCESSIVE_POLLING_BACKOFF_MICRO_S 50 // ------------------------------------------------------------------ @@ -165,9 +165,9 @@ static int libfabric_rendezvous_threshold(int def_val) // ------------------------------------------------ #ifdef HAVE_LIBFABRIC_GNI # include "rdma/fi_ext_gni.h" -//#define OOMPH_GNI_REG "none" +// #define OOMPH_GNI_REG "none" # define OOMPH_GNI_REG "internal" -//#define OOMPH_GNI_REG "udreg" +// #define OOMPH_GNI_REG "udreg" static std::vector> gni_strs = { {GNI_MR_CACHE, "GNI_MR_CACHE"}, @@ -203,9 +203,9 @@ static std::vector> gni_ints = { // clang-format on #endif -// the libfabric library expects us to ask for an API supported version, so if we know we support -// api 2.0, then we ask for that, but the cxi 
legacy library on daint only supports 1.15, -// so drop back to that version if needed +// the libfabric library expects us to ask for an API supported version, so if +// we know we support api 2.0, then we ask for that, but the cxi legacy library +// on daint only supports 1.15, so drop back to that version if needed #if defined(OOMPH_LIBFABRIC_V1_API) # define LIBFABRIC_FI_VERSION_MAJOR 1 # define LIBFABRIC_FI_VERSION_MINOR 15 @@ -382,6 +382,7 @@ namespace NS_LIBFABRIC { endpoint_context_pool tx_endpoints_; endpoint_context_pool rx_endpoints_; + bool display_fabric_info_; // for debugging purposes, show fi_info hints struct fi_info* fabric_info_; struct fid_fabric* fabric_; struct fid_domain* fabric_domain_; @@ -441,6 +442,7 @@ namespace NS_LIBFABRIC { : eps_(nullptr) , tx_endpoints_(1) , rx_endpoints_(1) + , display_fabric_info_(false) , fabric_info_(nullptr) , fabric_(nullptr) , fabric_domain_(nullptr) @@ -511,6 +513,10 @@ namespace NS_LIBFABRIC { fi_freeinfo(fabric_info_); } + // -------------------------------------------------------------------- + // only used in check_libfabric quick test for helpful output + void enable_debug() { display_fabric_info_ = true; } + // -------------------------------------------------------------------- // setup an endpoint for receiving messages, // usually an rx endpoint is shared by all threads @@ -580,7 +586,8 @@ namespace NS_LIBFABRIC { else if (endpoint_type_ != endpoint_type::scalableTxRx) { #if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || \ - defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_CXI) || defined(HAVE_LIBFABRIC_EFA) + defined(HAVE_LIBFABRIC_SHM) || defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_CXI) || \ + defined(HAVE_LIBFABRIC_EFA) // it appears that the rx endpoint cannot be enabled if it does not // have a Tx CQ (at least when using sockets), so we create a dummy // Tx CQ and bind it just to stop libfabric from triggering an error. 
@@ -792,6 +799,8 @@ namespace NS_LIBFABRIC { fabric_hints_->addr_format = FI_SOCKADDR_IN; #elif defined(HAVE_LIBFABRIC_EFA) fabric_hints_->addr_format = FI_ADDR_EFA; +#elif defined(HAVE_LIBFABRIC_SHM) + fabric_hints_->addr_format = FI_ADDR_STR; #endif fabric_hints_->caps = caps_flags(); @@ -824,7 +833,7 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); // Enable thread safe mode (Does not work with psm2 provider) // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; - //fabric_hints_->domain_attr->threading = FI_THREAD_FID; + // fabric_hints_->domain_attr->threading = FI_THREAD_FID; fabric_hints_->domain_attr->threading = threadlevel_flags(); } else @@ -940,6 +949,12 @@ namespace NS_LIBFABRIC { // is set by querying the tx/tx attr sizes tx_attr_size_ = std::min(size_t(512), fabric_info_->tx_attr->size / 2); rx_attr_size_ = std::min(size_t(512), fabric_info_->rx_attr->size / 2); + // Print fabric info to a human-readable string if available + if (display_fabric_info_ && fabric_info_) + { + char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); + if (info_str) { std::cout << "Libfabric fabric info:\n" << info_str << std::endl; } + } fi_freeinfo(fabric_hints_); } @@ -1237,21 +1252,16 @@ namespace NS_LIBFABRIC { { std::string err = std::to_string(addrlen) + "=" + std::to_string(locality_defs::array_size); - NS_LIBFABRIC::fabric_error(ret, "fi_getname - size error or other problem " + err); + NS_LIBFABRIC::fabric_error(ret, "fi_getname - error (address size ?) 
" + err); } // optimized out when debug logging is false if constexpr (NS_DEBUG::cnb_deb.is_enabled()) { - std::stringstream temp1; - for (std::size_t i = 0; i < locality_defs::array_length; ++i) - { - temp1 << debug::ipaddr(&local_addr[i]) << " - "; - } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("raw address data"), "size", debug::dec<>(addrlen), " : ", - temp1.str().c_str())); + debug(debug::str<>("raw address data"), "size", debug::dec<4>(addrlen), " : ", + locality::to_str(local_addr, av_))); + std::stringstream temp2; for (std::size_t i = 0; i < locality_defs::array_length; ++i) { @@ -1310,7 +1320,7 @@ namespace NS_LIBFABRIC { inline bool isTerminated() { return false; - //return (qp_endpoint_map_.size() == 0); + // return (qp_endpoint_map_.size() == 0); } // -------------------------------------------------------------------- @@ -1322,7 +1332,7 @@ namespace NS_LIBFABRIC { { int ret = fi_av_lookup(av_, fi_addr_t(i), addr.fabric_data_writable(), &addrlen); addr.set_fi_address(fi_addr_t(i)); - if ((ret == 0) && (addrlen == locality_defs::array_size)) + if ((ret == 0) && (addrlen <= locality_defs::array_size)) { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index 325975a7..b1508f28 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -19,7 +19,7 @@ namespace NS_DEBUG { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print err_deb("ERROR__"); + static NS_DEBUG::enable_print err_deb("ERROR__"); } // namespace NS_DEBUG namespace NS_LIBFABRIC { diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 84f5ddc2..1fd35425 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -15,9 +15,11 @@ #include #include // +#include +#include +// #include #include -#include // #include "oomph_libfabric_defines.hpp" @@ -45,6 +47,10 @@ # define 
HAVE_LIBFABRIC_LOCALITY_SOCKADDR #endif +#if defined(HAVE_LIBFABRIC_SHM) +# define HAVE_LIBFABRIC_LOCALITY_SIZE 24 +#endif + namespace oomph { // cppcheck-suppress ConfigurationNotChecked static NS_DEBUG::enable_print loc_deb("LOCALTY"); @@ -184,6 +190,8 @@ namespace oomph { namespace libfabric { return data_[0]; #elif defined(HAVE_LIBFABRIC_EFA) return data_[0]; +#elif defined(HAVE_LIBFABRIC_SHM) + return data_[0]; #else throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif @@ -199,6 +207,8 @@ namespace oomph { namespace libfabric { return data[0]; #elif defined(HAVE_LIBFABRIC_EFA) return data[0]; +#elif defined(HAVE_LIBFABRIC_SHM) + return data[0]; #else throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif @@ -219,6 +229,16 @@ namespace oomph { namespace libfabric { inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } + static std::string to_str(locality_data const& data, struct fid_av* av) + { + char sbuf[256]; + size_t buflen = 256; + char const* straddr_ret = fi_av_straddr(av, data.data(), sbuf, &buflen); + std::string result = straddr_ret ? straddr_ret : ""; + // free((char*)(straddr_ret)); + return result; + } + private: friend bool operator==(locality const& lhs, locality const& rhs) { diff --git a/src/libfabric/test/check_libfabric.cpp b/src/libfabric/test/check_libfabric.cpp new file mode 100644 index 00000000..070c8f11 --- /dev/null +++ b/src/libfabric/test/check_libfabric.cpp @@ -0,0 +1,29 @@ +/* + * ghex-org + * + * Copyright (c) 2014-2023, ETH Zurich + * All rights reserved. + * + * Please, refer to the LICENSE file in the root directory. 
+ * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include "../benchmarks/mpi_environment.hpp" +// +#include "../communicator.hpp" +#include "../context.hpp" + +int main(int argc, char** argv) +{ + using namespace oomph; + bool const message_pool_never_free = false; + std::size_t const message_pool_reserve = 1024 * 1024 * 128; + bool const multi_threaded = true; + bool debug = true; + // + mpi_environment env(multi_threaded, argc, argv); + auto ctxt = + context_impl(MPI_COMM_WORLD, true, message_pool_never_free, message_pool_reserve, debug); +} From 2a4ba065288879c8956cc20eed60e9e13e835696 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 14:28:45 +0200 Subject: [PATCH 06/35] Remove unused includes and fix warnings in libfabric backend --- src/libfabric/context.hpp | 1 - src/libfabric/controller.hpp | 11 ----------- src/libfabric/controller_base.hpp | 6 ------ src/libfabric/fabric_error.hpp | 1 - src/libfabric/memory_region.hpp | 3 +-- 5 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index e8e71837..cf02c850 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -10,7 +10,6 @@ #pragma once #include -#include #include #include diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 4b711ea9..53c67bad 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -9,23 +9,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include // #include #include #include -#include // #include #include @@ -38,7 +28,6 @@ #include "controller_base.hpp" #include "fabric_error.hpp" #include "locality.hpp" -#include "memory_region.hpp" #include "oomph_libfabric_defines.hpp" #include "operation_context.hpp" // diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 5e7bd133..51057234 100644 --- 
a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -9,18 +9,12 @@ */ #pragma once -#include -#include #include -#include -#include #include -#include #include #include #include #include -#include // #include #include diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index b1508f28..2ec59997 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -10,7 +10,6 @@ #pragma once #include -#include #include // #include diff --git a/src/libfabric/memory_region.hpp b/src/libfabric/memory_region.hpp index f1eb5326..f2cd5d45 100644 --- a/src/libfabric/memory_region.hpp +++ b/src/libfabric/memory_region.hpp @@ -15,7 +15,6 @@ #include // #include -#include #include #include "fabric_error.hpp" @@ -77,7 +76,7 @@ struct fi_mr_attr { // struct iovec addresses = {/*.iov_base = */ const_cast(buf), /*.iov_len = */ len}; fi_mr_attr attr = { - /*.mr_iov = */ &addresses, + /*.mr_iov = */ {&addresses}, /*.iov_count = */ 1, /*.access = */ access_flags, /*.offset = */ offset, From 596166147e17d00d2da2ac124ec2c6bca7e9f709 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 20:09:15 +0200 Subject: [PATCH 07/35] Remove ipaddress locality functions and instead use AV fi_to_str Setting the address format is not required when the provider chooses it, make use of formatting feature provided by libfabric to simplify display of addresses and setting hints during open --- src/libfabric/CMakeLists.txt | 3 +- src/libfabric/controller.hpp | 21 ++--- src/libfabric/controller_base.hpp | 40 +++------ src/libfabric/locality.cpp | 31 ------- src/libfabric/locality.hpp | 132 +++++++----------------------- 5 files changed, 55 insertions(+), 172 deletions(-) delete mode 100644 src/libfabric/locality.cpp diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index fa99a413..92128897 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -19,9 +19,8 @@ list(TRANSFORM 
oomph_sources PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/../ target_sources(oomph_libfabric PRIVATE ${oomph_sources_libfabric}) target_sources(oomph_libfabric PRIVATE context.cpp) target_sources(oomph_libfabric PRIVATE operation_context.cpp) -target_sources(oomph_libfabric PRIVATE locality.cpp) -# if we are using GPU, then the libfabric library was probably built with +# if we are using GPU, then the libfabric library was probably built with # gpu support, and we should link to cuda to prevent link errors if (HWMALLOC_ENABLE_DEVICE) include(CheckLanguage) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 53c67bad..8f1d3d30 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -95,14 +95,17 @@ namespace oomph::libfabric { void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) { [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::vector localities(size * locality_defs::array_size, 0); + + // array of empty locality objects + std::vector localities(size); // if (rank > 0) { LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("sending here"), iplocality(here_), "size", + debug(debug::str<>("sending here"), here_.to_str(), "size", locality_defs::array_size)); - /*int err = */ MPI_Send(here_.fabric_data(), locality_defs::array_size, MPI_CHAR, + /*int err = */ MPI_Send(here_.fabric_data().data(), locality_defs::array_size, + MPI_CHAR, 0, // dst rank 0, // tag comm); @@ -120,14 +123,14 @@ namespace oomph::libfabric { else { LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); - memcpy(&localities[0], here_.fabric_data(), locality_defs::array_size); + memcpy(&localities[0], here_.fabric_data().data(), locality_defs::array_size); for (int i = 1; i < size; ++i) { LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving address"), debug::dec<>(i))); MPI_Status status; - /*int err = */ MPI_Recv(&localities[i * locality_defs::array_size], - size * 
locality_defs::array_size, MPI_CHAR, + /*int err = */ MPI_Recv(&localities[i], size * locality_defs::array_size, + MPI_CHAR, i, // src rank 0, // tag comm, &status); @@ -152,10 +155,8 @@ namespace oomph::libfabric { LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); for (int i = 0; i < size; ++i) { - locality temp; - int offset = i * locality_defs::array_size; - memcpy(temp.fabric_data_writable(), &localities[offset], locality_defs::array_size); - insert_address(av, temp); + locality temp(localities[i], av); + insert_address(temp); } } diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 51057234..a91f8bc8 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -666,7 +666,7 @@ namespace NS_LIBFABRIC { // once enabled we can get the address enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), iplocality(here_))); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), here_.to_str())); // // if we are using scalable endpoints, then setup tx/rx contexts // // we will us a single endpoint for all Tx/Rx contexts @@ -787,25 +787,9 @@ namespace NS_LIBFABRIC { throw NS_LIBFABRIC::fabric_error(-1, "Failed to allocate fabric hints"); } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Here locality"), iplocality(here_))); - -#if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || defined(HAVE_LIBFABRIC_VERBS) - fabric_hints_->addr_format = FI_SOCKADDR_IN; -#elif defined(HAVE_LIBFABRIC_EFA) - fabric_hints_->addr_format = FI_ADDR_EFA; -#elif defined(HAVE_LIBFABRIC_SHM) - fabric_hints_->addr_format = FI_ADDR_STR; -#endif - - fabric_hints_->caps = caps_flags(); - - fabric_hints_->mode = FI_CONTEXT /*| FI_MR_LOCAL*/; - if (provider.c_str() == std::string("tcp")) - { - fabric_hints_->fabric_attr->prov_name = - strdup(std::string(provider + 
";ofi_rxm").c_str()); - } - else if (provider.c_str() == std::string("verbs")) + // setup the provider we want to use before getting info + if ((provider.c_str() == std::string("tcp")) || + (provider.c_str() == std::string("verbs"))) { fabric_hints_->fabric_attr->prov_name = strdup(std::string(provider + ";ofi_rxm").c_str()); @@ -1254,7 +1238,7 @@ namespace NS_LIBFABRIC { { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), "size", debug::dec<4>(addrlen), " : ", - locality::to_str(local_addr, av_))); + locality(local_addr, av_).to_str())); std::stringstream temp2; for (std::size_t i = 0; i < locality_defs::array_length; ++i) @@ -1264,7 +1248,7 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), temp2.str().c_str())); } - return locality(local_addr); + return locality(local_addr, av_); } // -------------------------------------------------------------------- @@ -1329,7 +1313,7 @@ namespace NS_LIBFABRIC { if ((ret == 0) && (addrlen <= locality_defs::array_size)) { LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); + debug(debug::str<>("address vector"), debug::dec<3>(i), addr.to_str())); } else { @@ -1494,9 +1478,9 @@ namespace NS_LIBFABRIC { [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("inserting AV"), iplocality(address), NS_DEBUG::ptr(av))); + trace(debug::str<>("inserting AV"), address.to_str(), NS_DEBUG::ptr(av))); fi_addr_t fi_addr = 0xffff'ffff; - int ret = fi_av_insert(av, address.fabric_data(), 1, &fi_addr, 0, nullptr); + int ret = fi_av_insert(av, address.fabric_data().data(), 1, &fi_addr, 0, nullptr); if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } else if (ret == 0) { @@ -1504,10 +1488,10 @@ namespace NS_LIBFABRIC { NS_LIBFABRIC::fabric_error(ret, "fi_av_insert did not return 1"); } // address was generated correctly, now update the 
locality with the fi_addr - locality new_locality(address, fi_addr); + locality new_locality(address, fi_addr, av); LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), - iplocality(new_locality), "fi_addr", debug::hex<4>(fi_addr))); + trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), new_locality.to_str(), + "fi_addr", debug::hex<4>(fi_addr))); return new_locality; } }; diff --git a/src/libfabric/locality.cpp b/src/libfabric/locality.cpp deleted file mode 100644 index ff23eeb5..00000000 --- a/src/libfabric/locality.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * ghex-org - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include - -namespace oomph { namespace libfabric { - - // ------------------------------------------------------------------ - // format as ip address, port, libfabric address - // ------------------------------------------------------------------ - iplocality::iplocality(locality const& l) - : data(l) - { - } - - std::ostream& operator<<(std::ostream& os, iplocality const& p) - { - os << std::dec << NS_DEBUG::ipaddr(p.data.fabric_data()) << " - " - << NS_DEBUG::ipaddr(p.data.ip_address()) << ":" << NS_DEBUG::dec<>(p.data.port()) << " (" - << NS_DEBUG::dec<>(p.data.fi_address()) << ") "; - return os; - } - -}} // namespace oomph::libfabric diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 1fd35425..24cdef24 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -23,8 +23,7 @@ // #include "oomph_libfabric_defines.hpp" -// Different providers use different address formats that we must accommodate -// in our locality object. +// Different providers use different address formats that we must accommodate in our locality object. 
#ifdef HAVE_LIBFABRIC_GNI # define HAVE_LIBFABRIC_LOCALITY_SIZE 48 #endif @@ -44,32 +43,25 @@ #if defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_TCP) || \ defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_PSM2) # define HAVE_LIBFABRIC_LOCALITY_SIZE 16 -# define HAVE_LIBFABRIC_LOCALITY_SOCKADDR #endif #if defined(HAVE_LIBFABRIC_SHM) # define HAVE_LIBFABRIC_LOCALITY_SIZE 24 #endif +#if defined(HAVE_LIBFABRIC_LNX) +# define HAVE_LIBFABRIC_LOCALITY_SIZE 32 +#endif + namespace oomph { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print loc_deb("LOCALTY"); + static NS_DEBUG::enable_print loc_deb("LOCALTY"); } // namespace oomph namespace oomph { namespace libfabric { struct locality; - // ------------------------------------------------------------------ - // format as ip address, port, libfabric address - // ------------------------------------------------------------------ - struct iplocality - { - locality const& data; - iplocality(locality const& a); - friend std::ostream& operator<<(std::ostream& os, iplocality const& p); - }; - // -------------------------------------------------------------------- // Locality, in this structure we store the information required by // libfabric to make a connection to another node. 
@@ -91,45 +83,50 @@ namespace oomph { namespace libfabric { static char const* type() { return "libfabric"; } - explicit locality(locality_data const& in_data) + explicit locality(locality_data const& in_data, struct fid_av* av) { std::memcpy(&data_[0], &in_data[0], locality_defs::array_size); fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("expl constructing"), iplocality((*this)))); + av_ = av; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), to_str())); } locality() { std::memset(&data_[0], 0x00, locality_defs::array_size); fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), iplocality((*this)))); + av_ = nullptr; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), to_str())); } locality(locality const& other) : data_(other.data_) , fi_address_(other.fi_address_) + , av_(other.av_) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), iplocality((*this)))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), to_str())); } - locality(locality const& other, fi_addr_t addr) + locality(locality const& other, fi_addr_t addr, struct fid_av* av) : data_(other.data_) , fi_address_(addr) + , av_(av) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), iplocality((*this)))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), to_str())); } locality(locality&& other) : data_(std::move(other.data_)) , fi_address_(other.fi_address_) + , av_(other.av_) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), iplocality((*this)))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), to_str())); } // provided to support sockets mode bootstrap explicit locality(std::string const& address, std::string const& portnum) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), address, ":", portnum)); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct-2"), address, ":", portnum)); // struct sockaddr_in socket_data; memset(&socket_data, 0, sizeof(socket_data)); @@ 
-139,81 +136,25 @@ namespace oomph { namespace libfabric { // std::memcpy(&data_[0], &socket_data, locality_defs::array_size); fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), iplocality((*this)))); - } - - // some condition marking this locality as valid - explicit inline operator bool() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("bool operator"), iplocality((*this)))); - return (ip_address() != 0); - } - - inline bool valid() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("valid operator"), iplocality((*this)))); - return (ip_address() != 0); + av_ = nullptr; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), to_str())); } locality& operator=(locality const& other) { data_ = other.data_; fi_address_ = other.fi_address_; - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("copy operator"), iplocality(*this), iplocality(other))); + av_ = other.av_; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy operator"), to_str(), other.to_str())); return *this; } bool operator==(locality const& other) { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality operator"), iplocality(*this), iplocality(other))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("equality operator"), to_str(), other.to_str())); return std::memcmp(&data_, &other.data_, locality_defs::array_size) == 0; } - bool less_than(locality const& other) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("less operator"), iplocality(*this), iplocality(other))); - if (ip_address() < other.ip_address()) return true; - if (ip_address() == other.ip_address()) return port() < other.port(); - return false; - } - - uint32_t const& ip_address() const - { -#if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(data_.data())->sin_addr.s_addr; -#elif defined(HAVE_LIBFABRIC_GNI) - return data_[0]; -#elif defined(HAVE_LIBFABRIC_CXI) - return data_[0]; -#elif defined(HAVE_LIBFABRIC_EFA) - return data_[0]; -#elif defined(HAVE_LIBFABRIC_SHM) - return data_[0]; -#else - throw 
fabric_error(0, "unsupported fabric provider, please fix ASAP"); -#endif - } - - static uint32_t const& ip_address(locality_data const& data) - { -#if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(&data)->sin_addr.s_addr; -#elif defined(HAVE_LIBFABRIC_GNI) - return data[0]; -#elif defined(HAVE_LIBFABRIC_CXI) - return data[0]; -#elif defined(HAVE_LIBFABRIC_EFA) - return data[0]; -#elif defined(HAVE_LIBFABRIC_SHM) - return data[0]; -#else - throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); -#endif - } - inline fi_addr_t const& fi_address() const { return fi_address_; } inline void set_fi_address(fi_addr_t fi_addr) { fi_address_ = fi_addr; } @@ -225,39 +166,27 @@ namespace oomph { namespace libfabric { return port; } - inline void const* fabric_data() const { return data_.data(); } + inline locality_data const& fabric_data() const { return data_; } inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } - static std::string to_str(locality_data const& data, struct fid_av* av) + std::string to_str() const { char sbuf[256]; size_t buflen = 256; - char const* straddr_ret = fi_av_straddr(av, data.data(), sbuf, &buflen); - std::string result = straddr_ret ? straddr_ret : ""; - // free((char*)(straddr_ret)); + if (!av_) { return "No address vector"; } + char const* straddr_ret = fi_av_straddr(av_, data_.data(), sbuf, &buflen); + std::string result = straddr_ret ? 
straddr_ret : "Address formatting Error"; return result; } private: friend bool operator==(locality const& lhs, locality const& rhs) { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality friend"), iplocality(lhs), iplocality(rhs))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("equality friend"), lhs.to_str(), rhs.to_str())); return ((lhs.data_ == rhs.data_) && (lhs.fi_address_ == rhs.fi_address_)); } - friend bool operator<(locality const& lhs, locality const& rhs) - { - uint32_t const& a1 = lhs.ip_address(); - uint32_t const& a2 = rhs.ip_address(); - fi_addr_t const& f1 = lhs.fi_address(); - fi_addr_t const& f2 = rhs.fi_address(); - LF_DEB( - loc_deb, trace(NS_DEBUG::str<>("less friend"), iplocality(lhs), iplocality(rhs))); - return (a1 < a2) || (a1 == a2 && f1 < f2); - } - friend std::ostream& operator<<(std::ostream& os, locality const& loc) { for (uint32_t i = 0; i < locality_defs::array_length; ++i) { os << loc.data_[i]; } @@ -267,6 +196,7 @@ namespace oomph { namespace libfabric { private: locality_data data_; fi_addr_t fi_address_; + struct fid_av* av_; }; }} // namespace oomph::libfabric From 24e113e2aa01d2bfd850a7cd49f6f6df7448a456 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 22:13:51 +0200 Subject: [PATCH 08/35] Add LNX provider, simplify provider #ifdefs and fabric hints/info setup We do not need #ifdefs for all providers now that address formats are simpler, and by using info/hints supplied by get_info we can allow libfabric to set default values for more fields Simplify cmake generation of provider #ifdefs by simply capitalizing the provider name in the ifdef rather than doing each one by hand --- cmake/oomph_libfabric.cmake | 347 ++++++++++++++++-------------- src/libfabric/controller.hpp | 13 +- src/libfabric/controller_base.hpp | 55 ++++- 3 files changed, 239 insertions(+), 176 deletions(-) diff --git a/cmake/oomph_libfabric.cmake b/cmake/oomph_libfabric.cmake index 18c90369..1ddaf71d 100644 --- a/cmake/oomph_libfabric.cmake +++
b/cmake/oomph_libfabric.cmake @@ -1,177 +1,196 @@ # set all libfabric related options and values -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # Enable libfabric support -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ set(OOMPH_WITH_LIBFABRIC OFF CACHE BOOL "Build with LIBFABRIC backend") -if (OOMPH_WITH_LIBFABRIC) - find_package(Libfabric REQUIRED) - add_library(oomph_libfabric SHARED) - add_library(oomph::libfabric ALIAS oomph_libfabric) - oomph_shared_lib_options(oomph_libfabric) - target_link_libraries(oomph_libfabric PUBLIC libfabric::libfabric) - install(TARGETS oomph_libfabric - EXPORT oomph-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) - - # --------------------------------------------------------------------- - # Function to add config defines to a list that depends on a namespace variable - # #defines that match the namespace can later be written out to a file - # --------------------------------------------------------------------- - function(oomph_libfabric_add_config_define_namespace) - set(options) - set(one_value_args DEFINE NAMESPACE) - set(multi_value_args VALUE) - cmake_parse_arguments(OPTION - "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) - - set(DEF_VAR OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE}) - - # to avoid extra trailing spaces (no value), use an if check - if(OPTION_VALUE) - set_property(GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE} ${OPTION_VALUE}") - else() - set_property(GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE}") - endif() - - endfunction() - - # --------------------------------------------------------------------- - # Function to write out all the config defines for a given namespace - # into a config file - 
# --------------------------------------------------------------------- - function(oomph_libfabric_write_config_defines_file) - set(options) - set(one_value_args TEMPLATE NAMESPACE FILENAME) - set(multi_value_args) - cmake_parse_arguments(OPTION - "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) - - get_property(DEFINITIONS_VAR GLOBAL PROPERTY - OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE}) - - if(DEFINED DEFINITIONS_VAR) - list(SORT DEFINITIONS_VAR) - list(REMOVE_DUPLICATES DEFINITIONS_VAR) - endif() - - set(oomph_config_defines "\n") - foreach(def ${DEFINITIONS_VAR}) - set(oomph_config_defines "${oomph_config_defines}#define ${def}\n") - endforeach() - - # if the user has not specified a template, generate a proper header file - if (NOT OPTION_TEMPLATE) - string(TOUPPER ${OPTION_NAMESPACE} NAMESPACE_UPPER) - set(PREAMBLE +if(OOMPH_WITH_LIBFABRIC) + find_package(Libfabric REQUIRED) + add_library(oomph_libfabric SHARED) + add_library(oomph::libfabric ALIAS oomph_libfabric) + oomph_shared_lib_options(oomph_libfabric) + target_link_libraries(oomph_libfabric PUBLIC libfabric::libfabric) + install(TARGETS oomph_libfabric EXPORT oomph-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + + # --------------------------------------------------------------------- + # Function to add config defines to a list that depends on a namespace + # variable #defines that match the namespace can later be written out to a + # file + # --------------------------------------------------------------------- + function(oomph_libfabric_add_config_define_namespace) + set(options) + set(one_value_args DEFINE NAMESPACE) + set(multi_value_args VALUE) + cmake_parse_arguments( + OPTION "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN} + ) + + set(DEF_VAR OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE}) + + # to avoid extra trailing spaces (no value), use an if check + if(OPTION_VALUE) + set_property( 
+ GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE} ${OPTION_VALUE}" + ) + else() + set_property(GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE}") + endif() + + endfunction() + + # --------------------------------------------------------------------- + # Function to write out all the config defines for a given namespace into a + # config file + # --------------------------------------------------------------------- + function(oomph_libfabric_write_config_defines_file) + set(options) + set(one_value_args TEMPLATE NAMESPACE FILENAME) + set(multi_value_args) + cmake_parse_arguments( + OPTION "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN} + ) + + get_property( + DEFINITIONS_VAR GLOBAL + PROPERTY OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE} + ) + + if(DEFINED DEFINITIONS_VAR) + list(SORT DEFINITIONS_VAR) + list(REMOVE_DUPLICATES DEFINITIONS_VAR) + endif() + + set(oomph_config_defines "\n") + foreach(def ${DEFINITIONS_VAR}) + set(oomph_config_defines "${oomph_config_defines}#define ${def}\n") + endforeach() + + # if the user has not specified a template, generate a proper header file + if(NOT OPTION_TEMPLATE) + string(TOUPPER ${OPTION_NAMESPACE} NAMESPACE_UPPER) + set(PREAMBLE "\n" "// Do not edit this file! 
It has been generated by the cmake configuration step.\n" "\n" "#ifndef OOMPH_LIBFABRIC_CONFIG_${NAMESPACE_UPPER}_HPP\n" "#define OOMPH_LIBFABRIC_CONFIG_${NAMESPACE_UPPER}_HPP\n" - ) - set(TEMP_FILENAME "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${NAMESPACE_UPPER}") - file(WRITE ${TEMP_FILENAME} - ${PREAMBLE} - ${oomph_config_defines} - "#endif\n" - ) - configure_file("${TEMP_FILENAME}" "${OPTION_FILENAME}" COPYONLY) - file(REMOVE "${TEMP_FILENAME}") - else() - configure_file("${OPTION_TEMPLATE}" - "${OPTION_FILENAME}" - @ONLY) - endif() - endfunction() - - include(CMakeParseArguments) - - #------------------------------------------------------------------------------ - # Hardware device selection - #------------------------------------------------------------------------------ - set(OOMPH_LIBFABRIC_PROVIDER "tcp" CACHE - STRING "The provider (cxi(Cray Slingshot)/efa(Amazon Elastic)/gni(Cray Gemini)/psm2(Intel Omni-Path)/tcp/verbs(Infiniband))") - set_property(CACHE OOMPH_LIBFABRIC_PROVIDER PROPERTY STRINGS - "cxi" "efa" "gni" "psm2" "tcp" "verbs" "shm") - - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_PROVIDER - VALUE "\"${OOMPH_LIBFABRIC_PROVIDER}\"" - NAMESPACE libfabric) - - option(OOMPH_LIBFABRIC_V1_API "Support older libfabric@1.15" OFF) - if (OOMPH_LIBFABRIC_V1_API) - oomph_libfabric_add_config_define_namespace( - DEFINE OOMPH_LIBFABRIC_V1_API - NAMESPACE libfabric) - endif() - - if(OOMPH_LIBFABRIC_PROVIDER MATCHES "verbs") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_VERBS - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "gni") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_GNI - NAMESPACE libfabric) - # add pmi library - set(_libfabric_libraries ${_libfabric_libraries} PMIx::libpmix) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "cxi") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_CXI - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER 
MATCHES "efa") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_EFA - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "tcp") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_TCP - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "sockets") - message(WARNING "The Sockets provider is deprecated in favor of the tcp, udp, " - "and utility providers") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_SOCKETS - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "psm2") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_PSM2 - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "shm") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_SHM - NAMESPACE libfabric) - endif() - - #------------------------------------------------------------------------------ - # Performance counters - #------------------------------------------------------------------------------ - set(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS OFF BOOL - STRING "Enable libfabric parcelport performance counters (default: OFF)") - mark_as_advanced(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) - - if (OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) - oomph_libfabric_add_config_define_namespace( - DEFINE OOMPH_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS - NAMESPACE libfabric) + ) + set(TEMP_FILENAME + "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${NAMESPACE_UPPER}" + ) + file(WRITE ${TEMP_FILENAME} ${PREAMBLE} ${oomph_config_defines} + "#endif\n" + ) + configure_file("${TEMP_FILENAME}" "${OPTION_FILENAME}" COPYONLY) + file(REMOVE "${TEMP_FILENAME}") + else() + configure_file("${OPTION_TEMPLATE}" "${OPTION_FILENAME}" @ONLY) endif() - - #------------------------------------------------------------------------------ - # used by template expansion for location of print.hpp - #------------------------------------------------------------------------------ - 
set(OOMPH_SRC_LIBFABRIC_DIR "${PROJECT_SOURCE_DIR}/src/libfabric") - - #------------------------------------------------------------------------------ - # Write options to file in build dir - #------------------------------------------------------------------------------ - oomph_libfabric_write_config_defines_file( - NAMESPACE libfabric - FILENAME "${PROJECT_BINARY_DIR}/src/libfabric/oomph_libfabric_defines.hpp" - TEMPLATE "${OOMPH_SRC_LIBFABRIC_DIR}/libfabric_defines_template.hpp" + endfunction() + + include(CMakeParseArguments) + + # ------------------------------------------------------------------------------ + # Hardware device selection + # ------------------------------------------------------------------------------ + set(OOMPH_LIBFABRIC_PROVIDER + "tcp" + CACHE + STRING + "The provider cxi(Cray Slingshot)/efa(Amazon Elastic)/gni(Cray Gemini)/psm2(Intel Omni-Path)/tcp/verbs(Infiniband), shm, lnx" + ) + set_property( + CACHE OOMPH_LIBFABRIC_PROVIDER + PROPERTY STRINGS + "cxi" + "efa" + "gni" + "psm2" + "tcp" + "verbs" + "shm" + "lnx" + ) + + oomph_libfabric_add_config_define_namespace( + DEFINE HAVE_LIBFABRIC_PROVIDER VALUE "\"${OOMPH_LIBFABRIC_PROVIDER}\"" + NAMESPACE libfabric + ) + + option(OOMPH_LIBFABRIC_V1_API "Support older libfabric@1.15" OFF) + if(OOMPH_LIBFABRIC_V1_API) + oomph_libfabric_add_config_define_namespace( + DEFINE OOMPH_LIBFABRIC_V1_API NAMESPACE libfabric + ) + endif() + + # Map provider string to uppercase and create a define + string(TOUPPER "${OOMPH_LIBFABRIC_PROVIDER}" PROVIDER_UPPER) + oomph_libfabric_add_config_define_namespace( + DEFINE "HAVE_LIBFABRIC_${PROVIDER_UPPER}" NAMESPACE libfabric + ) + + # Special handling for deprecated or extra cases + if(OOMPH_LIBFABRIC_PROVIDER STREQUAL "sockets") + message( + WARNING + "The + Sockets + provider + is + deprecated + in + favor + of + the + tcp, + udp, + and + utility + providers" + ) + endif() + + # Special handling for gni provider needing PMIx + if(OOMPH_LIBFABRIC_PROVIDER 
STREQUAL "gni") + set(_libfabric_libraries ${_libfabric_libraries} PMIx::libpmix) + endif() + + # ------------------------------------------------------------------------------ + # Performance counters + # ------------------------------------------------------------------------------ + set(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS + OFF + BOOL + STRING + "Enable libfabric performance counters (default: OFF)" + ) + mark_as_advanced(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) + + if(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) + oomph_libfabric_add_config_define_namespace( + DEFINE OOMPH_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS NAMESPACE libfabric ) - target_include_directories(oomph_libfabric PRIVATE "${PROJECT_BINARY_DIR}/src/libfabric") + endif() + + # ------------------------------------------------------------------------------ + # used by template expansion for location of print.hpp + # ------------------------------------------------------------------------------ + set(OOMPH_SRC_LIBFABRIC_DIR "${PROJECT_SOURCE_DIR}/src/libfabric") + + # ------------------------------------------------------------------------------ + # Write options to file in build dir + # ------------------------------------------------------------------------------ + oomph_libfabric_write_config_defines_file( + NAMESPACE libfabric FILENAME + "${PROJECT_BINARY_DIR}/src/libfabric/oomph_libfabric_defines.hpp" TEMPLATE + "${OOMPH_SRC_LIBFABRIC_DIR}/libfabric_defines_template.hpp" + ) + target_include_directories( + oomph_libfabric PRIVATE "${PROJECT_BINARY_DIR}/src/libfabric" + ) endif() diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 8f1d3d30..fbae4a3c 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -75,15 +75,14 @@ namespace oomph::libfabric { } // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() + uint64_t caps_flags(uint64_t /*available_flags*/) const { -#if OOMPH_ENABLE_DEVICE && 
!defined(HAVE_LIBFABRIC_TCP) - std::int64_t hmem_flags = FI_HMEM; -#else - std::int64_t hmem_flags = 0; + uint64_t flags_required = FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | + FI_SEND | FI_REMOTE_READ | FI_REMOTE_WRITE; +#if OOMPH_ENABLE_DEVICE + flags_required |= FI_HMEM; #endif - return hmem_flags | FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | - FI_SEND | FI_TRANSMIT | FI_REMOTE_READ | FI_REMOTE_WRITE; + return flags_required; } // -------------------------------------------------------------------- diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index a91f8bc8..10a373b9 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -205,7 +205,7 @@ static std::vector> gni_ints = { # define LIBFABRIC_FI_VERSION_MINOR 15 #else # define LIBFABRIC_FI_VERSION_MAJOR 2 -# define LIBFABRIC_FI_VERSION_MINOR 0 +# define LIBFABRIC_FI_VERSION_MINOR 2 #endif namespace NS_DEBUG { @@ -746,7 +746,24 @@ namespace NS_LIBFABRIC { } // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() { return static_cast(this)->caps_flags(); } + uint64_t caps_flags(uint64_t available_flags) const + { + uint64_t required_flags = + static_cast(this)->caps_flags(available_flags); + // + uint64_t final_flags = required_flags; + for (uint64_t bit = 0; bit < 64; ++bit) + { + uint64_t f = (1ULL << bit); + if ((required_flags & f) && ((available_flags & f) == 0)) + { + NS_DEBUG::cnb_err.error( + debug::str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS)); + final_flags &= ~f; + } + } + return final_flags; + } // -------------------------------------------------------------------- constexpr fi_threading threadlevel_flags() @@ -764,7 +781,7 @@ namespace NS_LIBFABRIC { base_flags = base_flags | FI_MR_LOCAL; #if defined(HAVE_LIBFABRIC_CXI) - return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; + return base_flags | FI_MR_MMU_NOTIFY /*| 
FI_MR_ENDPOINT*/; #elif defined(HAVE_LIBFABRIC_EFA) return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; @@ -775,6 +792,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- uint32_t rendezvous_threshold() { return msg_rendezvous_threshold_; } + // -------------------------------------------------------------------- // initialize the basic fabric/domain/name void open_fabric(std::string const& provider, int threads, bool rootnode) @@ -798,7 +816,35 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); + // get an info object to see what might be available before we set any flags + uint64_t flags = 0; + int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + nullptr, nullptr, flags, fabric_hints_, &fabric_info_); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); + if (display_fabric_info_ && fabric_info_) + { + char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); + if (info_str) + { + LF_DEB(NS_DEBUG::cnb_err, + trace(debug::str<>("Fabric info"), "pre-check ->", + fabric_hints_->fabric_attr->prov_name, "\n", + fi_tostr(fabric_info_, FI_TYPE_INFO))); + } + } + + fabric_hints_->caps = caps_flags(fabric_info_->caps); + if ((fabric_info_->mode & FI_CONTEXT) == 0) + { + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("mode FI_CONTEXT!=0"), + fi_tostr(&fabric_hints_->domain_attr->mode, FI_TYPE_MODE))); + } + fabric_hints_->mode = fabric_info_->mode; + fabric_hints_->domain_attr->name = strdup(fabric_info_->domain_attr->name); fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); + std::cout << fi_tostr(&fabric_hints_->domain_attr->mr_mode, FI_TYPE_MR_MODE) + << std::endl; // Enable/Disable the use of progress threads auto progress = libfabric_progress_type(); @@ -827,13 +873,12 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric 
endpoint"), "RDM")); fabric_hints_->ep_attr->type = FI_EP_RDM; - uint64_t flags = 0; LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("get fabric info"), "FI_VERSION", debug::dec(LIBFABRIC_FI_VERSION_MAJOR), debug::dec(LIBFABRIC_FI_VERSION_MINOR))); - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), nullptr, nullptr, flags, fabric_hints_, &fabric_info_); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); From a1f3fa6ae31ec57bbbeeb3638211b085c2e96fa1 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 08:52:52 +0200 Subject: [PATCH 09/35] Use thread mask (instead of boost::physical_concurrency) for num threads when using GNU --- src/libfabric/context.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 68112e9e..2ce3bee1 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -44,7 +44,18 @@ namespace oomph { // in the following order: single threaded first, then multi-threaded after // int threads = thread_safe ? 
std::thread::hardware_concurrency() : 1; // int threads = std::thread::hardware_concurrency(); - int threads = boost::thread::physical_concurrency(); + // Determine the number of threads based on the CPU affinity mask + int threads = 1; +#if defined(_GNU_SOURCE) + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + if (sched_getaffinity(0, sizeof(cpuset), &cpuset) == 0) + threads = CPU_COUNT(&cpuset); + else + threads = boost::thread::physical_concurrency(); +#else + threads = boost::thread::physical_concurrency(); +#endif m_controller = init_libfabric_controller(this, comm, rank, size, threads, debug); m_domain = m_controller->get_domain(); } From ea38cbffdf2c498d307fa65f9e728632d0a42a33 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 16:00:14 +0200 Subject: [PATCH 10/35] Fix cxi initialization, some hints must be set before fi_info becomes valid --- src/libfabric/controller_base.hpp | 28 +++++++++++++++----- src/libfabric/libfabric_defines_template.hpp | 5 +++- src/libfabric/print.hpp | 9 +++++-- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 10a373b9..d97e5fc3 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -517,6 +517,7 @@ namespace NS_LIBFABRIC { endpoint_wrapper create_rx_endpoint( struct fid_domain* domain, struct fi_info* info, struct fid_av* av) { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); auto ep_rx = new_endpoint_active(domain, info, false); // bind address vector @@ -774,14 +775,14 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- constexpr std::int64_t memory_registration_mode_flags() { - std::int64_t base_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + std::int64_t base_flags = FI_MR_ALLOCATED; // | FI_MR_VIRT_ADDR | FI_MR_PROV_KEY; #if OOMPH_ENABLE_DEVICE base_flags = base_flags | FI_MR_HMEM; #endif 
base_flags = base_flags | FI_MR_LOCAL; #if defined(HAVE_LIBFABRIC_CXI) - return base_flags | FI_MR_MMU_NOTIFY /*| FI_MR_ENDPOINT*/; + return base_flags | FI_MR_ENDPOINT; #elif defined(HAVE_LIBFABRIC_EFA) return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; @@ -816,6 +817,20 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); +#if defined(HAVE_LIBFABRIC_CXI) + // libfabric domain for multi-nic CXI provider + char const* cxi_domain = std::getenv("FI_CXI_DEVICE_NAME"); + if (cxi_domain == nullptr) + { + LF_DEB(NS_DEBUG::cnb_err, error(str<>("Domain"), "FI_CXI_DEVICE_NAME not set")); + } + else { fabric_hints_->domain_attr->name = strdup(cxi_domain); } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("fabric domain"), fabric_hints_->domain_attr->name)); +#endif + + fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); + // get an info object to see what might be available before we set any flags uint64_t flags = 0; int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), @@ -842,7 +857,6 @@ namespace NS_LIBFABRIC { } fabric_hints_->mode = fabric_info_->mode; fabric_hints_->domain_attr->name = strdup(fabric_info_->domain_attr->name); - fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); std::cout << fi_tostr(&fabric_hints_->domain_attr->mr_mode, FI_TYPE_MR_MODE) << std::endl; @@ -888,6 +902,9 @@ namespace NS_LIBFABRIC { trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); } + int mrkey = (fabric_hints_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_PROV_KEY"), mrkey)); + bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); @@ -1057,9 +1074,8 @@ namespace NS_LIBFABRIC { int ret = fi_endpoint(domain, hints, &ep, nullptr); if (ret) { - throw 
NS_LIBFABRIC::fabric_error(ret, - "fi_endpoint (too many threadlocal " - "endpoints?)"); + throw NS_LIBFABRIC::fabric_error( + ret, "fi_endpoint (too many threadlocal endpoints?)"); } fi_freeinfo(hints); LF_DEB( diff --git a/src/libfabric/libfabric_defines_template.hpp b/src/libfabric/libfabric_defines_template.hpp index efd2bb67..ea2a105b 100644 --- a/src/libfabric/libfabric_defines_template.hpp +++ b/src/libfabric/libfabric_defines_template.hpp @@ -19,7 +19,10 @@ #ifndef LF_DEB # define LF_DEB(printer, Expr) \ - if constexpr (printer.is_enabled()) { printer.Expr; }; + { \ + using namespace NS_DEBUG; \ + if constexpr (printer.is_enabled()) { printer.Expr; }; \ + } #endif #define LFSOURCE_DIR "@OOMPH_SRC_LIBFABRIC_DIR@" diff --git a/src/libfabric/print.hpp b/src/libfabric/print.hpp index 73c37c41..301f8e12 100644 --- a/src/libfabric/print.hpp +++ b/src/libfabric/print.hpp @@ -73,8 +73,13 @@ extern char** environ; // ------------------------------------------------------------ #define NS_DEBUG oomph::debug -#define LF_DEB(printer, Expr) \ - if constexpr (printer.is_enabled()) { printer.Expr; }; +#ifndef LF_DEB +# define LF_DEB(printer, Expr) \ + { \ + using namespace NS_DEBUG; \ + if constexpr (printer.is_enabled()) { printer.Expr; }; \ + } +#endif // ------------------------------------------------------------ /// \cond NODETAIL From f668ee7121c66c2aa443a4e2b5b4dec6254e5fb4 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 08:52:22 +0200 Subject: [PATCH 11/35] Disable debug messages --- src/libfabric/communicator.hpp | 2 +- src/libfabric/controller.hpp | 2 +- src/libfabric/fabric_error.hpp | 2 +- src/libfabric/locality.hpp | 2 +- src/libfabric/operation_context.hpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index a38419dc..850f3e98 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -32,7 +32,7 @@ namespace oomph { using 
tag_disp = NS_DEBUG::detail::hex<12, uintptr_t>; template - inline /*constexpr*/ NS_DEBUG::print_threshold com_deb("COMMUNI"); + inline NS_DEBUG::print_threshold com_deb("COMMUNI"); static NS_DEBUG::enable_print com_err("COMMUNI"); diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index fbae4a3c..b8df1b70 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -40,7 +40,7 @@ namespace NS_DEBUG { using namespace oomph::debug; template - inline /*constexpr*/ NS_DEBUG::print_threshold cnt_deb("CONTROL"); + inline NS_DEBUG::print_threshold cnt_deb("CONTROL"); // static NS_DEBUG::enable_print cnt_err("CONTROL"); } // namespace NS_DEBUG diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index 2ec59997..84e43dd5 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -18,7 +18,7 @@ namespace NS_DEBUG { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print err_deb("ERROR__"); + static NS_DEBUG::enable_print err_deb("ERROR__"); } // namespace NS_DEBUG namespace NS_LIBFABRIC { diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 24cdef24..67c753e7 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -55,7 +55,7 @@ namespace oomph { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print loc_deb("LOCALTY"); + static NS_DEBUG::enable_print loc_deb("LOCALTY"); } // namespace oomph namespace oomph { namespace libfabric { diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index 0f6b5103..74d6ba09 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -18,7 +18,7 @@ namespace oomph::libfabric { template - inline /*constexpr*/ NS_DEBUG::print_threshold opctx_deb("OP__CXT"); + inline NS_DEBUG::print_threshold opctx_deb("OP__CXT"); // This struct holds the ready state of a future // we must also store the context used in 
libfabric, in case From e26f4bd651a262f47ad12f826e320d24bf538d0a Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 20:28:24 +0200 Subject: [PATCH 12/35] Clean up debug: namespace usage and rename ptr to (hex) hptr --- src/libfabric/communicator.hpp | 113 ++++----- src/libfabric/context.cpp | 7 +- src/libfabric/context.hpp | 3 +- src/libfabric/controller.hpp | 132 +++++----- src/libfabric/controller_base.hpp | 303 +++++++++++------------ src/libfabric/memory_region.hpp | 32 ++- src/libfabric/operation_context.cpp | 6 +- src/libfabric/operation_context.hpp | 2 +- src/libfabric/operation_context_base.hpp | 2 +- src/libfabric/print.hpp | 37 +-- src/libfabric/request_state.hpp | 4 +- 11 files changed, 291 insertions(+), 350 deletions(-) diff --git a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index 850f3e98..6bec497b 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -63,7 +63,7 @@ namespace oomph { , m_recv_cb_queue(128) , m_recv_cb_cancel(8) { - LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("MPI_comm"), NS_DEBUG::ptr(mpi_comm()))); + LF_DEB(com_deb<9>, debug(str<>("MPI_comm"), hptr(mpi_comm()))); m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); } @@ -115,15 +115,15 @@ namespace oomph { void send_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, uint64_t tag_, operation_context* ctxt) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("send_tagged_region"), - "->", NS_DEBUG::dec<2>(dst_addr_), + debug(str<>("send_tagged_region"), + "->", dec<2>(dst_addr_), send_region, "tag", tag_disp(tag_), - "context", NS_DEBUG::ptr(ctxt), - "tx endpoint", NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); + "context", hptr(ctxt), + "tx 
endpoint", hptr(m_tx_endpoint.get_ep()))); // clang-format on execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), send_region.get_address(), size, send_region.get_local_key(), dst_addr_, tag_, @@ -135,12 +135,11 @@ namespace oomph { void inject_tagged_region( region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, uint64_t tag_) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // clang-format on LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("inject tagged"), "->", NS_DEBUG::dec<2>(dst_addr_), - send_region, "tag", tag_disp(tag_), "tx endpoint", - NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); + debug(str<>("inject tagged"), "->", dec<2>(dst_addr_), send_region, "tag", + tag_disp(tag_), "tx endpoint", hptr(m_tx_endpoint.get_ep()))); // clang-format off execute_fi_function(fi_tinject, "fi_tinject", m_tx_endpoint.get_ep(), send_region.get_address(), size, dst_addr_, tag_); @@ -153,15 +152,15 @@ namespace oomph { void recv_tagged_region(region_type const& recv_region, std::size_t size, fi_addr_t src_addr_, uint64_t tag_, operation_context* ctxt) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // clang-format off LF_DEB(com_deb<1>, - debug(NS_DEBUG::str<>("recv_tagged_region"), - "<-", NS_DEBUG::dec<2>(src_addr_), + debug(str<>("recv_tagged_region"), + "<-", dec<2>(src_addr_), recv_region, "tag", tag_disp(tag_), - "context", NS_DEBUG::ptr(ctxt), - "rx endpoint", NS_DEBUG::ptr(m_rx_endpoint.get_ep()))); + "context", hptr(ctxt), + "rx endpoint", hptr(m_rx_endpoint.get_ep()))); // clang-format on constexpr uint64_t ignore = 0; execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), @@ -175,7 +174,7 @@ namespace oomph { rank_type dst, oomph::tag_type tag, util::unique_function&& cb, std::size_t* scheduled) { - 
[[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); std::uint64_t stag = make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); @@ -189,8 +188,8 @@ namespace oomph { if (size != reg.get_size()) { LF_DEB(com_err, - error(NS_DEBUG::str<>("send mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + error(str<>("send mismatch"), "size", hex<6>(size), "reg size", + hex<6>(reg.get_size()))); } #endif m_context->get_controller()->sends_posted_++; @@ -222,22 +221,22 @@ namespace oomph { // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Send"), - "thisrank", NS_DEBUG::dec<>(rank()), - "rank", NS_DEBUG::dec<>(dst), + debug(str<>("Send"), + "thisrank", dec<>(rank()), + "rank", dec<>(dst), "tag", tag_disp(std::uint64_t(tag)), //"wrapped tag", tag_disp(std::uint64_t(tag.get())), "stag", tag_disp(stag), - "addr", NS_DEBUG::ptr(reg.get_address()), - "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()), - "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), - "req", NS_DEBUG::ptr(s.get()))); + "addr", hptr(reg.get_address()), + "size", hex<6>(size), + "reg size", hex<6>(reg.get_size()), + "op_ctx", hptr(&(s->m_operation_context)), + "req", hptr(s.get()))); #if OOMPH_ENABLE_DEVICE if (!ptr.on_device()) { LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("send region CRC32"), - NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); + debug(str<>("send region CRC32"), + mem_crc32(reg.get_address(), size, "CRC32"))); } #endif // clang-format on @@ -250,7 +249,7 @@ namespace oomph { oomph::tag_type tag, util::unique_function&& cb, std::size_t* scheduled) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); std::uint64_t stag = make_tag64(tag, /*src, */ 
this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE @@ -263,8 +262,8 @@ namespace oomph { if (size != reg.get_size()) { LF_DEB(com_err, - error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + error(str<>("recv mismatch"), "size", hex<6>(size), "reg size", + hex<6>(reg.get_size()))); } #endif m_context->get_controller()->recvs_posted_++; @@ -275,22 +274,22 @@ namespace oomph { // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("recv"), - "thisrank", NS_DEBUG::dec<>(rank()), - "rank", NS_DEBUG::dec<>(src), + debug(str<>("recv"), + "thisrank", dec<>(rank()), + "rank", dec<>(src), "tag", tag_disp(std::uint64_t(tag)), //"wrapped tag", tag_disp(std::uint64_t(tag.get())), "stag", tag_disp(stag), - "addr", NS_DEBUG::ptr(reg.get_address()), - "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()), - "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), - "req", NS_DEBUG::ptr(s.get()))); + "addr", hptr(reg.get_address()), + "size", hex<6>(size), + "reg size", hex<6>(reg.get_size()), + "op_ctx", hptr(&(s->m_operation_context)), + "req", hptr(s.get()))); #if OOMPH_ENABLE_DEVICE if (!ptr.on_device()) { LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("recv region CRC32"), - NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); + debug(str<>("recv region CRC32"), + mem_crc32(reg.get_address(), size, "CRC32"))); } #endif // clang-format on @@ -304,7 +303,7 @@ namespace oomph { util::unique_function&& cb, std::atomic* scheduled) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE @@ -317,8 +316,8 @@ namespace oomph { if (size != reg.get_size()) { LF_DEB(com_err, - error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", 
NS_DEBUG::hex<6>(reg.get_size()))); + error(str<>("recv mismatch"), "size", hex<6>(size), "reg size", + hex<6>(reg.get_size()))); } #endif m_context->get_controller()->recvs_posted_++; @@ -330,17 +329,17 @@ namespace oomph { // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("shared_recv"), - "thisrank", NS_DEBUG::dec<>(rank()), - "rank", NS_DEBUG::dec<>(src), + debug(str<>("shared_recv"), + "thisrank", dec<>(rank()), + "rank", dec<>(src), "tag", tag_disp(std::uint64_t(tag)), //"wrapped tag", tag_disp(std::uint64_t(tag.get())), "stag", tag_disp(stag), - "addr", NS_DEBUG::ptr(reg.get_address()), - "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()), - "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), - "req", NS_DEBUG::ptr(s.get()))); + "addr", hptr(reg.get_address()), + "size", hex<6>(size), + "reg size", hex<6>(reg.get_size()), + "op_ctx", hptr(&(s->m_operation_context)), + "req", hptr(s.get()))); // clang-format on recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); @@ -360,14 +359,14 @@ namespace oomph { // (by other threads) m_send_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = - com_deb<9>.scope("m_send_cb_queue.consume_all", NS_DEBUG::ptr(req)); + com_deb<9>.scope("m_send_cb_queue.consume_all", NS_DEBUG::hptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); m_recv_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = - com_deb<9>.scope("m_recv_cb_queue.consume_all", NS_DEBUG::ptr(req)); + com_deb<9>.scope("m_recv_cb_queue.consume_all", NS_DEBUG::hptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); @@ -391,8 +390,7 @@ namespace oomph { // submit the cancellation request bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); - LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Cancel"), "ok", ok, "op_ctx", NS_DEBUG::ptr(op_ctx))); + LF_DEB(com_deb<9>, debug(str<>("Cancel"), "ok", 
ok, "op_ctx", hptr(op_ctx))); // if the cancel operation failed completely, return if (!ok) return false; @@ -411,8 +409,7 @@ namespace oomph { // our recv was cancelled correctly found = true; LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Cancel"), "succeeded", "op_ctx", - NS_DEBUG::ptr(op_ctx))); + debug(str<>("Cancel"), "succeeded", "op_ctx", hptr(op_ctx))); auto ptr = s->release_self_ref(); s->set_canceled(); } diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 2ce3bee1..6b49098a 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -36,8 +36,7 @@ namespace oomph { m_ctxt_tag = reinterpret_cast(this); OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); LF_DEB(src_deb, - debug(NS_DEBUG::str<>("Broadcast"), "rank", debug::dec<3>(rank), "context", - debug::ptr(m_ctxt_tag))); + debug(str<>("Broadcast"), "rank", dec<3>(rank), "context", hptr(m_ctxt_tag))); // TODO fix the thread safety // problem: controller is a singleton and has problems when 2 contexts are created @@ -93,8 +92,8 @@ namespace oomph { if (!instance.get()) { LF_DEB(src_deb, - debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), "size", - debug::dec<3>(size), "threads", debug::dec<3>(threads))); + debug(NS_DEBUG::str<>("New Controller"), "rank", dec<3>(rank), "size", dec<3>(size), + "threads", dec<3>(threads))); instance.reset(new controller_type()); if (debug) instance->enable_debug(); instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index cf02c850..e7f0308f 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -114,8 +114,7 @@ namespace oomph { // our recv was cancelled correctly found = true; LF_DEB(oomph::ctx_deb, - debug(NS_DEBUG::str<>("Cancel shared"), "succeeded", "op_ctx", - NS_DEBUG::ptr(op_ctx))); + debug(str<>("Cancel shared"), "succeeded", "op_ctx", hptr(op_ctx))); auto ptr = 
s->release_self_ref(); s->set_canceled(); } diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index b8df1b70..39c88fd9 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -93,55 +93,52 @@ namespace oomph::libfabric { // send address to rank 0 and receive array of all localities void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // array of empty locality objects std::vector localities(size); // if (rank > 0) { - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("sending here"), here_.to_str(), "size", - locality_defs::array_size)); + LF_DEB(cnt_deb<9>, + debug( + str<>("sending here"), here_.to_str(), "size", locality_defs::array_size)); /*int err = */ MPI_Send(here_.fabric_data().data(), locality_defs::array_size, MPI_CHAR, 0, // dst rank 0, // tag comm); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving all"), "size", locality_defs::array_size)); + LF_DEB( + cnt_deb<9>, debug(str<>("receiving all"), "size", locality_defs::array_size)); MPI_Status status; /*err = */ MPI_Recv(localities.data(), size * locality_defs::array_size, MPI_CHAR, 0, // src rank 0, // tag comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("received addresses"))); + LF_DEB(cnt_deb<9>, debug(str<>("received addresses"))); } else { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); + LF_DEB(cnt_deb<9>, debug(str<>("receiving addresses"))); memcpy(&localities[0], here_.fabric_data().data(), locality_defs::array_size); for (int i = 1; i < size; ++i) { - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving address"), debug::dec<>(i))); + LF_DEB(cnt_deb<9>, debug(str<>("receiving address"), dec<>(i))); MPI_Status status; /*int err = */ MPI_Recv(&localities[i], size * 
locality_defs::array_size, MPI_CHAR, i, // src rank 0, // tag comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("received address"), debug::dec<>(i))); + LF_DEB(cnt_deb<9>, debug(str<>("received address"), dec<>(i))); } - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending all"))); + LF_DEB(cnt_deb<9>, debug(str<>("sending all"))); for (int i = 1; i < size; ++i) { - LF_DEB( - NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending to"), debug::dec<>(i))); + LF_DEB(cnt_deb<9>, debug(str<>("sending to"), dec<>(i))); /*int err = */ MPI_Send(&localities[0], size * locality_defs::array_size, MPI_CHAR, i, // dst rank @@ -151,7 +148,7 @@ namespace oomph::libfabric { } // all ranks should now have a full localities vector - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); + LF_DEB(cnt_deb<9>, debug(str<>("populating vector"))); for (int i = 0; i < size; ++i) { locality temp(localities[i], av); @@ -164,18 +161,17 @@ namespace oomph::libfabric { // and insert each one into the address vector void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::hptr(this), __func__); int rank, size; MPI_Comm_rank(mpi_comm, &rank); MPI_Comm_size(mpi_comm, &size); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("initialize_localities"), size, "localities")); + LF_DEB(cnt_deb<9>, debug(str<>("initialize_localities"), size, "localities")); MPI_exchange_localities(av, mpi_comm, rank, size); debug_print_av_vector(size); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("Done localities"))); + LF_DEB(cnt_deb<9>, debug(str<>("Done localities"))); } // -------------------------------------------------------------------- @@ -252,8 +248,8 @@ namespace oomph::libfabric { if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } static auto polling = - NS_DEBUG::cnt_deb<9>.make_timer(1, 
debug::str<>("poll send queue")); - LF_DEB(NS_DEBUG::cnt_deb<9>, timed(polling, NS_DEBUG::ptr(send_cq))); + NS_DEBUG::cnt_deb<9>.make_timer(1, NS_DEBUG::str<>("poll send queue")); + LF_DEB(cnt_deb<9>, timed(polling, hptr(send_cq))); // poll for completions { @@ -269,21 +265,21 @@ namespace oomph::libfabric { // flags might not be set correctly if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_SEND with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror( - send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + LF_DEB(cnt_err, + error("txcq Error FI_EAVAIL for FI_SEND with len", hex<6>(e.len), + "context", hptr(e.op_context), "code", dec<3>(e.err), "flags", + bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); } else if ((e.flags & FI_RMA) != 0) { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_RMA with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror( - send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + LF_DEB(cnt_err, + error("txcq Error FI_EAVAIL for FI_RMA with len", hex<6>(e.len), + "context", hptr(e.op_context), "code", dec<3>(e.err), "flags", + bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); } operation_context* handler = reinterpret_cast(e.op_context); handler->handle_error(e); @@ -299,17 +295,16 @@ namespace oomph::libfabric { for (int i = 0; i < ret; ++i) { ++sends_complete; - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("Completion"), i, debug::dec<2>(i), "txcq flags", + LF_DEB(cnt_deb<9>, + debug(str<>("Completion"), i, dec<2>(i), "txcq flags", fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - 
debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", - debug::hex<6>(entry[i].len))); + dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) { - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("Completion"), "txcq tagged send completion", - NS_DEBUG::ptr(entry[i].op_context))); + LF_DEB(cnt_deb<9>, + debug(str<>("Completion"), "txcq tagged send completion", + hptr(entry[i].op_context))); operation_context* handler = reinterpret_cast(entry[i].op_context); @@ -317,8 +312,9 @@ namespace oomph::libfabric { } else { - NS_DEBUG::cnt_err.error("Received an unknown txcq completion", - debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + LF_DEB(cnt_err, + error("Received an unknown txcq completion", dec<>(entry[i].flags), + bin<64>(entry[i].flags))); std::terminate(); } } @@ -328,7 +324,7 @@ namespace oomph::libfabric { { // do nothing, we will try again on the next check } - else { NS_DEBUG::cnt_err.error("unknown error in completion txcq read"); } + else { LF_DEB(cnt_err, error("unknown error in completion txcq read")); } return 0; } @@ -353,8 +349,8 @@ namespace oomph::libfabric { if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } static auto polling = - NS_DEBUG::cnt_deb<2>.make_timer(1, debug::str<>("poll recv queue")); - LF_DEB(NS_DEBUG::cnt_deb<2>, timed(polling, NS_DEBUG::ptr(rx_cq))); + NS_DEBUG::cnt_deb<2>.make_timer(1, NS_DEBUG::str<>("poll recv queue")); + LF_DEB(cnt_deb<2>, timed(polling, hptr(rx_cq))); // poll for completions { @@ -370,10 +366,9 @@ namespace oomph::libfabric { // from the manpage 'man 3 fi_cq_readerr' if (e.err == FI_ECANCELED) { - LF_DEB(NS_DEBUG::cnt_deb<1>, - debug(debug::str<>("rxcq Cancelled"), "flags", debug::hex<6>(e.flags), - "len", debug::hex<6>(e.len), "context", - NS_DEBUG::ptr(e.op_context))); + LF_DEB(cnt_deb<1>, + debug(str<>("rxcq Cancelled"), "flags", 
hex<6>(e.flags), "len", + hex<6>(e.len), "context", hptr(e.op_context))); // the request was cancelled, we can simply exit // as the canceller will have doone any cleanup needed operation_context* handler = @@ -383,11 +378,12 @@ namespace oomph::libfabric { } else if (e.err != FI_SUCCESS) { - NS_DEBUG::cnt_err.error(debug::str<>("poll_recv_queue"), "error code", - debug::dec<>(-e.err), "flags", debug::hex<6>(e.flags), "len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), - "error msg", - fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + LF_DEB(cnt_err, + error(str<>("poll_recv_queue"), "error code", dec<>(-e.err), "flags", + hex<6>(e.flags), "len", hex<6>(e.len), "context", + hptr(e.op_context), "error msg", + fi_cq_strerror( + rx_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); } operation_context* handler = reinterpret_cast(e.op_context); if (handler) handler->handle_error(e); @@ -403,17 +399,16 @@ namespace oomph::libfabric { for (int i = 0; i < ret; ++i) { ++recvs_complete; - LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), i, "rxcq flags", + LF_DEB(cnt_deb<2>, + debug(str<>("Completion"), i, "rxcq flags", fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", - debug::hex<6>(entry[i].len))); + dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) { - LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), "rxcq tagged recv completion", - NS_DEBUG::ptr(entry[i].op_context))); + LF_DEB(cnt_deb<2>, + debug(str<>("Completion"), "rxcq tagged recv completion", + hptr(entry[i].op_context))); operation_context* handler = reinterpret_cast(entry[i].op_context); @@ -421,8 +416,9 @@ namespace oomph::libfabric { } else { - NS_DEBUG::cnt_err.error("Received an unknown rxcq completion", - 
debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + LF_DEB(cnt_err, + error("Received an unknown rxcq completion", dec<>(entry[i].flags), + bin<64>(entry[i].flags))); std::terminate(); } } @@ -432,7 +428,7 @@ namespace oomph::libfabric { { // do nothing, we will try again on the next check } - else { NS_DEBUG::cnt_err.error("unknown error in completion rxcq read"); } + else { LF_DEB(cnt_err, error("unknown error in completion rxcq read")); } return 0; } @@ -442,7 +438,7 @@ namespace oomph::libfabric { (void) info; // unused variable warning (void) tx; // unused variable warning - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + LF_DEB(cnb_deb, debug(str<>("fi_dupinfo"))); struct fi_info* hints = fi_dupinfo(info); if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); // clear any Rx address data that might be set diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index d97e5fc3..205c40ab 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -239,7 +239,7 @@ namespace NS_LIBFABRIC { template void fidclose(Handle fid, char const* msg) { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("closing"), msg)); + LF_DEB(cnb_deb, debug(str<>("closing"), msg)); int ret = fi_close(fid); if (ret == -FI_EBUSY) { throw NS_LIBFABRIC::fabric_error(ret, "fi_close EBUSY"); } else if (ret == FI_SUCCESS) { return; } @@ -267,7 +267,7 @@ namespace NS_LIBFABRIC { , name_(name) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, name_); } // to keep boost::lockfree happy, we need these copy operators @@ -277,7 +277,7 @@ namespace NS_LIBFABRIC { void cleanup() { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, name_); if (ep_) { fidclose(&ep_->fid, "endpoint"); @@ -334,9 +334,9 @@ namespace 
NS_LIBFABRIC { ~stack_endpoint() { if (!pool_) return; - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "used push", "ep", NS_DEBUG::ptr(get_ep()), - "tx cq", NS_DEBUG::ptr(get_tx_cq()), "rx cq", NS_DEBUG::ptr(get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "used push", "ep", hptr(get_ep()), "tx cq", + hptr(get_tx_cq()), "rx cq", hptr(get_rx_cq()))); pool_->push(endpoint_); } @@ -426,7 +426,7 @@ namespace NS_LIBFABRIC { void finvoke(char const* msg, char const* err, int ret) { - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>(msg))); + LF_DEB(cnb_deb, trace(str<>(msg))); if (ret) throw NS_LIBFABRIC::fabric_error(ret, err); } @@ -460,16 +460,15 @@ namespace NS_LIBFABRIC { // clean up all resources ~controller_base() { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); unsigned int messages_handled_ = 0; unsigned int rma_reads_ = 0; unsigned int recv_deletes_ = 0; - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("counters"), "Received messages", - debug::dec<>(messages_handled_), "Total reads", debug::dec<>(rma_reads_), - "Total deletes", debug::dec<>(recv_deletes_), "deletes error", - debug::dec<>(messages_handled_ - recv_deletes_))); + LF_DEB(cnb_deb, + debug(str<>("counters"), "Received messages", dec<>(messages_handled_), + "Total reads", dec<>(rma_reads_), "Total deletes", dec<>(recv_deletes_), + "deletes error", dec<>(messages_handled_ - recv_deletes_))); tx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); rx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); @@ -502,7 +501,7 @@ namespace NS_LIBFABRIC { fidclose(&fabric_->fid, "Fabric"); // clean up - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("freeing fabric_info"))); + LF_DEB(cnb_deb, debug(str<>("freeing fabric_info"))); fi_freeinfo(fabric_info_); } @@ -517,7 +516,7 @@ namespace NS_LIBFABRIC { endpoint_wrapper create_rx_endpoint( struct 
fid_domain* domain, struct fi_info* info, struct fid_av* av) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); auto ep_rx = new_endpoint_active(domain, info, false); // bind address vector @@ -538,26 +537,23 @@ namespace NS_LIBFABRIC { void initialize( std::string const& provider, bool rootnode, int size, size_t threads, Args&&... args) { - LF_DEB(NS_DEBUG::cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + LF_DEB(cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); max_completions_per_poll_ = libfabric_completions_per_poll(); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Poll completions"), debug::dec<3>(max_completions_per_poll_))); + LF_DEB(cnb_err, debug(str<>("Poll completions"), dec<3>(max_completions_per_poll_))); uint32_t default_val = (threads == 1) ? 
0x400 : 0x4000; msg_rendezvous_threshold_ = libfabric_rendezvous_threshold(default_val); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Rendezvous threshold"), - debug::hex<4>(msg_rendezvous_threshold_))); + LF_DEB( + cnb_err, debug(str<>("Rendezvous threshold"), hex<4>(msg_rendezvous_threshold_))); endpoint_type_ = static_cast(libfabric_endpoint_type()); - LF_DEB( - NS_DEBUG::cnb_err, debug(debug::str<>("Endpoints"), libfabric_endpoint_string())); + LF_DEB(cnb_err, debug(str<>("Endpoints"), libfabric_endpoint_string())); eps_ = std::make_unique(); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Threads"), debug::dec<3>(threads))); + LF_DEB(cnb_deb, debug(str<>("Threads"), dec<3>(threads))); open_fabric(provider, threads, rootnode); @@ -625,9 +621,9 @@ namespace NS_LIBFABRIC { auto ep_sx = new_endpoint_scalable( fabric_domain_, fabric_info_, true /*Tx*/, threads, threads_allocated); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("scalable endpoint ok"), "Contexts allocated", - debug::dec<4>(threads_allocated))); + LF_DEB(cnb_deb, + trace(str<>("scalable endpoint ok"), "Contexts allocated", + dec<4>(threads_allocated))); finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); @@ -637,8 +633,8 @@ namespace NS_LIBFABRIC { // for (unsigned int i = 0; i < threads_allocated; i++) { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope( + NS_DEBUG::hptr(this), "scalable", NS_DEBUG::dec<4>(i)); // For threadlocal/scalable endpoints, tx/rx resources fid_ep* scalable_ep_tx; @@ -654,10 +650,9 @@ namespace NS_LIBFABRIC { enable_endpoint(scalable_ep_tx, "tx scalable"); endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", - NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), - "rx cq", 
NS_DEBUG::ptr(tx.get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "initial tx push", "ep", hptr(tx.get_ep()), + "tx cq", hptr(tx.get_tx_cq()), "rx cq", hptr(tx.get_rx_cq()))); tx_endpoints_.push(tx); } @@ -667,7 +662,7 @@ namespace NS_LIBFABRIC { // once enabled we can get the address enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), here_.to_str())); + LF_DEB(cnb_deb, debug(str<>("setting 'here'"), here_.to_str())); // // if we are using scalable endpoints, then setup tx/rx contexts // // we will us a single endpoint for all Tx/Rx contexts @@ -682,8 +677,8 @@ namespace NS_LIBFABRIC { // if (!ep_sx) // throw NS_LIBFABRIC::fabric_error(FI_EOTHER, "fi_scalable endpoint creation failed"); - // LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), - // "Contexts allocated", debug::dec<4>(threads_allocated))); + // LF_DEB(cnb_deb, trace(str<>("scalable endpoint ok"), + // "Contexts allocated", dec<4>(threads_allocated))); // // prepare the stack for insertions // tx_endpoints_.reserve(threads_allocated); @@ -692,7 +687,7 @@ namespace NS_LIBFABRIC { // for (unsigned int i = 0; i < threads_allocated; i++) // { // [[maybe_unused]] auto scp = - // NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + // NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), "scalable", dec<4>(i)); // // For threadlocal/scalable endpoints, tx/rx resources // fid_ep* scalable_ep_tx; @@ -712,8 +707,8 @@ namespace NS_LIBFABRIC { // enable_endpoint(scalable_ep_tx, "tx scalable"); // endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - // LF_DEB(NS_DEBUG::cnb_deb, - // trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", + // LF_DEB(cnb_deb, + // trace(str<>("Scalable Ep"), "initial tx push", "ep", // NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", // 
NS_DEBUG::ptr(tx.get_rx_cq()))); // tx_endpoints_.push(tx); @@ -730,8 +725,8 @@ namespace NS_LIBFABRIC { //// enable_endpoint(scalable_ep_rx, "rx scalable"); //// endpoint_wrapper rx(scalable_ep_rx, scalable_cq_rx, nullptr, "rx scalable"); - //// LF_DEB(NS_DEBUG::cnb_deb, - //// trace(debug::str<>("Scalable Ep"), "initial rx push", "ep", + //// LF_DEB(cnb_deb, + //// trace(str<>("Scalable Ep"), "initial rx push", "ep", //// NS_DEBUG::ptr(rx.get_ep()), "tx cq", NS_DEBUG::ptr(rx.get_tx_cq()), "rx cq", //// NS_DEBUG::ptr(rx.get_rx_cq()))); //// rx_endpoints_.push(rx); @@ -758,8 +753,8 @@ namespace NS_LIBFABRIC { uint64_t f = (1ULL << bit); if ((required_flags & f) && ((available_flags & f) == 0)) { - NS_DEBUG::cnb_err.error( - debug::str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS)); + LF_DEB(cnb_err, + error(str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS))); final_flags &= ~f; } } @@ -798,7 +793,7 @@ namespace NS_LIBFABRIC { // initialize the basic fabric/domain/name void open_fabric(std::string const& provider, int threads, bool rootnode) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); struct fi_info* fabric_hints_ = fi_allocinfo(); if (!fabric_hints_) @@ -814,19 +809,18 @@ namespace NS_LIBFABRIC { strdup(std::string(provider + ";ofi_rxm").c_str()); } else { fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); + LF_DEB(cnb_deb, debug(str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); #if defined(HAVE_LIBFABRIC_CXI) // libfabric domain for multi-nic CXI provider char const* cxi_domain = std::getenv("FI_CXI_DEVICE_NAME"); if (cxi_domain == nullptr) { - LF_DEB(NS_DEBUG::cnb_err, error(str<>("Domain"), "FI_CXI_DEVICE_NAME not set")); + LF_DEB(cnb_err, error(str<>("Domain"), 
"FI_CXI_DEVICE_NAME not set")); } else { fabric_hints_->domain_attr->name = strdup(cxi_domain); } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("fabric domain"), fabric_hints_->domain_attr->name)); + LF_DEB( + NS_DEBUG::cnb_deb, debug(str<>("fabric domain"), fabric_hints_->domain_attr->name)); #endif fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); @@ -841,8 +835,8 @@ namespace NS_LIBFABRIC { char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); if (info_str) { - LF_DEB(NS_DEBUG::cnb_err, - trace(debug::str<>("Fabric info"), "pre-check ->", + LF_DEB(cnb_err, + trace(str<>("Fabric info"), "pre-check ->", fabric_hints_->fabric_attr->prov_name, "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); } @@ -851,8 +845,8 @@ namespace NS_LIBFABRIC { fabric_hints_->caps = caps_flags(fabric_info_->caps); if ((fabric_info_->mode & FI_CONTEXT) == 0) { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("mode FI_CONTEXT!=0"), + LF_DEB(cnb_err, + debug(str<>("mode FI_CONTEXT!=0"), fi_tostr(&fabric_hints_->domain_attr->mode, FI_TYPE_MODE))); } fabric_hints_->mode = fabric_info_->mode; @@ -864,11 +858,11 @@ namespace NS_LIBFABRIC { auto progress = libfabric_progress_type(); fabric_hints_->domain_attr->control_progress = progress; fabric_hints_->domain_attr->data_progress = progress; - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("progress"), libfabric_progress_string())); + LF_DEB(cnb_err, debug(str<>("progress"), libfabric_progress_string())); if (threads > 1) { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); + LF_DEB(cnb_deb, debug(str<>("FI_THREAD_FID"))); // Enable thread safe mode (Does not work with psm2 provider) // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; // fabric_hints_->domain_attr->threading = FI_THREAD_FID; @@ -876,7 +870,7 @@ namespace NS_LIBFABRIC { } else { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_DOMAIN"))); + LF_DEB(cnb_deb, debug(str<>("FI_THREAD_DOMAIN"))); // we serialize everything 
fabric_hints_->domain_attr->threading = FI_THREAD_DOMAIN; } @@ -884,13 +878,12 @@ namespace NS_LIBFABRIC { // Enable resource management fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric endpoint"), "RDM")); + LF_DEB(cnb_deb, debug(str<>("fabric endpoint"), "RDM")); fabric_hints_->ep_attr->type = FI_EP_RDM; - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("get fabric info"), "FI_VERSION", - debug::dec(LIBFABRIC_FI_VERSION_MAJOR), - debug::dec(LIBFABRIC_FI_VERSION_MINOR))); + LF_DEB(cnb_deb, + debug(str<>("get fabric info"), "FI_VERSION", dec(LIBFABRIC_FI_VERSION_MAJOR), + dec(LIBFABRIC_FI_VERSION_MINOR))); ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), nullptr, nullptr, flags, fabric_hints_, &fabric_info_); @@ -898,57 +891,57 @@ namespace NS_LIBFABRIC { if (rootnode) { - LF_DEB(NS_DEBUG::cnb_err, - trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); + LF_DEB(cnb_err, + trace(str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); } int mrkey = (fabric_hints_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_PROV_KEY"), mrkey)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_PROV_KEY"), mrkey)); bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_CONTEXT"), context)); mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_LOCAL"), mrlocal)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_LOCAL"), mrlocal)); mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ENDPOINT"), mrbind)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ENDPOINT"), mrbind)); /* Check if provider requires heterogeneous 
memory registration */ mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_HMEM"), mrhmem)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_HMEM"), mrhmem)); bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating fi_fabric"))); + LF_DEB(cnb_deb, debug(str<>("Creating fi_fabric"))); ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); // Allocate a domain. - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Allocating domain"))); + LF_DEB(cnb_deb, debug(str<>("Allocating domain"))); ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_domain"); #if defined(HAVE_LIBFABRIC_GNI) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "GNI memory registration block"); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), "GNI memory registration block"); - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI String values")); + LF_DEB(cnb_err, debug(str<>("-------"), "GNI String values")); // Dump out all vars for debug purposes for (auto& gni_data : gni_strs) { _set_check_domain_op_value( gni_data.first, 0, gni_data.second.c_str(), false); } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI Int values")); + LF_DEB(cnb_err, debug(str<>("-------"), "GNI Int values")); for (auto& gni_data : gni_ints) { _set_check_domain_op_value( gni_data.first, 0, gni_data.second.c_str(), false); } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"))); + LF_DEB(cnb_err, debug(str<>("-------"))); // -------------------------- // GNI_MR_CACHE @@ -971,7 +964,7 @@ namespace NS_LIBFABRIC { // 
Enable lazy deregistration in MR cache // int32_t enable = 1; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); + LF_DEB(cnb_deb, debug(str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); _set_check_domain_op_value( GNI_MR_CACHE_LAZY_DEREG, enable, "GNI_MR_CACHE_LAZY_DEREG"); @@ -1012,7 +1005,7 @@ namespace NS_LIBFABRIC { template int _set_check_domain_op_value(int op, T value, char const* info, bool set = true) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); static struct fi_gni_ops_domain* gni_domain_ops = nullptr; int ret = 0; @@ -1020,8 +1013,8 @@ namespace NS_LIBFABRIC { { ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, (void**) &gni_domain_ops, nullptr); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), + LF_DEB(cnb_deb, + debug(str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), NS_DEBUG::ptr(gni_domain_ops))); } @@ -1031,8 +1024,7 @@ namespace NS_LIBFABRIC { ret = gni_domain_ops->set_val( &fabric_domain_->fid, (dom_ops_val_t) (op), reinterpret_cast(&value)); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni set ops val"), value, (ret == 0 ? "OK" : "FAIL"))); + LF_DEB(cnb_deb, debug(str<>("gni set ops val"), value, (ret == 0 ? "OK" : "FAIL"))); } // Get the value (so we can check that the value we set is now returned) @@ -1040,14 +1032,14 @@ namespace NS_LIBFABRIC { ret = gni_domain_ops->get_val(&fabric_domain_->fid, (dom_ops_val_t) (op), &new_value); if constexpr (std::is_integral::value) { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, - debug::hex<8>(new_value))); + LF_DEB(cnb_err, + debug( + str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, hex<8>(new_value))); } else { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("gni op val"), (ret == 0 ? 
"OK" : "FAIL"), info, new_value)); + LF_DEB(cnb_err, + debug(str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, new_value)); } // if (ret) throw NS_LIBFABRIC::fabric_error(ret, std::string("setting ") + info); @@ -1066,9 +1058,8 @@ namespace NS_LIBFABRIC { // and we do not create two endpoint with the same src address struct fi_info* hints = set_src_dst_addresses(info, tx); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); + LF_DEB(cnb_deb, debug(str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); struct fid_ep* ep; int ret = fi_endpoint(domain, hints, &ep, nullptr); @@ -1078,8 +1069,7 @@ namespace NS_LIBFABRIC { ret, "fi_endpoint (too many threadlocal endpoints?)"); } fi_freeinfo(hints); - LF_DEB( - NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_active"), NS_DEBUG::ptr(ep))); + LF_DEB(cnb_deb, debug(str<>("new_endpoint_active"), hptr(ep))); return ep; } @@ -1090,9 +1080,9 @@ namespace NS_LIBFABRIC { // don't allow multiple threads to call endpoint create at the same time scoped_lock lock(controller_mutex_); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + LF_DEB(cnb_deb, debug(str<>("fi_dupinfo"))); struct fi_info* hints = fi_dupinfo(info); if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); @@ -1108,13 +1098,13 @@ namespace NS_LIBFABRIC { else { context_count = std::min(new_hints->domain_attr->rx_ctx_cnt, threads); } // clang-format off - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("scalable endpoint"), + LF_DEB(cnb_deb, + trace(str<>("scalable endpoint"), "Tx", tx, - "Threads", debug::dec<3>(threads), - "tx_ctx_cnt", 
debug::dec<3>(new_hints->domain_attr->tx_ctx_cnt), - "rx_ctx_cnt", debug::dec<3>(new_hints->domain_attr->rx_ctx_cnt), - "context_count", debug::dec<3>(context_count))); + "Threads", dec<3>(threads), + "tx_ctx_cnt", dec<3>(new_hints->domain_attr->tx_ctx_cnt), + "rx_ctx_cnt", dec<3>(new_hints->domain_attr->rx_ctx_cnt), + "context_count", dec<3>(context_count))); // clang-format on threads_allocated = context_count; @@ -1124,8 +1114,7 @@ namespace NS_LIBFABRIC { struct fid_ep* ep; ret = fi_scalable_ep(domain, new_hints, &ep, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_scalable_ep"); - LF_DEB( - NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_scalable"), NS_DEBUG::ptr(ep))); + LF_DEB(cnb_deb, debug(str<>("new_endpoint_scalable"), hptr(ep))); fi_freeinfo(hints); return ep; } @@ -1133,8 +1122,8 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- endpoint_wrapper& get_rx_endpoint() { - static auto rx = NS_DEBUG::cnb_deb.make_timer(1, debug::str<>("get_rx_endpoint")); - LF_DEB(NS_DEBUG::cnb_deb, timed(rx)); + static auto rx = NS_DEBUG::cnb_deb.make_timer(1, NS_DEBUG::str<>("get_rx_endpoint")); + LF_DEB(cnb_deb, timed(rx)); if (endpoint_type_ == endpoint_type::scalableTxRx) { @@ -1145,20 +1134,19 @@ namespace NS_LIBFABRIC { if (!ok) { // clang-format off - LF_DEB(NS_DEBUG::cnb_deb, error(debug::str<>("Scalable Ep"), "pop rx", - "ep", NS_DEBUG::ptr(ep.get_ep()), - "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), - "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); + LF_DEB(cnb_deb, error(str<>("Scalable Ep"), "pop rx", + "ep", hptr(ep.get_ep()), + "tx cq", hptr(ep.get_tx_cq()), + "rx cq", hptr(ep.get_rx_cq()))); // clang-format on throw std::runtime_error("rx endpoint wrapper pop fail"); } eps_->tl_srx_ = stack_endpoint( ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &rx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "pop rx", "ep", - NS_DEBUG::ptr(eps_->tl_srx_.get_ep()), "tx cq", 
- NS_DEBUG::ptr(eps_->tl_srx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_srx_.get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "pop rx", "ep", hptr(eps_->tl_srx_.get_ep()), + "tx cq", hptr(eps_->tl_srx_.get_tx_cq()), "rx cq", + hptr(eps_->tl_srx_.get_rx_cq()))); } return eps_->tl_srx_.endpoint_; } @@ -1174,7 +1162,7 @@ namespace NS_LIBFABRIC { if (eps_->tl_tx_.get_ep() == nullptr) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, "threadlocal"); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, "threadlocal"); // create a completion queue for tx endpoint fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); @@ -1191,10 +1179,9 @@ namespace NS_LIBFABRIC { enable_endpoint(ep_tx, "tx threadlocal"); // set threadlocal endpoint wrapper - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Threadlocal Ep"), "create Tx", "ep", - NS_DEBUG::ptr(ep_tx), "tx cq", NS_DEBUG::ptr(tx_cq), "rx cq", - NS_DEBUG::ptr(nullptr))); + LF_DEB(cnb_deb, + trace(str<>("Threadlocal Ep"), "create Tx", "ep", hptr(ep_tx), "tx cq", + hptr(tx_cq), "rx cq", hptr(nullptr))); // for cleaning up at termination endpoint_wrapper ep(ep_tx, nullptr, tx_cq, "tx threadlocal"); tx_endpoints_.push(ep); @@ -1211,19 +1198,17 @@ namespace NS_LIBFABRIC { bool ok = tx_endpoints_.pop(ep); if (!ok) { - LF_DEB(NS_DEBUG::cnb_deb, - error(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), - "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); + LF_DEB(cnb_deb, + error(str<>("Scalable Ep"), "pop tx", "ep", hptr(ep.get_ep()), "tx cq", + hptr(ep.get_tx_cq()), "rx cq", hptr(ep.get_rx_cq()))); throw std::runtime_error("tx endpoint wrapper pop fail"); } eps_->tl_stx_ = stack_endpoint( ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &tx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(eps_->tl_stx_.get_ep()), "tx cq", - 
NS_DEBUG::ptr(eps_->tl_stx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_stx_.get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "pop tx", "ep", hptr(eps_->tl_stx_.get_ep()), + "tx cq", hptr(eps_->tl_stx_.get_tx_cq()), "rx cq", + hptr(eps_->tl_stx_.get_rx_cq()))); } return eps_->tl_stx_.endpoint_; } @@ -1235,10 +1220,9 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- void bind_address_vector_to_endpoint(struct fid_ep* endpoint, struct fid_av* av) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Binding AV"), "to", NS_DEBUG::ptr(endpoint))); + LF_DEB(cnb_deb, debug(str<>("Binding AV"), "to", hptr(endpoint))); int ret = fi_ep_bind(endpoint, &av->fid, 0); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind address_vector"); } @@ -1248,10 +1232,9 @@ namespace NS_LIBFABRIC { struct fid_ep* endpoint, struct fid_cq*& cq, uint32_t cqtype, char const* type) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Binding CQ"), "to", NS_DEBUG::ptr(endpoint), type)); + LF_DEB(cnb_deb, debug(str<>("Binding CQ"), "to", hptr(endpoint), type)); int ret = fi_ep_bind(endpoint, &cq->fid, cqtype); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind cq"); } @@ -1259,7 +1242,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- fid_cq* bind_tx_queue_to_rx_endpoint(struct fi_info* info, struct fid_ep* ep) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); info->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); fid_cq* 
tx_cq = create_completion_queue(fabric_domain_, info->tx_attr->size, "tx->rx"); // shared send/recv endpoint - bind send cq to the recv endpoint @@ -1271,10 +1254,9 @@ namespace NS_LIBFABRIC { void enable_endpoint(struct fid_ep* endpoint, char const* type) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Enabling endpoint"), NS_DEBUG::ptr(endpoint))); + LF_DEB(cnb_deb, debug(str<>("Enabling endpoint"), hptr(endpoint))); int ret = fi_enable(endpoint); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_enable"); } @@ -1282,7 +1264,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- locality get_endpoint_address(struct fid* id) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); locality::locality_data local_addr; std::size_t addrlen = locality_defs::array_size; @@ -1297,17 +1279,16 @@ namespace NS_LIBFABRIC { // optimized out when debug logging is false if constexpr (NS_DEBUG::cnb_deb.is_enabled()) { - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("raw address data"), "size", debug::dec<4>(addrlen), " : ", + LF_DEB(cnb_deb, + debug(str<>("raw address data"), "size", dec<4>(addrlen), " : ", locality(local_addr, av_).to_str())); std::stringstream temp2; for (std::size_t i = 0; i < locality_defs::array_length; ++i) { - temp2 << debug::hex<8>(local_addr[i]) << " - "; + temp2 << NS_DEBUG::hex<8>(local_addr[i]) << " - "; } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("raw address data"), temp2.str().c_str())); + LF_DEB(cnb_deb, debug(str<>("raw address data"), temp2.str().c_str())); } return locality(local_addr, av_); } @@ -1315,7 +1296,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- 
fid_pep* create_passive_endpoint(struct fid_fabric* fabric, struct fi_info* info) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); struct fid_pep* ep; int ret = fi_passive_ep(fabric, info, &ep, nullptr); @@ -1373,14 +1354,13 @@ namespace NS_LIBFABRIC { addr.set_fi_address(fi_addr_t(i)); if ((ret == 0) && (addrlen <= locality_defs::array_size)) { - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("address vector"), debug::dec<3>(i), addr.to_str())); + LF_DEB(cnb_deb, debug(str<>("address vector"), dec<3>(i), addr.to_str())); } else { - LF_DEB(NS_DEBUG::cnb_err, - error(debug::str<>("address length"), debug::dec<3>(addrlen), - debug::dec<3>(locality_defs::array_size))); + LF_DEB(cnb_err, + error(str<>("address length"), dec<3>(addrlen), + dec<3>(locality_defs::array_size))); throw std::runtime_error("debug_print_av_vector : address vector " "traversal failure"); } @@ -1477,7 +1457,7 @@ namespace NS_LIBFABRIC { struct fid_domain* domain, size_t size, char const* type) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); struct fid_cq* cq; fi_cq_attr cq_attr = {}; @@ -1486,7 +1466,7 @@ namespace NS_LIBFABRIC { cq_attr.wait_cond = FI_CQ_COND_NONE; cq_attr.size = size; cq_attr.flags = 0 /*FI_COMPLETION*/; - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("CQ size"), debug::dec<4>(size))); + LF_DEB(cnb_deb, trace(str<>("CQ size"), dec<4>(size))); // open completion queue on fabric domain and set context to null int ret = fi_cq_open(domain, &cq_attr, &cq, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_cq_open"); @@ -1496,7 +1476,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- fid_av* create_address_vector(struct fi_info* info, int N, int num_rx_contexts) { - [[maybe_unused]] auto scp 
= NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); fid_av* av; fi_av_attr av_attr = {fi_av_type(0), 0, 0, 0, nullptr, nullptr, 0}; @@ -1508,7 +1488,7 @@ namespace NS_LIBFABRIC { int rx_ctx_bits = 0; #ifdef RX_CONTEXTS_SUPPORT while (num_rx_contexts >> ++rx_ctx_bits); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("rx_ctx_bits"), rx_ctx_bits)); + LF_DEB(cnb_deb, debug(str<>("rx_ctx_bits"), rx_ctx_bits)); #endif av_attr.rx_ctx_bits = rx_ctx_bits; // if contexts is nonzero, then we are using a single scalable endpoint @@ -1520,11 +1500,11 @@ namespace NS_LIBFABRIC { } else { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("map FI_AV_TABLE"))); + LF_DEB(cnb_deb, debug(str<>("map FI_AV_TABLE"))); av_attr.type = FI_AV_TABLE; } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating AV"))); + LF_DEB(cnb_deb, debug(str<>("Creating AV"))); int ret = fi_av_open(fabric_domain_, &av_attr, &av, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_av_open"); return av; @@ -1536,23 +1516,22 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- locality insert_address(fid_av* av, locality const& address) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("inserting AV"), address.to_str(), NS_DEBUG::ptr(av))); + LF_DEB(cnb_deb, trace(str<>("inserting AV"), address.to_str(), hptr(av))); fi_addr_t fi_addr = 0xffff'ffff; int ret = fi_av_insert(av, address.fabric_data().data(), 1, &fi_addr, 0, nullptr); if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } else if (ret == 0) { - NS_DEBUG::cnb_deb.error("fi_av_insert called with existing address"); + LF_DEB(cnb_deb, error("fi_av_insert called with existing address")); NS_LIBFABRIC::fabric_error(ret, 
"fi_av_insert did not return 1"); } // address was generated correctly, now update the locality with the fi_addr locality new_locality(address, fi_addr, av); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), new_locality.to_str(), - "fi_addr", debug::hex<4>(fi_addr))); + LF_DEB(cnb_deb, + trace(str<>("AV add"), "rank", dec<>(fi_addr), new_locality.to_str(), "fi_addr", + hex<4>(fi_addr))); return new_locality; } }; diff --git a/src/libfabric/memory_region.hpp b/src/libfabric/memory_region.hpp index f2cd5d45..2028fc41 100644 --- a/src/libfabric/memory_region.hpp +++ b/src/libfabric/memory_region.hpp @@ -72,7 +72,7 @@ struct fi_mr_attr { struct fid_mr** mr) { [[maybe_unused]] auto scp = NS_MEMORY::mrn_deb.scope( - __func__, NS_DEBUG::ptr(buf), NS_DEBUG::dec<>(len), device_id); + __func__, NS_DEBUG::hptr(buf), NS_DEBUG::dec<>(len), device_id); // struct iovec addresses = {/*.iov_base = */ const_cast(buf), /*.iov_len = */ len}; fi_mr_attr attr = { @@ -263,18 +263,17 @@ struct fi_mr_attr { { (void) region; #if 1 || has_debug + using namespace NS_DEBUG; os << "region " - << NS_DEBUG::ptr(®ion) - //<< " fi_region " << NS_DEBUG::ptr(region.region_) - << " address " << NS_DEBUG::ptr(region.address_) << " size " - << NS_DEBUG::hex<6>(region.size_) - //<< " used_space " << NS_DEBUG::hex<6>(region.used_space_/*size_*/) + << hptr(®ion) + //<< " fi_region " << hptr(region.region_) + << " address " << hptr(region.address_) << " size " + << hex<6>(region.size_) + //<< " used_space " << hex<6>(region.used_space_/*size_*/) << " loc key " - << NS_DEBUG::ptr( - region.region_ ? region_provider::get_local_key(region.region_) : nullptr) + << hptr(region.region_ ? region_provider::get_local_key(region.region_) : nullptr) << " rem key " - << NS_DEBUG::ptr( - region.region_ ? region_provider::get_remote_key(region.region_) : 0); + << hptr(region.region_ ? 
region_provider::get_remote_key(region.region_) : 0); ///// clang-format off ///// clang-format on #endif @@ -352,28 +351,25 @@ struct fi_mr_attr { region_ = nullptr; // base_addr_ = memory_handle::address_; - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("memory_segment"), *this, device_id)); + LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("memory_segment"), *this, device_id)); int ret = region_provider::fi_register_memory(pd, device_id, buffer, length, region_provider::access_flags(), 0, key++, &(region_)); if (!ret) { LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("Registered region"), "device", device_id, *this)); + trace(str<>("Registered region"), "device", device_id, *this)); } if (bind_mr) { ret = fi_mr_bind(region_, (struct fid*) ep, 0); if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_bind"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Bound region"), *this)); } + else { LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("Bound region"), *this)); } ret = fi_mr_enable(region_); if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_enable"); } - else - { - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Enabled region"), *this)); - } + else { LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("Enabled region"), *this)); } } } @@ -398,7 +394,7 @@ struct fi_mr_attr { #if has_debug // clang-format off os << *static_cast(®ion) - << " base address " << NS_DEBUG::ptr(region.base_addr_); + << " base address " << NS_DEBUG::hptr(region.base_addr_); // clang-format on #endif return os; diff --git a/src/libfabric/operation_context.cpp b/src/libfabric/operation_context.cpp index 8c8d277f..0f6de97a 100644 --- a/src/libfabric/operation_context.cpp +++ b/src/libfabric/operation_context.cpp @@ -16,7 +16,7 @@ namespace oomph::libfabric { void operation_context::handle_cancelled() { - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::hptr(this), __func__); // enqueue the 
cancelled/callback if (std::holds_alternative(m_req)) { @@ -35,7 +35,7 @@ namespace oomph::libfabric { int operation_context::handle_tagged_recv_completion_impl(void* user_data) { - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::hptr(this), __func__); if (std::holds_alternative(m_req)) { // regular (non-shared) recv @@ -82,7 +82,7 @@ namespace oomph::libfabric { detail::request_state** req = reinterpret_cast(&m_req); LF_DEB(NS_MEMORY::opctx_deb<9>, error( - NS_DEBUG::str<>("invalid request_state"), this, "request", NS_DEBUG::ptr(req))); + str<>("invalid request_state"), this, "request", hptr(req))); throw std::runtime_error("Request state invalid in handle_tagged_recv"); } return 1; diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index 74d6ba09..faed3d70 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -33,7 +33,7 @@ namespace oomph::libfabric { , m_req{req} { [[maybe_unused]] auto scp = - opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__, "request", req); + opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__, "request", req); } // -------------------------------------------------------------------- diff --git a/src/libfabric/operation_context_base.hpp b/src/libfabric/operation_context_base.hpp index 5de5c386..462c79b5 100644 --- a/src/libfabric/operation_context_base.hpp +++ b/src/libfabric/operation_context_base.hpp @@ -33,7 +33,7 @@ namespace NS_LIBFABRIC { operation_context_base() : context_reserved_space() { - [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::hptr(this), __func__); } // error diff --git a/src/libfabric/print.hpp b/src/libfabric/print.hpp index 301f8e12..04364b98 100644 --- a/src/libfabric/print.hpp +++ b/src/libfabric/print.hpp @@ -118,18 +118,18 @@ namespace NS_DEBUG { // 
------------------------------------------------------------------ // format as pointer // ------------------------------------------------------------------ - struct ptr + struct hptr { - ptr(void const* v) + hptr(void const* v) : data_(v) { } - ptr(std::uintptr_t const v) + hptr(std::uintptr_t const v) : data_(reinterpret_cast(v)) { } void const* data_; - friend std::ostream& operator<<(std::ostream& os, ptr const& d) + friend std::ostream& operator<<(std::ostream& os, hptr const& d) { os << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase << std::hex << reinterpret_cast(d.data_); @@ -230,32 +230,6 @@ namespace NS_DEBUG { } }; - // ------------------------------------------------------------------ - // format as ip address - // ------------------------------------------------------------------ - struct ipaddr - { - ipaddr(void const* a) - : data_(reinterpret_cast(a)) - , ipdata_(0) - { - } - ipaddr(uint32_t const a) - : data_(reinterpret_cast(&ipdata_)) - , ipdata_(a) - { - } - uint8_t const* data_; - uint32_t const ipdata_; - - friend std::ostream& operator<<(std::ostream& os, ipaddr const& p) - { - os << std::dec << int(p.data_[0]) << "." << int(p.data_[1]) << "." << int(p.data_[2]) - << "." 
<< int(p.data_[3]); - return os; - } - }; - // ------------------------------------------------------------------ // helper fuction for printing CRC32 // ------------------------------------------------------------------ @@ -284,9 +258,10 @@ namespace NS_DEBUG { char const* txt_; friend std::ostream& operator<<(std::ostream& os, mem_crc32 const& p) { + using namespace NS_DEBUG; std::uint8_t const* byte = static_cast(p.addr_); os << "Memory:"; - os << " address " << ptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) + os << " address " << hptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) << " CRC32:" << hex<8, std::size_t>(crc32(p.addr_, p.len_)) << "\n"; size_t i = 0; while (i < std::min(size_t(128), p.len_)) diff --git a/src/libfabric/request_state.hpp b/src/libfabric/request_state.hpp index 58f15dd5..74958fc5 100644 --- a/src/libfabric/request_state.hpp +++ b/src/libfabric/request_state.hpp @@ -69,13 +69,13 @@ namespace oomph { namespace detail { , m_operation_context{this} { [[maybe_unused]] auto scp = - libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + libfabric::opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__); } ~shared_request_state() { [[maybe_unused]] auto scp = - libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + libfabric::opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__); } void progress(); From 526088c42441d2e2a94cb4b6eb7510fcb845383e Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 10 Jul 2025 11:49:17 +0200 Subject: [PATCH 13/35] Use safe fi_tostr_r and a std::array buffer in place of fi_tostr --- src/libfabric/controller.hpp | 12 ++++-- src/libfabric/controller_base.hpp | 68 +++++++++++++++++++------------ 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 39c88fd9..1b8e3b55 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -291,14 +291,16 @@ namespace oomph::libfabric { // if (ret > 0) 
{ + std::array buf; int processed = 0; for (int i = 0; i < ret; ++i) { ++sends_complete; LF_DEB(cnt_deb<9>, debug(str<>("Completion"), i, dec<2>(i), "txcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + fi_tostr_r( + buf.data(), buf.size(), &entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), + "(", dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) { @@ -395,14 +397,16 @@ namespace oomph::libfabric { // if (ret > 0) { + std::array buf; int processed = 0; for (int i = 0; i < ret; ++i) { ++recvs_complete; LF_DEB(cnt_deb<2>, debug(str<>("Completion"), i, "rxcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + fi_tostr_r( + buf.data(), buf.size(), &entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), + "(", dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) { diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 205c40ab..301e2e8d 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -744,6 +744,10 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- uint64_t caps_flags(uint64_t available_flags) const { + char buf[1024]; + LF_DEB(cnb_err, + debug(str<>("caps available"), hex(available_flags), + fi_tostr_r(buf, 1024, &available_flags, FI_TYPE_CAPS))); uint64_t required_flags = static_cast(this)->caps_flags(available_flags); // @@ -754,10 +758,14 @@ namespace NS_LIBFABRIC { if ((required_flags & f) && ((available_flags & f) == 0)) { LF_DEB(cnb_err, - error(str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS))); + error(str<>("caps flags unavailable"), + fi_tostr_r(buf, 1024, &f, 
FI_TYPE_CAPS))); final_flags &= ~f; } } + LF_DEB(cnb_err, + debug(str<>("caps flags requested"), hex(final_flags), + fi_tostr_r(buf, 1024, &final_flags, FI_TYPE_CAPS))); return final_flags; } @@ -832,27 +840,30 @@ namespace NS_LIBFABRIC { if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); if (display_fabric_info_ && fabric_info_) { - char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); - if (info_str) - { - LF_DEB(cnb_err, - trace(str<>("Fabric info"), "pre-check ->", - fabric_hints_->fabric_attr->prov_name, "\n", - fi_tostr(fabric_info_, FI_TYPE_INFO))); - } + std::array buf; + LF_DEB(cnb_err, + trace(str<>("Fabric info"), "pre-check ->", + fabric_hints_->fabric_attr->prov_name, "\n", + fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO))); } - fabric_hints_->caps = caps_flags(fabric_info_->caps); + // set capabilities we want to request + uint64_t all_caps = + caps_flags(fabric_info_->rx_attr->caps | fabric_info_->tx_attr->caps); + + // fabric_hints_->caps = all_caps; + fabric_hints_->tx_attr->caps = fabric_info_->tx_attr->caps & all_caps; + fabric_hints_->rx_attr->caps = fabric_info_->rx_attr->caps & all_caps; + if ((fabric_info_->mode & FI_CONTEXT) == 0) { + std::array buf; LF_DEB(cnb_err, debug(str<>("mode FI_CONTEXT!=0"), - fi_tostr(&fabric_hints_->domain_attr->mode, FI_TYPE_MODE))); + fi_tostr_r(buf.data(), buf.size(), &fabric_hints_->domain_attr->mode, + FI_TYPE_MODE))); } - fabric_hints_->mode = fabric_info_->mode; fabric_hints_->domain_attr->name = strdup(fabric_info_->domain_attr->name); - std::cout << fi_tostr(&fabric_hints_->domain_attr->mr_mode, FI_TYPE_MR_MODE) - << std::endl; // Enable/Disable the use of progress threads auto progress = libfabric_progress_type(); @@ -862,8 +873,7 @@ namespace NS_LIBFABRIC { if (threads > 1) { - LF_DEB(cnb_deb, debug(str<>("FI_THREAD_FID"))); - // Enable thread safe mode (Does not work with psm2 provider) + LF_DEB(cnb_deb, debug(str<>("Setting Threads>1 level"))); // 
fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; // fabric_hints_->domain_attr->threading = FI_THREAD_FID; fabric_hints_->domain_attr->threading = threadlevel_flags(); @@ -891,29 +901,35 @@ namespace NS_LIBFABRIC { if (rootnode) { + std::array buf; LF_DEB(cnb_err, - trace(str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); + trace(str<>("Fabric info"), "\n", + fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO))); } - int mrkey = (fabric_hints_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; + int mrkey = (fabric_info_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_PROV_KEY"), mrkey)); - bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; + bool context = (fabric_info_->mode & FI_CONTEXT) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_CONTEXT"), context)); - mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; + mrlocal = (fabric_info_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_LOCAL"), mrlocal)); - mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; + mrbind = (fabric_info_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ENDPOINT"), mrbind)); /* Check if provider requires heterogeneous memory registration */ - mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; + mrhmem = (fabric_info_->domain_attr->mr_mode & FI_MR_HMEM) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_HMEM"), mrhmem)); - bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; + bool mrhalloc = (fabric_info_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); + int auth_key = (fabric_info_->domain_attr->max_ep_auth_key); + LF_DEB(cnb_deb, debug(str<>("Supported max_ep_auth_key"), auth_key)); + fabric_info_->domain_attr->max_ep_auth_key = 0; + LF_DEB(cnb_deb, debug(str<>("Creating fi_fabric"))); ret = 
fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); @@ -985,8 +1001,10 @@ namespace NS_LIBFABRIC { // Print fabric info to a human-readable string if available if (display_fabric_info_ && fabric_info_) { - char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); - if (info_str) { std::cout << "Libfabric fabric info:\n" << info_str << std::endl; } + std::array buf; + std::cout << "Libfabric fabric info:\n" + << fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO) + << std::endl; } fi_freeinfo(fabric_hints_); } From 9c7c6df65a2acde412c38316018e5578994ae42c Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 13:38:40 +0000 Subject: [PATCH 14/35] Fixes to support new hwmalloc API --- src/libfabric/context.cpp | 10 +++++----- src/libfabric/context.hpp | 6 ++++-- src/libfabric/test/check_libfabric.cpp | 6 ++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 6b49098a..a5a51e19 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -22,10 +22,10 @@ namespace oomph { using controller_type = libfabric::controller; - context_impl::context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve, bool debug) + context_impl::context_impl( + MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config, bool debug) : context_base(comm, thread_safe) - , m_heap{this, message_pool_never_free, message_pool_reserve} + , m_heap{this, heap_config} , m_recv_cb_queue(128) , m_recv_cb_cancel(8) { @@ -35,8 +35,8 @@ namespace oomph { m_ctxt_tag = reinterpret_cast(this); OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); - LF_DEB(src_deb, - debug(str<>("Broadcast"), "rank", dec<3>(rank), "context", hptr(m_ctxt_tag))); + LF_DEB( + src_deb, debug(str<>("Broadcast"), "rank", dec<3>(rank), "context", 
hptr(m_ctxt_tag))); // TODO fix the thread safety // problem: controller is a singleton and has problems when 2 contexts are created diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index e7f0308f..76654d66 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -57,8 +57,10 @@ namespace oomph { callback_queue m_recv_cb_cancel; public: - context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve, bool debug = false); + context_impl(MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config, + bool debug = false); + // context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, + // std::size_t message_pool_reserve, bool debug = false); context_impl(context_impl const&) = delete; context_impl(context_impl&&) = delete; diff --git a/src/libfabric/test/check_libfabric.cpp b/src/libfabric/test/check_libfabric.cpp index 070c8f11..11d9788e 100644 --- a/src/libfabric/test/check_libfabric.cpp +++ b/src/libfabric/test/check_libfabric.cpp @@ -15,6 +15,8 @@ #include "../communicator.hpp" #include "../context.hpp" +#include + int main(int argc, char** argv) { using namespace oomph; @@ -24,6 +26,6 @@ int main(int argc, char** argv) bool debug = true; // mpi_environment env(multi_threaded, argc, argv); - auto ctxt = - context_impl(MPI_COMM_WORLD, true, message_pool_never_free, message_pool_reserve, debug); + hwmalloc::heap_config const& default_heap = hwmalloc::get_default_heap_config(); + auto ctxt = context_impl(MPI_COMM_WORLD, true, default_heap /*, debug*/); } From 2764787961077be157ace9f752d06d03cc50fbd0 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 13:39:57 +0000 Subject: [PATCH 15/35] ifdefs for LNX provider, especially address unsupported address-string functions --- src/libfabric/controller.hpp | 15 ++++++++++----- src/libfabric/controller_base.hpp | 7 +++++-- src/libfabric/locality.hpp | 12 ++++++++---- 3 files changed, 23 
insertions(+), 11 deletions(-) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 1b8e3b55..f015a0c4 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -67,7 +67,7 @@ namespace oomph::libfabric { // -------------------------------------------------------------------- constexpr fi_threading threadlevel_flags() { -#if defined(HAVE_LIBFABRIC_GNI) /*|| defined(HAVE_LIBFABRIC_CXI)*/ +#if defined(HAVE_LIBFABRIC_GNI) || defined(HAVE_LIBFABRIC_LNX) return FI_THREAD_ENDPOINT; #else return FI_THREAD_SAFE; @@ -77,10 +77,13 @@ namespace oomph::libfabric { // -------------------------------------------------------------------- uint64_t caps_flags(uint64_t /*available_flags*/) const { - uint64_t flags_required = FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | - FI_SEND | FI_REMOTE_READ | FI_REMOTE_WRITE; -#if OOMPH_ENABLE_DEVICE + uint64_t flags_required = FI_TAGGED; +#ifndef HAVE_LIBFABRIC_LNX + flags_required |= FI_MSG | FI_TAGGED | FI_RECV | FI_SEND | FI_RMA | FI_READ | FI_WRITE | + FI_REMOTE_READ | FI_REMOTE_WRITE; +# if OOMPH_ENABLE_DEVICE flags_required |= FI_HMEM; +# endif #endif return flags_required; } @@ -170,7 +173,9 @@ namespace oomph::libfabric { LF_DEB(cnt_deb<9>, debug(str<>("initialize_localities"), size, "localities")); MPI_exchange_localities(av, mpi_comm, rank, size); +#ifndef HAVE_LIBFABRIC_LNX // address stuff not yet supported debug_print_av_vector(size); +#endif LF_DEB(cnt_deb<9>, debug(str<>("Done localities"))); } @@ -179,7 +184,7 @@ namespace oomph::libfabric { { #if defined(HAVE_LIBFABRIC_GNI) return true; -#elif defined(HAVE_LIBFABRIC_CXI) +#elif defined(HAVE_LIBFABRIC_LNX) // @todo : cxi provider is not yet thread safe using scalable endpoints return false; #else diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 301e2e8d..e710757c 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -778,6 +778,9 @@ 
namespace NS_LIBFABRIC { // -------------------------------------------------------------------- constexpr std::int64_t memory_registration_mode_flags() { +#if defined(HAVE_LIBFABRIC_LNX) + return FI_MR_HMEM; +#endif std::int64_t base_flags = FI_MR_ALLOCATED; // | FI_MR_VIRT_ADDR | FI_MR_PROV_KEY; #if OOMPH_ENABLE_DEVICE base_flags = base_flags | FI_MR_HMEM; @@ -1390,8 +1393,8 @@ namespace NS_LIBFABRIC { { #if defined(HAVE_LIBFABRIC_GNI) return true; -#elif defined(HAVE_LIBFABRIC_CXI) - // @todo : cxi provider is not yet thread safe using scalable endpoints +#elif defined(HAVE_LIBFABRIC_LNX) + // @todo : provider is not yet thread safe using scalable endpoints return false; #else return (threadlevel_flags() == FI_THREAD_SAFE || diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 67c753e7..9e91cec1 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -50,7 +50,7 @@ #endif #if defined(HAVE_LIBFABRIC_LNX) -# define HAVE_LIBFABRIC_LOCALITY_SIZE 32 +# define HAVE_LIBFABRIC_LOCALITY_SIZE 512 #endif namespace oomph { @@ -172,12 +172,16 @@ namespace oomph { namespace libfabric { std::string to_str() const { - char sbuf[256]; - size_t buflen = 256; + size_t buflen = 1024; + std::array buf; if (!av_) { return "No address vector"; } - char const* straddr_ret = fi_av_straddr(av_, data_.data(), sbuf, &buflen); + char const* straddr_ret = fi_av_straddr(av_, data_.data(), buf.data(), &buflen); +#ifdef HAVE_LIBFABRIC_LNX + return "LNX does not yet support straddr"; +#else std::string result = straddr_ret ? 
straddr_ret : "Address formatting Error"; return result; +#endif } private: From fe41c22ec45355471b9f6c397ab9a402a9b6a5b0 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 14:33:10 +0000 Subject: [PATCH 16/35] Fix an API change introduced in libfabric 1.20 --- src/libfabric/controller_base.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index e710757c..06f19562 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -928,11 +928,11 @@ namespace NS_LIBFABRIC { bool mrhalloc = (fabric_info_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); - +#if (FI_MAJOR_VERSION > 1) || ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION >= 20) int auth_key = (fabric_info_->domain_attr->max_ep_auth_key); LF_DEB(cnb_deb, debug(str<>("Supported max_ep_auth_key"), auth_key)); fabric_info_->domain_attr->max_ep_auth_key = 0; - +#endif LF_DEB(cnb_deb, debug(str<>("Creating fi_fabric"))); ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); From ad198ec41eb23ce868664815d25700b0da7ed9cb Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 14:37:47 +0000 Subject: [PATCH 17/35] Replace strcpy with strncpy --- src/libfabric/context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index a5a51e19..a1debfd7 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -76,7 +76,7 @@ namespace oomph { static char buffer[32]; std::string temp = std::to_string(m_controller->rendezvous_threshold()); if (temp.size() > 31) throw std::runtime_error("Bad string option check, fix please"); - strcpy(buffer, temp.c_str()); + strncpy(buffer, temp.c_str(), 32); return buffer; } else { return "unspecified"; } From
80f8108fbe4354601ecf25093b4d95137627a17e Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 15:29:12 +0000 Subject: [PATCH 18/35] Fix CI build fails due to unsupported older libfabric version --- src/libfabric/controller_base.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 06f19562..d423803b 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -39,6 +39,10 @@ #include "memory_region.hpp" #include "operation_context_base.hpp" +#if ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION <= 12) +#define fi_tostr_r(a,b,c,d) " " +#endif + // #define DISABLE_FI_INJECT // #define EXCESSIVE_POLLING_BACKOFF_MICRO_S 50 From f2468653b3ee29995ac2325431ef2edb041e96d6 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Fri, 17 Apr 2026 14:05:50 +0200 Subject: [PATCH 19/35] Use libfatbat mini libfabric repo for base libfabric layer communicator / controller / operation context/ etc etc objects all now inherit from libfatbat:: base classes that provide the bulk of features but can be overridden for customization purposes --- src/libfabric/CMakeLists.txt | 50 +- src/libfabric/communicator.hpp | 594 +++---- src/libfabric/context.cpp | 143 +- src/libfabric/context.hpp | 224 +-- src/libfabric/controller.hpp | 704 ++++---- src/libfabric/controller_base.hpp | 1564 ------------------ src/libfabric/fabric_error.hpp | 49 - src/libfabric/libfabric_defines_template.hpp | 34 - src/libfabric/locality.hpp | 206 --- src/libfabric/memory_region.hpp | 409 ----- src/libfabric/operation_context.cpp | 188 ++- src/libfabric/operation_context.hpp | 69 +- src/libfabric/operation_context_base.hpp | 95 -- src/libfabric/print.hpp | 705 -------- src/libfabric/request_state.hpp | 167 +- src/libfabric/test/check_libfabric.cpp | 18 +- 16 files changed, 1079 insertions(+), 4140 deletions(-) delete mode 100644 src/libfabric/controller_base.hpp delete mode 100644 src/libfabric/fabric_error.hpp delete mode 100644 src/libfabric/locality.hpp delete mode 100644 src/libfabric/memory_region.hpp delete mode 100644 src/libfabric/operation_context_base.hpp delete mode 100644 src/libfabric/print.hpp delete mode 100644
src/libfabric/fabric_error.hpp delete mode 100644 src/libfabric/locality.hpp delete mode 100644 src/libfabric/memory_region.hpp delete mode 100644 src/libfabric/operation_context_base.hpp delete mode 100644 src/libfabric/print.hpp diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index 92128897..a69ab5d3 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -1,35 +1,62 @@ +include(FetchContent) + +# ------------------------------------------------------------------------------ +# +# ------------------------------------------------------------------------------ +if(EXISTS ${PROJECT_SOURCE_DIR}/ext/libfatbat) + set(FETCHCONTENT_SOURCE_DIR_LIBFATBAT ${PROJECT_SOURCE_DIR}/ext/libfatbat) +endif() +fetchcontent_declare( + libFatbat GIT_REPOSITORY "https://github.com/biddisco/libfatbat.git" + GIT_TAG "main" +) +fetchcontent_makeavailable(libFatbat) +message( + STATUS + "✅ libFatbat building: (${libFatbat_VERSION}) in ${libFatbat_BINARY_DIR}" +) + +# ------------------------------------------------------------------------------ +# +# ------------------------------------------------------------------------------ find_package(Boost REQUIRED CONFIG COMPONENTS thread) # dummy library of our private headers add_library(oomph_private_libfabric_headers INTERFACE) -target_include_directories(oomph_private_libfabric_headers INTERFACE - "$") +target_include_directories( + oomph_private_libfabric_headers + INTERFACE "$" +) -# actual library (created in oomph_libfabric.cmake) source files, -# depends on dummy library +# actual library (created in oomph_libfabric.cmake) source files, depends on +# dummy library target_link_libraries(oomph_libfabric PRIVATE oomph_private_libfabric_headers) target_link_libraries(oomph_libfabric PRIVATE Boost::thread) +target_link_libraries(oomph_libfabric PRIVATE libfatbat) # we need to include a binary dir for the oomph_config_defines.hpp file -target_include_directories(oomph_libfabric INTERFACE - "$") 
+target_include_directories( + oomph_libfabric + INTERFACE "$" +) list(TRANSFORM oomph_sources PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/../ - OUTPUT_VARIABLE oomph_sources_libfabric) + OUTPUT_VARIABLE oomph_sources_libfabric +) target_sources(oomph_libfabric PRIVATE ${oomph_sources_libfabric}) target_sources(oomph_libfabric PRIVATE context.cpp) target_sources(oomph_libfabric PRIVATE operation_context.cpp) -# if we are using GPU, then the libfabric library was probably built with -# gpu support, and we should link to cuda to prevent link errors -if (HWMALLOC_ENABLE_DEVICE) +# if we are using GPU, then the libfabric library was probably built with gpu +# support, and we should link to cuda to prevent link errors +if(HWMALLOC_ENABLE_DEVICE) include(CheckLanguage) check_language(CUDA) if(CMAKE_CUDA_COMPILER) enable_language(CUDA) else() - message(STATUS "No CUDA support") + message(STATUS "No CUDA support") return() endif() @@ -39,4 +66,5 @@ endif() add_executable(check_libfabric test/check_libfabric.cpp) target_link_libraries(check_libfabric PUBLIC oomph_libfabric) +target_link_libraries(check_libfabric PRIVATE libfatbat) target_include_directories(check_libfabric PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index 6bec497b..ddd369b2 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -25,122 +25,109 @@ #include #include -namespace oomph { +namespace oomph +{ - using operation_context = libfabric::operation_context; +inline auto comm_log = libfatbat::log::create("OomphCom"); - using tag_disp = NS_DEBUG::detail::hex<12, uintptr_t>; +using operation_context = libfabric::operation_context; - template - inline NS_DEBUG::print_threshold com_deb("COMMUNI"); +class communicator_impl : public communicator_base +{ + using tag_type = std::uint64_t; + // + using segment_type = libfatbat::memory_segment; + using region_type = segment_type::handle_type; - static NS_DEBUG::enable_print 
com_err("COMMUNI"); + using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; - class communicator_impl : public communicator_base + public: + context_impl* m_context; + libfatbat::endpoint_wrapper m_tx_endpoint; + libfatbat::endpoint_wrapper m_rx_endpoint; + // + callback_queue m_send_cb_queue; + callback_queue m_recv_cb_queue; + callback_queue m_recv_cb_cancel; + + // -------------------------------------------------------------------- + communicator_impl(context_impl* ctxt) + : communicator_base(ctxt) + , m_context(ctxt) + , m_send_cb_queue(128) + , m_recv_cb_queue(128) + , m_recv_cb_cancel(8) { - using tag_type = std::uint64_t; - // - using segment_type = libfabric::memory_segment; - using region_type = segment_type::handle_type; - - using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; - - public: - context_impl* m_context; - libfabric::endpoint_wrapper m_tx_endpoint; - libfabric::endpoint_wrapper m_rx_endpoint; - // - callback_queue m_send_cb_queue; - callback_queue m_recv_cb_queue; - callback_queue m_recv_cb_cancel; - - // -------------------------------------------------------------------- - communicator_impl(context_impl* ctxt) - : communicator_base(ctxt) - , m_context(ctxt) - , m_send_cb_queue(128) - , m_recv_cb_queue(128) - , m_recv_cb_cancel(8) - { - LF_DEB(com_deb<9>, debug(str<>("MPI_comm"), hptr(mpi_comm()))); - m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); - m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); - } + LIBFATBAT_DEBUG(comm_log, "{:<20} MPI_comm {} ", "Construct", (mpi_comm())); + m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); + m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); + } - // -------------------------------------------------------------------- - ~communicator_impl() { clear_callback_queues(); } + // -------------------------------------------------------------------- + ~communicator_impl() { clear_callback_queues(); } - // 
-------------------------------------------------------------------- - auto& get_heap() noexcept { return m_context->get_heap(); } + // -------------------------------------------------------------------- + auto& get_heap() noexcept { return m_context->get_heap(); } - // -------------------------------------------------------------------- - /// generate a tag with 0xRRRRRRRRtttttttt rank, tag. - /// original tag can be 32bits, then we add 32bits of rank info. - inline std::uint64_t make_tag64( - std::uint32_t tag, /*std::uint32_t rank, */ std::uintptr_t ctxt) - { - return (((ctxt & 0x0000'0000'00FF'FFFF) << 24) | + // -------------------------------------------------------------------- + /// generate a tag with 0xRRRRRRRRtttttttt rank, tag. + /// original tag can be 32bits, then we add 32bits of rank info. + inline std::uint64_t make_tag64(std::uint32_t tag, /*std::uint32_t rank, */ std::uintptr_t ctxt) + { + return (((ctxt & 0x0000'0000'00FF'FFFF) << 24) | ((std::uint64_t(tag) & 0x0000'0000'00FF'FFFF))); - } + } - // -------------------------------------------------------------------- - template - inline void execute_fi_function(Func F, char const* msg, Args&&... args) + // -------------------------------------------------------------------- + template + inline void execute_fi_function(Func F, char const* msg, Args&&... 
args) + { + bool ok = false; + while (!ok) { - bool ok = false; - while (!ok) + ssize_t ret = F(std::forward(args)...); + if (ret == 0) { return; } + else if (ret == -FI_EAGAIN) { - ssize_t ret = F(std::forward(args)...); - if (ret == 0) { return; } - else if (ret == -FI_EAGAIN) - { - // com_deb<9>.error("Reposting", msg); - // no point stressing the system - m_context->get_controller()->poll_for_work_completions(this); - } - else if (ret == -FI_ENOENT) - { - // if a node has failed, we can recover - // @TODO : put something better here - com_err.error("No destination endpoint, terminating."); - std::terminate(); - } - else if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), msg); } + // com_deb<9>.error("Reposting", msg); + // no point stressing the system + m_context->get_controller()->poll_for_work_completions(this); } + else if (ret == -FI_ENOENT) + { + // if a node has failed, we can recover + // @TODO : put something better here + LIBFATBAT_ERROR(comm_log, "{:<20} No destination endpoint, terminating.", + "fi_function"); + std::terminate(); + } + else if (ret) { throw libfatbat::fabric_error(int(ret), msg); } } + } - // -------------------------------------------------------------------- - // this takes a pinned memory region and sends it - void send_tagged_region(region_type const& send_region, std::size_t size, - fi_addr_t dst_addr_, uint64_t tag_, operation_context* ctxt) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - // clang-format off - LF_DEB(com_deb<9>, - debug(str<>("send_tagged_region"), - "->", dec<2>(dst_addr_), - send_region, - "tag", tag_disp(tag_), - "context", hptr(ctxt), - "tx endpoint", hptr(m_tx_endpoint.get_ep()))); - // clang-format on - execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), - send_region.get_address(), size, send_region.get_local_key(), dst_addr_, tag_, - ctxt); - } + // -------------------------------------------------------------------- + // this takes a pinned memory 
region and sends it + void send_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, + uint64_t tag_, operation_context* ctxt) + { + LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); + LIBFATBAT_DEBUG(comm_log, "{:<20} -> {:02} {} {} tag {:#12x} context {} tx endpoint {}", + "send_tagged_region", dst_addr_, send_region, tag_, static_cast(ctxt), + static_cast(m_tx_endpoint.get_ep())); + execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), send_region.get_address(), + size, send_region.get_local_key(), dst_addr_, tag_, ctxt); + } - // -------------------------------------------------------------------- - // this takes a pinned memory region and sends it using inject instead of send - void inject_tagged_region( - region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, uint64_t tag_) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - // clang-format on - LF_DEB(com_deb<9>, - debug(str<>("inject tagged"), "->", dec<2>(dst_addr_), send_region, "tag", - tag_disp(tag_), "tx endpoint", hptr(m_tx_endpoint.get_ep()))); - // clang-format off + // -------------------------------------------------------------------- + // this takes a pinned memory region and sends it using inject instead of send + void inject_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, + uint64_t tag_) + { + LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); + LIBFATBAT_DEBUG(comm_log, "{:<20} -> {:02} {} {} tag {:#12x} tx endpoint {}", + "inject_tagged_region", dst_addr_, send_region, tag_, + static_cast(m_tx_endpoint.get_ep())); execute_fi_function(fi_tinject, "fi_tinject", m_tx_endpoint.get_ep(), send_region.get_address(), size, dst_addr_, tag_); } @@ -152,283 +139,252 @@ namespace oomph { void recv_tagged_region(region_type const& recv_region, std::size_t size, fi_addr_t src_addr_, uint64_t tag_, operation_context* ctxt) { - [[maybe_unused]] auto scp = 
com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - // clang-format off - LF_DEB(com_deb<1>, - debug(str<>("recv_tagged_region"), - "<-", dec<2>(src_addr_), - recv_region, - "tag", tag_disp(tag_), - "context", hptr(ctxt), - "rx endpoint", hptr(m_rx_endpoint.get_ep()))); - // clang-format on - constexpr uint64_t ignore = 0; - execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), - recv_region.get_address(), size, recv_region.get_local_key(), src_addr_, tag_, - ignore, ctxt); - // if (l.owns_lock()) l.unlock(); - } + LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); + LIBFATBAT_DEBUG(comm_log, "{:<20} <- {:02} {} {} tag {:#12x} context {} rx endpoint {}", + "recv_tagged_region", src_addr_, recv_region, tag_, static_cast(ctxt), + static_cast(m_rx_endpoint.get_ep())); + constexpr uint64_t ignore = 0; + execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), recv_region.get_address(), + size, recv_region.get_local_key(), src_addr_, tag_, ignore, ctxt); + // if (l.owns_lock()) l.unlock(); + } - // -------------------------------------------------------------------- - send_request send(context_impl::heap_type::pointer const& ptr, std::size_t size, - rank_type dst, oomph::tag_type tag, - util::unique_function&& cb, std::size_t* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - std::uint64_t stag = - make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); + // -------------------------------------------------------------------- + send_request send(context_impl::heap_type::pointer const& ptr, std::size_t size, rank_type dst, + oomph::tag_type tag, util::unique_function&& cb, + std::size_t* scheduled) + { + LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); + std::uint64_t stag = make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? 
ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, - error(str<>("send mismatch"), "size", hex<6>(size), "reg size", - hex<6>(reg.get_size()))); - } + if (size != reg.get_size()) + { + LIBFATBAT_ERROR(comm_log, "{:<20} size {:#06x} reg size {:#06x} send mismatch", "send", + size, reg.get_size()); + } #endif - m_context->get_controller()->sends_posted_++; + m_context->get_controller()->sends_posted_++; - // use optimized inject if msg is very small - if (size <= m_context->get_controller()->get_tx_inject_size()) + // use optimized inject if msg is very small + if (size <= m_context->get_controller()->get_tx_inject_size()) + { + inject_tagged_region(reg, size, fi_addr_t(dst), stag); + if (!has_reached_recursion_depth()) { - inject_tagged_region(reg, size, fi_addr_t(dst), stag); - if (!has_reached_recursion_depth()) - { - auto inc = recursion(); - cb(dst, tag); - return {}; - } - else - { - // construct request which is also an operation context - auto s = m_req_state_factory.make( - m_context, this, scheduled, dst, tag, std::move(cb)); - s->create_self_ref(); - while (!m_send_cb_queue.push(s.get())) {} - return {std::move(s)}; - } + auto inc = recursion(); + cb(dst, tag); + return {}; } + else + { + // construct request which is also an operation context + auto s = + m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); + s->create_self_ref(); + while (!m_send_cb_queue.push(s.get())) {} + return {std::move(s)}; + } + } + + // construct request which is also an operation context + auto s = m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); + s->create_self_ref(); - // construct request which is also an operation context - auto s = m_req_state_factory.make(m_context, this, scheduled, dst, tag, 
std::move(cb)); - s->create_self_ref(); - - // clang-format off - LF_DEB(com_deb<9>, - debug(str<>("Send"), - "thisrank", dec<>(rank()), - "rank", dec<>(dst), - "tag", tag_disp(std::uint64_t(tag)), - //"wrapped tag", tag_disp(std::uint64_t(tag.get())), - "stag", tag_disp(stag), - "addr", hptr(reg.get_address()), - "size", hex<6>(size), - "reg size", hex<6>(reg.get_size()), - "op_ctx", hptr(&(s->m_operation_context)), - "req", hptr(s.get()))); + LIBFATBAT_DEBUG(comm_log, + "{:<20} thisrank {} rank {} tag {:#12x} stag {:#12x} addr {} size {:#06x} reg size {:#06x} op_ctx {} req {}", + "send", rank(), dst, std::uint64_t(tag), stag, static_cast(reg.get_address()), + size, reg.get_size(), static_cast(&(s->m_operation_context)), + static_cast(s.get())); #if OOMPH_ENABLE_DEVICE - if (!ptr.on_device()) { - LF_DEB(com_deb<9>, - debug(str<>("send region CRC32"), - mem_crc32(reg.get_address(), size, "CRC32"))); + if (!ptr.on_device()) + { + LIBFATBAT_DEBUG(comm_log, "{:<20} {}", "send device region", + libfatbat::log::mem_crc32(reg.get_address(), size)); } #endif - // clang-format on - send_tagged_region(reg, size, fi_addr_t(dst), stag, &(s->m_operation_context)); - return {std::move(s)}; - } + send_tagged_region(reg, size, fi_addr_t(dst), stag, &(s->m_operation_context)); + return {std::move(s)}; + } - recv_request recv(context_impl::heap_type::pointer& ptr, std::size_t size, rank_type src, - oomph::tag_type tag, util::unique_function&& cb, - std::size_t* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); + recv_request recv(context_impl::heap_type::pointer& ptr, std::size_t size, rank_type src, + oomph::tag_type tag, util::unique_function&& cb, + std::size_t* scheduled) + { + LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); + std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - 
auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, - error(str<>("recv mismatch"), "size", hex<6>(size), "reg size", - hex<6>(reg.get_size()))); - } + if (size != reg.get_size()) + { + LIBFATBAT_ERROR(comm_log, "{:<20} size {:#06x} reg size {:#06x} recv mismatch", "recv", + size, reg.get_size()); + } #endif - m_context->get_controller()->recvs_posted_++; - - // construct request which is also an operation context - auto s = m_req_state_factory.make(m_context, this, scheduled, src, tag, std::move(cb)); - s->create_self_ref(); - - // clang-format off - LF_DEB(com_deb<9>, - debug(str<>("recv"), - "thisrank", dec<>(rank()), - "rank", dec<>(src), - "tag", tag_disp(std::uint64_t(tag)), - //"wrapped tag", tag_disp(std::uint64_t(tag.get())), - "stag", tag_disp(stag), - "addr", hptr(reg.get_address()), - "size", hex<6>(size), - "reg size", hex<6>(reg.get_size()), - "op_ctx", hptr(&(s->m_operation_context)), - "req", hptr(s.get()))); + m_context->get_controller()->recvs_posted_++; + + // construct request which is also an operation context + auto s = m_req_state_factory.make(m_context, this, scheduled, src, tag, std::move(cb)); + s->create_self_ref(); + + LIBFATBAT_DEBUG(comm_log, + "{:<20} thisrank {} rank {} tag {:#12x} stag {:#12x} addr {} size {:#06x} reg size {:#06x} op_ctx {} req {}", + "recv", rank(), src, std::uint64_t(tag), stag, static_cast(reg.get_address()), + size, reg.get_size(), static_cast(&(s->m_operation_context)), + static_cast(s.get())); #if OOMPH_ENABLE_DEVICE - if (!ptr.on_device()) { - LF_DEB(com_deb<9>, - debug(str<>("recv region CRC32"), - mem_crc32(reg.get_address(), size, "CRC32"))); + if (!ptr.on_device()) + { + LIBFATBAT_DEBUG(comm_log, "{:<20} {}", "recv device region", + 
libfatbat::log::mem_crc32(reg.get_address(), size)); } #endif - // clang-format on - recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); - return {std::move(s)}; - } + recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); + return {std::move(s)}; + } - shared_recv_request shared_recv(context_impl::heap_type::pointer& ptr, std::size_t size, - rank_type src, oomph::tag_type tag, - util::unique_function&& cb, - std::atomic* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); + shared_recv_request shared_recv(context_impl::heap_type::pointer& ptr, std::size_t size, + rank_type src, oomph::tag_type tag, + util::unique_function&& cb, + std::atomic* scheduled) + { + LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); + std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? 
ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, - error(str<>("recv mismatch"), "size", hex<6>(size), "reg size", - hex<6>(reg.get_size()))); - } -#endif - m_context->get_controller()->recvs_posted_++; - - // construct request which is also an operation context - auto s = std::make_shared( - m_context, this, scheduled, src, tag, std::move(cb)); - s->create_self_ref(); - - // clang-format off - LF_DEB(com_deb<9>, - debug(str<>("shared_recv"), - "thisrank", dec<>(rank()), - "rank", dec<>(src), - "tag", tag_disp(std::uint64_t(tag)), - //"wrapped tag", tag_disp(std::uint64_t(tag.get())), - "stag", tag_disp(stag), - "addr", hptr(reg.get_address()), - "size", hex<6>(size), - "reg size", hex<6>(reg.get_size()), - "op_ctx", hptr(&(s->m_operation_context)), - "req", hptr(s.get()))); - // clang-format on - - recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); - m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); - return {std::move(s)}; - } - - void progress() + if (size != reg.get_size()) { - m_context->get_controller()->poll_for_work_completions(this); - clear_callback_queues(); + LIBFATBAT_ERROR(comm_log, "{:<20} size {:#06x} reg size {:#06x} recv mismatch", "recv", + size, reg.get_size()); } +#endif + m_context->get_controller()->recvs_posted_++; + + // construct request which is also an operation context + auto s = std::make_shared(m_context, this, scheduled, src, + tag, std::move(cb)); + s->create_self_ref(); + + LIBFATBAT_DEBUG(comm_log, + "{:<20} thisrank {} rank {} tag {:#12x} stag {:#12x} addr {} size {:#06x} reg size {:#06x} op_ctx {} req {}", + "shared_recv", rank(), src, std::uint64_t(tag), stag, + static_cast(reg.get_address()), size, reg.get_size(), + static_cast(&(s->m_operation_context)), static_cast(s.get())); + + recv_tagged_region(reg, size, fi_addr_t(src), 
stag, &(s->m_operation_context)); + m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); + return {std::move(s)}; + } - void clear_callback_queues() - { - // work through ready callbacks, which were pushed to the queue - // (by other threads) - m_send_cb_queue.consume_all([](oomph::detail::request_state* req) { - [[maybe_unused]] auto scp = - com_deb<9>.scope("m_send_cb_queue.consume_all", NS_DEBUG::hptr(req)); + void progress() + { + m_context->get_controller()->poll_for_work_completions(this); + clear_callback_queues(); + } + + void clear_callback_queues() + { + // work through ready callbacks, which were pushed to the queue + // (by other threads) + m_send_cb_queue.consume_all( + [](oomph::detail::request_state* req) + { + LIBFATBAT_SCOPE(comm_log, "{:<20} req {} callback", "m_send_cb_queue.consume_all", + static_cast(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); - m_recv_cb_queue.consume_all([](oomph::detail::request_state* req) { - [[maybe_unused]] auto scp = - com_deb<9>.scope("m_recv_cb_queue.consume_all", NS_DEBUG::hptr(req)); + m_recv_cb_queue.consume_all( + [](oomph::detail::request_state* req) + { + LIBFATBAT_SCOPE(comm_log, "{:<20} req {} callback", "m_recv_cb_queue.consume_all", + static_cast(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); - m_context->m_recv_cb_queue.consume_all([](detail::shared_request_state* req) { + m_context->m_recv_cb_queue.consume_all( + [](detail::shared_request_state* req) + { auto ptr = req->release_self_ref(); req->invoke_cb(); }); - } + } - // Cancel is a problem with libfabric because fi_cancel is asynchronous. - // The item to be cancelled will either complete with CANCELLED status - // or will complete as usual (ie before the cancel could take effect) - // - // We can only be certain if we poll until the completion happens - // or attach a callback to the cancel notification which is not supported - // by oomph. 
- bool cancel_recv(detail::request_state* s) - { - // get the original message operation context - operation_context* op_ctx = &(s->m_operation_context); + // Cancel is a problem with libfabric because fi_cancel is asynchronous. + // The item to be cancelled will either complete with CANCELLED status + // or will complete as usual (ie before the cancel could take effect) + // + // We can only be certain if we poll until the completion happens + // or attach a callback to the cancel notification which is not supported + // by oomph. + bool cancel_recv(detail::request_state* s) + { + // get the original message operation context + operation_context* op_ctx = &(s->m_operation_context); - // submit the cancellation request - bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); - LF_DEB(com_deb<9>, debug(str<>("Cancel"), "ok", ok, "op_ctx", hptr(op_ctx))); + // submit the cancellation request + bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); + LIBFATBAT_DEBUG(comm_log, "{:<20} op_ctx {} fi_cancel ok {}", "cancel_recv", + static_cast(op_ctx), ok); - // if the cancel operation failed completely, return - if (!ok) return false; + // if the cancel operation failed completely, return + if (!ok) return false; - bool found = false; - while (!found) + bool found = false; + while (!found) + { + m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); + // otherwise, poll until we know if it worked + std::stack temp_stack; + detail::request_state* temp; + while (!found && m_recv_cb_cancel.pop(temp)) { - m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); - // otherwise, poll until we know if it worked - std::stack temp_stack; - detail::request_state* temp; - while (!found && m_recv_cb_cancel.pop(temp)) + if (temp == s) { - if (temp == s) - { - // our recv was cancelled correctly - found = true; - LF_DEB(com_deb<9>, - debug(str<>("Cancel"), "succeeded", "op_ctx", hptr(op_ctx))); - auto ptr = 
s->release_self_ref(); - s->set_canceled(); - } - else - { - // a different cancel operation - temp_stack.push(temp); - } + // our recv was cancelled correctly + found = true; + LIBFATBAT_DEBUG(comm_log, "{:<20} op_ctx {} fi_cancel ok {}", "cancel_recv", + static_cast(op_ctx), ok); + auto ptr = s->release_self_ref(); + s->set_canceled(); } - // return any weird unhandled cancels back to the queue - while (!temp_stack.empty()) + else { - auto temp = temp_stack.top(); - temp_stack.pop(); - m_recv_cb_cancel.push(temp); + // a different cancel operation + temp_stack.push(temp); } } - return found; + // return any weird unhandled cancels back to the queue + while (!temp_stack.empty()) + { + auto temp = temp_stack.top(); + temp_stack.pop(); + m_recv_cb_cancel.push(temp); + } } - }; + return found; + } +}; -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index a1debfd7..f47671c6 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -16,89 +16,92 @@ #include #include -namespace oomph { - // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print src_deb("__SRC__"); +namespace oomph +{ +inline auto src_log = libfatbat::log::create("SRC"); - using controller_type = libfabric::controller; +using controller_type = libfabric::controller; - context_impl::context_impl( - MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config, bool debug) - : context_base(comm, thread_safe) - , m_heap{this, heap_config} - , m_recv_cb_queue(128) - , m_recv_cb_cancel(8) - { - int rank, size; - OOMPH_CHECK_MPI_RESULT(MPI_Comm_rank(comm, &rank)); - OOMPH_CHECK_MPI_RESULT(MPI_Comm_size(comm, &size)); +context_impl::context_impl(MPI_Comm comm, bool thread_safe, + hwmalloc::heap_config const& heap_config, bool debug) +: context_base(comm, thread_safe) +, m_heap{this, heap_config} +, m_recv_cb_queue(128) +, m_recv_cb_cancel(8) +{ + int rank, size; + 
OOMPH_CHECK_MPI_RESULT(MPI_Comm_rank(comm, &rank)); + OOMPH_CHECK_MPI_RESULT(MPI_Comm_size(comm, &size)); - m_ctxt_tag = reinterpret_cast(this); - OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); - LF_DEB( - src_deb, debug(str<>("Broadcast"), "rank", dec<3>(rank), "context", hptr(m_ctxt_tag))); + OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); + LIBFATBAT_DEBUG(src_log, "{:<20} rank {:03} context {}", "Broadcast", rank, + static_cast(this)); - // TODO fix the thread safety - // problem: controller is a singleton and has problems when 2 contexts are created - // in the following order: single threaded first, then multi-threaded after - // int threads = thread_safe ? std::thread::hardware_concurrency() : 1; - // int threads = std::thread::hardware_concurrency(); - // Determine the number of threads based on the CPU affinity mask - int threads = 1; + // TODO fix the thread safety + // problem: controller is a singleton and has problems when 2 contexts are created + // in the following order: single threaded first, then multi-threaded after + // int threads = thread_safe ? 
std::thread::hardware_concurrency() : 1; + // int threads = std::thread::hardware_concurrency(); + // Determine the number of threads based on the CPU affinity mask + int threads = 1; #if defined(_GNU_SOURCE) - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - if (sched_getaffinity(0, sizeof(cpuset), &cpuset) == 0) - threads = CPU_COUNT(&cpuset); - else - threads = boost::thread::physical_concurrency(); -#else + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + if (sched_getaffinity(0, sizeof(cpuset), &cpuset) == 0) threads = CPU_COUNT(&cpuset); + else threads = boost::thread::physical_concurrency(); +#else + threads = boost::thread::physical_concurrency(); #endif - m_controller = init_libfabric_controller(this, comm, rank, size, threads, debug); - m_domain = m_controller->get_domain(); - } + m_controller = init_libfabric_controller(this, comm, rank, size, threads, debug); + m_domain = m_controller->get_domain(); +} + +communicator_impl* +context_impl::get_communicator() +{ + auto comm = new communicator_impl{this}; + m_comms_set.insert(comm); + return comm; +} - communicator_impl* context_impl::get_communicator() +char const* +context_impl::get_transport_option(std::string const& opt) +{ + if (opt == "name") { return "libfabric"; } + else if (opt == "progress") { return libfabric_progress_string(); } + else if (opt == "endpoint") { return libfabric_endpoint_string(); } + else if (opt == "rendezvous_threshold") { - auto comm = new communicator_impl{this}; - m_comms_set.insert(comm); - return comm; + static char buffer[32]; + std::string temp = std::to_string(m_controller->rendezvous_threshold()); + if (temp.size() > 31) throw std::runtime_error("Bad string option check, fix please"); + strncpy(buffer, temp.c_str(), 32); + return buffer; } - - char const* context_impl::get_transport_option(std::string const& opt) + else { - if (opt == "name") { return "libfabric"; } - else if (opt == "progress") { return libfabric_progress_string(); } - else if (opt == "endpoint") { return 
libfabric_endpoint_string(); } - else if (opt == "rendezvous_threshold") - { - static char buffer[32]; - std::string temp = std::to_string(m_controller->rendezvous_threshold()); - if (temp.size() > 31) throw std::runtime_error("Bad string option check, fix please"); - strncpy(buffer, temp.c_str(), 32); - return buffer; - } - else { return "unspecified"; } + return "unspecified"; } +} - std::shared_ptr context_impl::init_libfabric_controller( - oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, int size, int threads, bool debug) +std::shared_ptr +context_impl::init_libfabric_controller(oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, + int size, int threads, bool debug) +{ + // only allow one thread to pass, make other wait + static std::mutex m_init_mutex; + std::lock_guard lock(m_init_mutex); + static std::shared_ptr instance(nullptr); + if (!instance.get()) { - // only allow one thread to pass, make other wait - static std::mutex m_init_mutex; - std::lock_guard lock(m_init_mutex); - static std::shared_ptr instance(nullptr); - if (!instance.get()) - { - LF_DEB(src_deb, - debug(NS_DEBUG::str<>("New Controller"), "rank", dec<3>(rank), "size", dec<3>(size), - "threads", dec<3>(threads))); - instance.reset(new controller_type()); - if (debug) instance->enable_debug(); - instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); - } - return instance; + LIBFATBAT_DEBUG(src_log, "{:<20} New Controller rank {:03} size {:03} threads {:03}", + "New Controller", rank, size, threads); + instance.reset(new controller_type()); + if (debug) instance->enable_debug(); + instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); } + return instance; +} -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index 76654d66..11e6788a 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -15,146 +15,146 @@ #include #include +#include // paths relative to 
backend #include <../context_base.hpp> #include -#include #include -namespace oomph { - - static NS_DEBUG::enable_print ctx_deb("CONTEXT"); - - using controller_type = libfabric::controller; - - class context_impl : public context_base +namespace oomph +{ + +inline auto ctxt_log = libfatbat::log::create("Context"); + +using controller_type = libfabric::controller; + +class context_impl : public context_base +{ + public: + using region_type = libfatbat::memory_segment; + using domain_type = region_type::provider_domain; + using device_region_type = libfatbat::memory_segment; + using heap_type = hwmalloc::heap; + using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; + + private: + heap_type m_heap; + domain_type* m_domain; + std::shared_ptr m_controller; + std::uintptr_t m_ctxt_tag; + + public: + // -------------------------------------------------- + // create a singleton ptr to a libfabric controller that + // can be shared between oomph context objects + static std::shared_ptr init_libfabric_controller(oomph::context_impl* ctx, + MPI_Comm comm, int rank, int size, int threads, bool debug = false); + + // queue for shared recv callbacks + callback_queue m_recv_cb_queue; + // queue for canceled shared recv requests + callback_queue m_recv_cb_cancel; + + public: + context_impl(MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config, + bool debug = false); + // context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, + // std::size_t message_pool_reserve, bool debug = false); + context_impl(context_impl const&) = delete; + context_impl(context_impl&&) = delete; + + region_type make_region(void* const ptr, std::size_t size, int device_id) { - public: - using region_type = libfabric::memory_segment; - using domain_type = region_type::provider_domain; - using device_region_type = libfabric::memory_segment; - using heap_type = hwmalloc::heap; - using callback_queue = boost::lockfree::queue, 
boost::lockfree::allocator>>; - - private: - heap_type m_heap; - domain_type* m_domain; - std::shared_ptr m_controller; - std::uintptr_t m_ctxt_tag; - - public: - // -------------------------------------------------- - // create a singleton ptr to a libfabric controller that - // can be shared between oomph context objects - static std::shared_ptr init_libfabric_controller(oomph::context_impl* ctx, - MPI_Comm comm, int rank, int size, int threads, bool debug = false); - - // queue for shared recv callbacks - callback_queue m_recv_cb_queue; - // queue for canceled shared recv requests - callback_queue m_recv_cb_cancel; - - public: - context_impl(MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config, - bool debug = false); - // context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - // std::size_t message_pool_reserve, bool debug = false); - context_impl(context_impl const&) = delete; - context_impl(context_impl&&) = delete; - - region_type make_region(void* const ptr, std::size_t size, int device_id) + if (m_controller->get_mrbind()) { - if (m_controller->get_mrbind()) - { - void* endpoint = m_controller->get_rx_endpoint().get_ep(); - return libfabric::memory_segment(m_domain, ptr, size, true, endpoint, device_id); - } - else - { - return libfabric::memory_segment(m_domain, ptr, size, false, nullptr, device_id); - } + void* endpoint = m_controller->get_rx_endpoint().get_ep(); + return libfatbat::memory_segment(m_domain, ptr, size, true, endpoint, device_id); } + else + { + return libfatbat::memory_segment(m_domain, ptr, size, false, nullptr, device_id); + } + } - auto& get_heap() noexcept { return m_heap; } + auto& get_heap() noexcept { return m_heap; } - communicator_impl* get_communicator(); + communicator_impl* get_communicator(); - // we must modify all tags to use 32bits of context ptr for uniqueness - inline std::uintptr_t get_context_tag() { return m_ctxt_tag; } + // we must modify all tags to use 32bits of context 
ptr for uniqueness + inline std::uintptr_t get_context_tag() { return m_ctxt_tag; } - inline controller_type* get_controller() /*const */ { return m_controller.get(); } - char const* get_transport_option(std::string const& opt); + inline controller_type* get_controller() /*const */ { return m_controller.get(); } + char const* get_transport_option(std::string const& opt); - void progress() { get_controller()->poll_for_work_completions(nullptr); } + void progress() { get_controller()->poll_for_work_completions(nullptr); } - bool cancel_recv(detail::shared_request_state* s) - { - // get the original message operation context - auto op_ctx = &(s->m_operation_context); + bool cancel_recv(detail::shared_request_state* s) + { + // get the original message operation context + auto op_ctx = &(s->m_operation_context); - // submit the cancellation request - bool ok = - (fi_cancel(&(get_controller()->get_rx_endpoint().get_ep()->fid), op_ctx) == 0); + // submit the cancellation request + bool ok = (fi_cancel(&(get_controller()->get_rx_endpoint().get_ep()->fid), op_ctx) == 0); - // if the cancel operation failed completely, return - if (!ok) return false; + // if the cancel operation failed completely, return + if (!ok) return false; - bool found = false; - while (!found) + bool found = false; + while (!found) + { + get_controller()->poll_recv_queue(get_controller()->get_rx_endpoint().get_rx_cq(), + nullptr); + // otherwise, poll until we know if it worked + std::stack temp_stack; + detail::shared_request_state* temp; + while (!found && m_recv_cb_cancel.pop(temp)) { - get_controller()->poll_recv_queue( - get_controller()->get_rx_endpoint().get_rx_cq(), nullptr); - // otherwise, poll until we know if it worked - std::stack temp_stack; - detail::shared_request_state* temp; - while (!found && m_recv_cb_cancel.pop(temp)) + if (temp == s) { - if (temp == s) - { - // our recv was cancelled correctly - found = true; - LF_DEB(oomph::ctx_deb, - debug(str<>("Cancel shared"), "succeeded", 
"op_ctx", hptr(op_ctx))); - auto ptr = s->release_self_ref(); - s->set_canceled(); - } - else - { - // a different cancel operation - temp_stack.push(temp); - } + // our recv was cancelled correctly + found = true; + LIBFATBAT_DEBUG(ctxt_log, "{:<20} op_ctx {} fi_cancel ok {}", + "Cancel Shared recv", static_cast(op_ctx), ok); + auto ptr = s->release_self_ref(); + s->set_canceled(); } - // return any weird unhandled cancels back to the queue - while (!temp_stack.empty()) + else { - auto temp = temp_stack.top(); - temp_stack.pop(); - m_recv_cb_cancel.push(temp); + // a different cancel operation + temp_stack.push(temp); } } - return found; + // return any weird unhandled cancels back to the queue + while (!temp_stack.empty()) + { + auto temp = temp_stack.top(); + temp_stack.pop(); + m_recv_cb_cancel.push(temp); + } } + return found; + } - unsigned int num_tag_bits() const noexcept { return 32; } - }; + unsigned int num_tag_bits() const noexcept { return 32; } +}; - // -------------------------------------------------------------------- - template <> - inline oomph::libfabric::memory_segment - register_memory(oomph::context_impl& c, void* const ptr, std::size_t size) - { - return c.make_region(ptr, size, -2); - } +// -------------------------------------------------------------------- +template<> +inline libfatbat::memory_segment +register_memory(oomph::context_impl& c, void* const ptr, std::size_t size) +{ + return c.make_region(ptr, size, -2); +} #if OOMPH_ENABLE_DEVICE - template <> - inline oomph::libfabric::memory_segment register_device_memory( - context_impl& c, int device_id, void* ptr, std::size_t size) - { - return c.make_region(ptr, size, device_id); - } +template<> +inline libfatbat::memory_segment +register_device_memory(context_impl& c, int device_id, void* ptr, std::size_t size) +{ + return c.make_region(ptr, size, device_id); +} #endif -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp 
index f015a0c4..d04acc87 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -25,9 +25,11 @@ #include #include // -#include "controller_base.hpp" -#include "fabric_error.hpp" -#include "locality.hpp" +#include +#include +#include +#include +// #include "oomph_libfabric_defines.hpp" #include "operation_context.hpp" // @@ -35,430 +37,424 @@ // #include -namespace NS_DEBUG { - // cppcheck-suppress ConfigurationNotChecked - - using namespace oomph::debug; - template - inline NS_DEBUG::print_threshold cnt_deb("CONTROL"); - // - static NS_DEBUG::enable_print cnt_err("CONTROL"); -} // namespace NS_DEBUG +namespace oomph::libfabric +{ +inline auto ctrl_log = libfatbat::log::create("Ctrl"); -namespace oomph::libfabric { - - class controller : public controller_base +class controller : public libfatbat::controller_base +{ + public: + // -------------------------------------------------------------------- + controller() + : libfatbat::controller_base() { - public: - // -------------------------------------------------------------------- - controller() - : controller_base() - { - } + } - // -------------------------------------------------------------------- - void initialize_derived(std::string const&, bool, int, size_t, MPI_Comm mpi_comm) - { - // Broadcast address of all endpoints to all ranks - // and fill address vector with info - exchange_addresses(av_, mpi_comm); - } + // -------------------------------------------------------------------- + void initialize_derived(std::string const&, bool, int, size_t, MPI_Comm mpi_comm) + { + // Broadcast address of all endpoints to all ranks + // and fill address vector with info + exchange_addresses(av_, mpi_comm); + } - // -------------------------------------------------------------------- - constexpr fi_threading threadlevel_flags() - { + // -------------------------------------------------------------------- + constexpr fi_threading threadlevel_flags() + { #if defined(HAVE_LIBFABRIC_GNI) || 
defined(HAVE_LIBFABRIC_LNX) - return FI_THREAD_ENDPOINT; + return FI_THREAD_ENDPOINT; #else - return FI_THREAD_SAFE; + return FI_THREAD_SAFE; #endif - } + } - // -------------------------------------------------------------------- - uint64_t caps_flags(uint64_t /*available_flags*/) const - { - uint64_t flags_required = FI_TAGGED; + // -------------------------------------------------------------------- + uint64_t caps_flags(uint64_t /*available_flags*/) const + { + uint64_t flags_required = FI_TAGGED; #ifndef HAVE_LIBFABRIC_LNX - flags_required |= FI_MSG | FI_TAGGED | FI_RECV | FI_SEND | FI_RMA | FI_READ | FI_WRITE | - FI_REMOTE_READ | FI_REMOTE_WRITE; -# if OOMPH_ENABLE_DEVICE - flags_required |= FI_HMEM; -# endif + flags_required |= FI_MSG | FI_TAGGED | FI_RECV | FI_SEND | FI_RMA | FI_READ | FI_WRITE | + FI_REMOTE_READ | FI_REMOTE_WRITE; +#if OOMPH_ENABLE_DEVICE + flags_required |= FI_HMEM; #endif - return flags_required; - } +#endif + return flags_required; + } - // -------------------------------------------------------------------- - // we do not need to perform any special actions on init (to contact root node) - void setup_root_node_address(struct fi_info* /*info*/) {} + // -------------------------------------------------------------------- + // we do not need to perform any special actions on init (to contact root node) + void setup_root_node_address(struct fi_info* /*info*/) {} - // -------------------------------------------------------------------- - // send address to rank 0 and receive array of all localities - void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::hptr(this), __func__); + // -------------------------------------------------------------------- + // send address to rank 0 and receive array of all localities + void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) + { + LIBFATBAT_SCOPE(ctrl_log, "{} {}", static_cast(this), 
__func__); - // array of empty locality objects - std::vector localities(size); - // - if (rank > 0) + // array of empty locality objects + std::vector localities(size); + // + if (rank > 0) + { + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "sending here", here_.to_str(), + static_cast(here_.fabric_data().data()), + libfatbat::locality_defs::array_size); + /*int err = */ MPI_Send(here_.fabric_data().data(), + libfatbat::locality_defs::array_size, MPI_CHAR, + 0, // dst rank + 0, // tag + comm); + + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "receiving all", "", "", + libfatbat::locality_defs::array_size); + + MPI_Status status; + /*err = */ MPI_Recv(localities.data(), size * libfatbat::locality_defs::array_size, + MPI_CHAR, + 0, // src rank + 0, // tag + comm, &status); + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "received addresses", "", "", + libfatbat::locality_defs::array_size); + } + else + { + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "receiving addresses", "", "", + libfatbat::locality_defs::array_size); + memcpy(&localities[0], here_.fabric_data().data(), + libfatbat::locality_defs::array_size); + for (int i = 1; i < size; ++i) { - LF_DEB(cnt_deb<9>, - debug( - str<>("sending here"), here_.to_str(), "size", locality_defs::array_size)); - /*int err = */ MPI_Send(here_.fabric_data().data(), locality_defs::array_size, - MPI_CHAR, - 0, // dst rank - 0, // tag - comm); - - LF_DEB( - cnt_deb<9>, debug(str<>("receiving all"), "size", locality_defs::array_size)); - + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "receiving address", i, "", + libfatbat::locality_defs::array_size); MPI_Status status; - /*err = */ MPI_Recv(localities.data(), size * locality_defs::array_size, MPI_CHAR, - 0, // src rank - 0, // tag + /*int err = */ MPI_Recv(&localities[i], size * libfatbat::locality_defs::array_size, + MPI_CHAR, + i, // src rank + 0, // tag comm, &status); - LF_DEB(cnt_deb<9>, debug(str<>("received addresses"))); - } - else - { - 
LF_DEB(cnt_deb<9>, debug(str<>("receiving addresses"))); - memcpy(&localities[0], here_.fabric_data().data(), locality_defs::array_size); - for (int i = 1; i < size; ++i) - { - LF_DEB(cnt_deb<9>, debug(str<>("receiving address"), dec<>(i))); - MPI_Status status; - /*int err = */ MPI_Recv(&localities[i], size * locality_defs::array_size, - MPI_CHAR, - i, // src rank - 0, // tag - comm, &status); - LF_DEB(cnt_deb<9>, debug(str<>("received address"), dec<>(i))); - } - - LF_DEB(cnt_deb<9>, debug(str<>("sending all"))); - for (int i = 1; i < size; ++i) - { - LF_DEB(cnt_deb<9>, debug(str<>("sending to"), dec<>(i))); - /*int err = */ MPI_Send(&localities[0], size * locality_defs::array_size, - MPI_CHAR, - i, // dst rank - 0, // tag - comm); - } + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "received address", i, "", + libfatbat::locality_defs::array_size); } - // all ranks should now have a full localities vector - LF_DEB(cnt_deb<9>, debug(str<>("populating vector"))); - for (int i = 0; i < size; ++i) + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "sending all", "", "", + libfatbat::locality_defs::array_size); + for (int i = 1; i < size; ++i) { - locality temp(localities[i], av); - insert_address(temp); + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} size {}", "sending to", i, "", + libfatbat::locality_defs::array_size); + /*int err = */ MPI_Send(&localities[0], size * libfatbat::locality_defs::array_size, + MPI_CHAR, + i, // dst rank + 0, // tag + comm); } } - // -------------------------------------------------------------------- - // if we did not bootstrap, then fetch the list of all localities - // and insert each one into the address vector - void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) + // all ranks should now have a full localities vector + LIBFATBAT_DEBUG(ctrl_log, "{:<20} size {}", "populating vector", + libfatbat::locality_defs::array_size); + for (int i = 0; i < size; ++i) { - [[maybe_unused]] auto scp = 
NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::hptr(this), __func__); + libfatbat::locality temp(localities[i], av); + insert_address(temp); + } + } + + // -------------------------------------------------------------------- + // if we did not bootstrap, then fetch the list of all localities + // and insert each one into the address vector + void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) + { + LIBFATBAT_SCOPE(ctrl_log, "{} {}", static_cast(this), __func__); - int rank, size; - MPI_Comm_rank(mpi_comm, &rank); - MPI_Comm_size(mpi_comm, &size); + int rank, size; + MPI_Comm_rank(mpi_comm, &rank); + MPI_Comm_size(mpi_comm, &size); - LF_DEB(cnt_deb<9>, debug(str<>("initialize_localities"), size, "localities")); + LIBFATBAT_DEBUG(ctrl_log, "{:<20} size {}", "initialize_localities", size); - MPI_exchange_localities(av, mpi_comm, rank, size); -#ifndef HAVE_LIBFABRIC_LNX // address stuff not yet supported - debug_print_av_vector(size); + MPI_exchange_localities(av, mpi_comm, rank, size); +#ifndef HAVE_LIBFABRIC_LNX // address stuff not yet supported + debug_print_av_vector(size); #endif - LF_DEB(cnt_deb<9>, debug(str<>("Done localities"))); - } + LIBFATBAT_DEBUG(ctrl_log, "{:<20} size {}", "Done localities", size); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_tx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_tx_lock() + { #if defined(HAVE_LIBFABRIC_GNI) - return true; + return true; #elif defined(HAVE_LIBFABRIC_LNX) - // @todo : cxi provider is not yet thread safe using scalable endpoints - return false; + // @todo : cxi provider is not yet thread safe using scalable endpoints + return false; #else - return (threadlevel_flags() == FI_THREAD_SAFE || + return (threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::threadlocalTx); #endif - } + } - // -------------------------------------------------------------------- - inline 
controller_base::unique_lock get_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_rx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_rx_lock() + { #ifdef HAVE_LIBFABRIC_GNI - return true; + return true; #else - return (threadlevel_flags() == FI_THREAD_SAFE || - endpoint_type_ == endpoint_type::scalableTxRx); + return ( + threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::scalableTxRx); #endif - } + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_, 
std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - int poll_send_queue(fid_cq* send_cq, void* user_data) - { + // -------------------------------------------------------------------- + int poll_send_queue(fid_cq* send_cq, void* user_data) + { #ifdef EXCESSIVE_POLLING_BACKOFF_MICRO_S - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); - if (std::chrono::duration_cast(now - send_poll_stamp) - .count() < EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - send_poll_stamp = now; + std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - send_poll_stamp).count() < + EXCESSIVE_POLLING_BACKOFF_MICRO_S) + return 0; + send_poll_stamp = now; #endif - int ret; - fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = try_tx_lock(); + int ret; + fi_cq_msg_entry entry[max_completions_array_limit_]; + assert(max_completions_per_poll_ <= max_completions_array_limit_); + { + auto lock = try_tx_lock(); - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } + // if we're not threadlocal and didn't get the lock, + // then another thread is polling now, just exit + if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } - static auto polling = - NS_DEBUG::cnt_deb<9>.make_timer(1, NS_DEBUG::str<>("poll send queue")); - LF_DEB(cnt_deb<9>, timed(polling, hptr(send_cq))); + // static auto polling = + // NS_DEBUG::cnt_deb<9>.make_timer(1, NS_DEBUG::str<>("poll send queue")); + // LF_DEB(cnt_deb<9>, timed(polling, 
static_cast(send_cq))); - // poll for completions + // poll for completions + { + ret = fi_cq_read(send_cq, &entry[0], max_completions_per_poll_); + } + // if there is an error, retrieve it + if (ret == -FI_EAVAIL) + { + struct fi_cq_err_entry e = {}; + int err_sz = fi_cq_readerr(send_cq, &e, 0); + (void)err_sz; + + // flags might not be set correctly + if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) { - ret = fi_cq_read(send_cq, &entry[0], max_completions_per_poll_); + LIBFATBAT_ERROR(ctrl_log, + "{:<20} Error FI_EAVAIL for FI_SEND with len {:#06x} context {} errcode {:3} flags {:16b} error {}", + "txcq", e.len, static_cast(e.op_context), e.err, e.flags, + fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) + else if ((e.flags & FI_RMA) != 0) { - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(send_cq, &e, 0); - (void) err_sz; - - // flags might not be set correctly - if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) - { - LF_DEB(cnt_err, - error("txcq Error FI_EAVAIL for FI_SEND with len", hex<6>(e.len), - "context", hptr(e.op_context), "code", dec<3>(e.err), "flags", - bin<16>(e.flags), "error", - fi_cq_strerror( - send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); - } - else if ((e.flags & FI_RMA) != 0) - { - LF_DEB(cnt_err, - error("txcq Error FI_EAVAIL for FI_RMA with len", hex<6>(e.len), - "context", hptr(e.op_context), "code", dec<3>(e.err), "flags", - bin<16>(e.flags), "error", - fi_cq_strerror( - send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); - } - operation_context* handler = reinterpret_cast(e.op_context); - handler->handle_error(e); - return 0; + LIBFATBAT_ERROR(ctrl_log, + "{:<20} Error FI_EAVAIL for FI_RMA with len {:#06x} context {} errcode {:3} flags {:16b} error {}", + "txcq", e.len, static_cast(e.op_context), e.err, e.flags, + fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); } + 
operation_context* handler = reinterpret_cast(e.op_context); + handler->handle_error(e); + return 0; } - // - // exit possibly locked region and process each completion - // - if (ret > 0) + } + // + // exit possibly locked region and process each completion + // + if (ret > 0) + { + [[maybe_unused]] std::array buf; + int processed = 0; + for (int i = 0; i < ret; ++i) { - std::array buf; - int processed = 0; - for (int i = 0; i < ret; ++i) + ++sends_complete_; + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} length {:#06x}", "Completion", i, + static_cast(entry[i].op_context), entry[i].len); + if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) { - ++sends_complete; - LF_DEB(cnt_deb<9>, - debug(str<>("Completion"), i, dec<2>(i), "txcq flags", - fi_tostr_r( - buf.data(), buf.size(), &entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), - "(", dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), - "length", hex<6>(entry[i].len))); - if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) - { - LF_DEB(cnt_deb<9>, - debug(str<>("Completion"), "txcq tagged send completion", - hptr(entry[i].op_context))); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_send_completion(user_data); - } - else - { - LF_DEB(cnt_err, - error("Received an unknown txcq completion", dec<>(entry[i].flags), - bin<64>(entry[i].flags))); - std::terminate(); - } + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", + "txcq tagged send completion", static_cast(entry[i].op_context), ""); + + operation_context* handler = + reinterpret_cast(entry[i].op_context); + processed += handler->handle_tagged_send_completion(user_data); + } + else + { + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", + "unknown txcq completion", static_cast(entry[i].op_context), ""); + std::terminate(); } - return processed; - } - else if (ret == 0 || ret == -FI_EAGAIN) - { - // do nothing, we will try again on the next check } - else { 
LF_DEB(cnt_err, error("unknown error in completion txcq read")); } - return 0; + return processed; } - - // -------------------------------------------------------------------- - int poll_recv_queue(fid_cq* rx_cq, void* user_data) + else if (ret == 0 || ret == -FI_EAGAIN) { + // do nothing, we will try again on the next check + } + else + { + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", + "unknown error in completion txcq read", static_cast(entry[0].op_context), + ""); + } + return 0; + } + + // -------------------------------------------------------------------- + int poll_recv_queue(fid_cq* rx_cq, void* user_data) + { #ifdef EXCESSIVE_POLLING_BACKOFF_MICRO_S - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); - if (std::chrono::duration_cast(now - recv_poll_stamp) - .count() < EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - recv_poll_stamp = now; + std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - recv_poll_stamp).count() < + EXCESSIVE_POLLING_BACKOFF_MICRO_S) + return 0; + recv_poll_stamp = now; #endif - int ret; - fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = get_rx_lock(); + int ret; + fi_cq_msg_entry entry[max_completions_array_limit_]; + assert(max_completions_per_poll_ <= max_completions_array_limit_); + { + auto lock = get_rx_lock(); - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } + // if we're not threadlocal and didn't get the lock, + // then another thread is polling now, just exit + if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } - static auto polling = - NS_DEBUG::cnt_deb<2>.make_timer(1, NS_DEBUG::str<>("poll recv queue")); - LF_DEB(cnt_deb<2>, timed(polling, hptr(rx_cq))); + // static auto polling = + // 
NS_DEBUG::cnt_deb<2>.make_timer(1, NS_DEBUG::str<>("poll recv queue")); + // LF_DEB(cnt_deb<2>, timed(polling, static_cast(rx_cq))); - // poll for completions - { - ret = fi_cq_read(rx_cq, &entry[0], max_completions_per_poll_); - } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) + // poll for completions + { + ret = fi_cq_read(rx_cq, &entry[0], max_completions_per_poll_); + } + // if there is an error, retrieve it + if (ret == -FI_EAVAIL) + { + // read the full error status + struct fi_cq_err_entry e = {}; + int err_sz = fi_cq_readerr(rx_cq, &e, 0); + (void)err_sz; + // from the manpage 'man 3 fi_cq_readerr' + if (e.err == FI_ECANCELED) { - // read the full error status - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(rx_cq, &e, 0); - (void) err_sz; - // from the manpage 'man 3 fi_cq_readerr' - if (e.err == FI_ECANCELED) - { - LF_DEB(cnt_deb<1>, - debug(str<>("rxcq Cancelled"), "flags", hex<6>(e.flags), "len", - hex<6>(e.len), "context", hptr(e.op_context))); - // the request was cancelled, we can simply exit - // as the canceller will have doone any cleanup needed - operation_context* handler = - reinterpret_cast(e.op_context); - handler->handle_cancelled(); - return 0; - } - else if (e.err != FI_SUCCESS) - { - LF_DEB(cnt_err, - error(str<>("poll_recv_queue"), "error code", dec<>(-e.err), "flags", - hex<6>(e.flags), "len", hex<6>(e.len), "context", - hptr(e.op_context), "error msg", - fi_cq_strerror( - rx_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); - } + LIBFATBAT_DEBUG(ctrl_log, "{:<20} flags {:#06x} len {:#06x} context {}", + "rxcq Cancelled", e.flags, e.len, static_cast(e.op_context)); + // the request was cancelled, we can simply exit + // as the canceller will have doone any cleanup needed operation_context* handler = reinterpret_cast(e.op_context); - if (handler) handler->handle_error(e); + handler->handle_cancelled(); return 0; } - } - // - // release the lock and process each completion - // - if (ret > 0) - { - 
std::array buf; - int processed = 0; - for (int i = 0; i < ret; ++i) + else if (e.err != FI_SUCCESS) { - ++recvs_complete; - LF_DEB(cnt_deb<2>, - debug(str<>("Completion"), i, "rxcq flags", - fi_tostr_r( - buf.data(), buf.size(), &entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), - "(", dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), - "length", hex<6>(entry[i].len))); - if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) - { - LF_DEB(cnt_deb<2>, - debug(str<>("Completion"), "rxcq tagged recv completion", - hptr(entry[i].op_context))); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_recv_completion(user_data); - } - else - { - LF_DEB(cnt_err, - error("Received an unknown rxcq completion", dec<>(entry[i].flags), - bin<64>(entry[i].flags))); - std::terminate(); - } + LIBFATBAT_DEBUG(ctrl_log, + "{:<20} error code {} flags {:#06x} len {:#06x} context {} error msg {}", + "poll_recv_queue", -e.err, e.flags, e.len, static_cast(e.op_context), + fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); } - return processed; + operation_context* handler = reinterpret_cast(e.op_context); + if (handler) handler->handle_error(e); + return 0; } - else if (ret == 0 || ret == -FI_EAGAIN) + } + // + // release the lock and process each completion + // + if (ret > 0) + { + std::array buf; + int processed = 0; + for (int i = 0; i < ret; ++i) { - // do nothing, we will try again on the next check + ++recvs_complete_; + LIBFATBAT_DEBUG(ctrl_log, + "{:<20} {:02} {} flags {} ({:#06x}) context {} length {:#06x}", + "Completion txcq", i, + fi_tostr_r(buf.data(), buf.size(), &entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), + entry[i].flags, static_cast(entry[i].op_context), entry[i].len); + + if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) + { + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", i, + static_cast(entry[i].op_context), "rxcq tagged recv completion"); + + operation_context* handler = + 
reinterpret_cast(entry[i].op_context); + processed += handler->handle_tagged_recv_completion(user_data); + } + else + { + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", i, + static_cast(entry[i].op_context), + "Received an unknown rxcq completion"); + std::terminate(); + } } - else { LF_DEB(cnt_err, error("unknown error in completion rxcq read")); } - return 0; + return processed; } - - // Jobs started using mpi don't have this info - struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) + else if (ret == 0 || ret == -FI_EAGAIN) { - (void) info; // unused variable warning - (void) tx; // unused variable warning - - LF_DEB(cnb_deb, debug(str<>("fi_dupinfo"))); - struct fi_info* hints = fi_dupinfo(info); - if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); - // clear any Rx address data that might be set - // free(hints->src_addr); - // hints->src_addr = nullptr; - // hints->src_addrlen = 0; - free(hints->dest_addr); - hints->dest_addr = nullptr; - hints->dest_addrlen = 0; - return hints; + // do nothing, we will try again on the next check } - }; + else + { + LIBFATBAT_ERROR(ctrl_log, "{:<20} unknown error in completion rxcq read", "Completion"); + } + return 0; + } -} // namespace oomph::libfabric + // Jobs started using mpi don't have this info + struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) + { + (void)info; // unused variable warning + (void)tx; // unused variable warning + + LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "fi_dupinfo", 0, 0, "called"); + struct fi_info* hints = fi_dupinfo(info); + if (!hints) throw libfatbat::fabric_error(0, "fi_dupinfo"); + // clear any Rx address data that might be set + // free(hints->src_addr); + // hints->src_addr = nullptr; + // hints->src_addrlen = 0; + free(hints->dest_addr); + hints->dest_addr = nullptr; + hints->dest_addrlen = 0; + return hints; + } +}; + +} // namespace oomph::libfabric diff --git a/src/libfabric/controller_base.hpp 
b/src/libfabric/controller_base.hpp deleted file mode 100644 index d423803b..00000000 --- a/src/libfabric/controller_base.hpp +++ /dev/null @@ -1,1564 +0,0 @@ -/* - * ghex-org - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include -#include -#include -#include -#include -#include -// -#include -#include -#include -#include -// -#include -// -#include -#include -#include -#include -#include -#include -#include -#include -// -#include "oomph_libfabric_defines.hpp" -// -#include "fabric_error.hpp" -#include "locality.hpp" -#include "memory_region.hpp" -#include "operation_context_base.hpp" - -#if ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION <= 12) -#define fi_tostr_r(a,b,c,d) " " -#endif - -// #define DISABLE_FI_INJECT -// #define EXCESSIVE_POLLING_BACKOFF_MICRO_S 50 - -// ------------------------------------------------------------------ - -// ---------------------------------------- -// auto progress (libfabric thread) or manual -// ---------------------------------------- -static fi_progress libfabric_progress_type() -{ - if (std::getenv("LIBFABRIC_AUTO_PROGRESS") == nullptr) return FI_PROGRESS_MANUAL; - return FI_PROGRESS_AUTO; -} - -static char const* libfabric_progress_string() -{ - if (libfabric_progress_type() == FI_PROGRESS_AUTO) return "auto"; - return "manual"; -} - -// ---------------------------------------- -// endpoint types -// We assume (to simplify things) that if you want a scalable Rx, -// you also have a scalable Tx -// -// Note that only GNI supports scalable endpoints currently -// Warning. 
It seems that scalable Rx contexts cannot be used when tagged or -// expected messages are used because the message is assigned to an rx context -// in a non deterministic way, so posting a tagged receive on context N might -// never complete as the tagged message when to context M, and appears as an -// unexpected message, completion on N never happens. -// -// When using unexpected mesages only, Rx contexts might be useful. -// ---------------------------------------- -enum class endpoint_type : int -{ - single = 0, - multiple = 1, - threadlocalTx = 2, - scalableTx = 3, - scalableTxRx = 4, -}; - -// ---------------------------------------- -// single endpoint or separate for send/recv -// ---------------------------------------- -static endpoint_type libfabric_endpoint_type() -{ - auto env_str = std::getenv("LIBFABRIC_ENDPOINT_TYPE"); - if (env_str == nullptr) return endpoint_type::single; - if (std::string(env_str) == std::string("multiple") || - std::atoi(env_str) == int(endpoint_type::multiple)) - return endpoint_type::multiple; - if (std::string(env_str) == std::string("threadlocal") || - std::atoi(env_str) == int(endpoint_type::threadlocalTx)) - return endpoint_type::threadlocalTx; - if (std::string(env_str) == std::string("scalableTx") || - std::atoi(env_str) == int(endpoint_type::scalableTx)) - return endpoint_type::scalableTx; - if (std::string(env_str) == std::string("scalableTxRx") || - std::atoi(env_str) == int(endpoint_type::scalableTxRx)) - return endpoint_type::scalableTxRx; - // default is single endpoint type - return endpoint_type::single; -} - -static char const* libfabric_endpoint_string() -{ - auto lf_ep_type = libfabric_endpoint_type(); - if (lf_ep_type == endpoint_type::multiple) return "multiple"; - if (lf_ep_type == endpoint_type::threadlocalTx) return "threadlocal"; - if (lf_ep_type == endpoint_type::scalableTx) return "scalableTx"; - if (lf_ep_type == endpoint_type::scalableTxRx) return "scalableTxRx"; - return "single"; -} - -// 
---------------------------------------- -// number of completions to handle per poll -// ---------------------------------------- -static int libfabric_completions_per_poll() -{ - auto env_str = std::getenv("LIBFABRIC_POLL_SIZE"); - if (env_str != nullptr) - { - try - { - return std::atoi(env_str); - } - catch (...) - { - } - } - return 4; -} - -// ---------------------------------------- -// Eager/Rendezvous threshold -// ---------------------------------------- -static int libfabric_rendezvous_threshold(int def_val) -{ - auto env_str = std::getenv("LIBFABRIC_RENDEZVOUS_THRESHOLD"); - if (env_str != nullptr) - { - try - { - char* end; - return std::strtoul(env_str, &end, 0); - } - catch (...) - { - } - } - return def_val; -} - -// ------------------------------------------------ -// Needed on Cray for GNI extensions -// ------------------------------------------------ -#ifdef HAVE_LIBFABRIC_GNI -# include "rdma/fi_ext_gni.h" -// #define OOMPH_GNI_REG "none" -# define OOMPH_GNI_REG "internal" -// #define OOMPH_GNI_REG "udreg" - -static std::vector> gni_strs = { - {GNI_MR_CACHE, "GNI_MR_CACHE"}, -}; - -// clang-format off -static std::vector> gni_ints = { - {GNI_MR_CACHE_LAZY_DEREG, "GNI_MR_CACHE_LAZY_DEREG"}, - {GNI_MR_HARD_REG_LIMIT, "GNI_MR_HARD_REG_LIMIT"}, - {GNI_MR_SOFT_REG_LIMIT, "GNI_MR_SOFT_REG_LIMIT"}, - {GNI_MR_HARD_STALE_REG_LIMIT, "GNI_MR_HARD_STALE_REG_LIMIT"}, - {GNI_MR_UDREG_REG_LIMIT, "GNI_MR_UDREG_REG_LIMIT"}, - {GNI_WAIT_THREAD_SLEEP, "GNI_WAIT_THREAD_SLEEP"}, - {GNI_DEFAULT_USER_REGISTRATION_LIMIT, "GNI_DEFAULT_USER_REGISTRATION_LIMIT"}, - {GNI_DEFAULT_PROV_REGISTRATION_LIMIT, "GNI_DEFAULT_PROV_REGISTRATION_LIMIT"}, - {GNI_WAIT_SHARED_MEMORY_TIMEOUT, "GNI_WAIT_SHARED_MEMORY_TIMEOUT"}, - {GNI_MSG_RENDEZVOUS_THRESHOLD, "GNI_MSG_RENDEZVOUS_THRESHOLD"}, - {GNI_RMA_RDMA_THRESHOLD, "GNI_RMA_RDMA_THRESHOLD"}, - {GNI_CONN_TABLE_INITIAL_SIZE, "GNI_CONN_TABLE_INITIAL_SIZE"}, - {GNI_CONN_TABLE_MAX_SIZE, "GNI_CONN_TABLE_MAX_SIZE"}, - 
{GNI_CONN_TABLE_STEP_SIZE, "GNI_CONN_TABLE_STEP_SIZE"}, - {GNI_VC_ID_TABLE_CAPACITY, "GNI_VC_ID_TABLE_CAPACITY"}, - {GNI_MBOX_PAGE_SIZE, "GNI_MBOX_PAGE_SIZE"}, - {GNI_MBOX_NUM_PER_SLAB, "GNI_MBOX_NUM_PER_SLAB"}, - {GNI_MBOX_MAX_CREDIT, "GNI_MBOX_MAX_CREDIT"}, - {GNI_MBOX_MSG_MAX_SIZE, "GNI_MBOX_MSG_MAX_SIZE"}, - {GNI_RX_CQ_SIZE, "GNI_RX_CQ_SIZE"}, - {GNI_TX_CQ_SIZE, "GNI_TX_CQ_SIZE"}, - {GNI_MAX_RETRANSMITS, "GNI_MAX_RETRANSMITS"}, - {GNI_XPMEM_ENABLE, "GNI_XPMEM_ENABLE"}, - {GNI_DGRAM_PROGRESS_TIMEOUT, "GNI_DGRAM_PROGRESS_TIMEOUT"} -}; -// clang-format on -#endif - -// the libfabric library expects us to ask for an API supported version, so if -// we know we support api 2.0, then we ask for that, but the cxi legacy library -// on daint only supports 1.15, so drop back to that version if needed -#if defined(OOMPH_LIBFABRIC_V1_API) -# define LIBFABRIC_FI_VERSION_MAJOR 1 -# define LIBFABRIC_FI_VERSION_MINOR 15 -#else -# define LIBFABRIC_FI_VERSION_MAJOR 2 -# define LIBFABRIC_FI_VERSION_MINOR 2 -#endif - -namespace NS_DEBUG { - // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print cnb_deb("CONBASE"); - static NS_DEBUG::enable_print cnb_err("CONBASE"); -} // namespace NS_DEBUG - -/** @brief a class to return the number of progressed callbacks */ -struct progress_status -{ - int m_num_sends = 0; - int m_num_recvs = 0; - - int num() const noexcept { return m_num_sends + m_num_recvs; } - int num_sends() const noexcept { return m_num_sends; } - int num_recvs() const noexcept { return m_num_recvs; } - - progress_status& operator+=(progress_status const& other) noexcept - { - m_num_sends += other.m_num_sends; - m_num_recvs += other.m_num_recvs; - return *this; - } -}; - -namespace NS_LIBFABRIC { - /// A wrapper around fi_close that reports any error - /// Because we use so many handles, we must be careful to - /// delete them all before closing resources that use them - template - void fidclose(Handle fid, char const* msg) - { - LF_DEB(cnb_deb, 
debug(str<>("closing"), msg)); - int ret = fi_close(fid); - if (ret == -FI_EBUSY) { throw NS_LIBFABRIC::fabric_error(ret, "fi_close EBUSY"); } - else if (ret == FI_SUCCESS) { return; } - throw NS_LIBFABRIC::fabric_error(ret, "fi_close error"); - } - - /// when using thread local endpoints, we encapsulate things that - /// are needed to manage an endpoint - struct endpoint_wrapper - { - private: - friend class controller; - - fid_ep* ep_ = nullptr; - fid_cq* rq_ = nullptr; - fid_cq* tq_ = nullptr; - char const* name_ = nullptr; - - public: - endpoint_wrapper() {} - endpoint_wrapper(fid_ep* ep, fid_cq* rq, fid_cq* tq, char const* name) - : ep_(ep) - , rq_(rq) - , tq_(tq) - , name_(name) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, name_); - } - - // to keep boost::lockfree happy, we need these copy operators - endpoint_wrapper(endpoint_wrapper const& ep) = default; - endpoint_wrapper& operator=(endpoint_wrapper const& ep) = default; - - void cleanup() - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, name_); - if (ep_) - { - fidclose(&ep_->fid, "endpoint"); - ep_ = nullptr; - } - if (rq_) - { - fidclose(&rq_->fid, "rq"); - rq_ = nullptr; - } - if (tq_) - { - fidclose(&tq_->fid, "tq"); - tq_ = nullptr; - } - } - - inline fid_ep* get_ep() { return ep_; } - inline fid_cq* get_rx_cq() { return rq_; } - inline fid_cq* get_tx_cq() { return tq_; } - inline void set_tx_cq(fid_cq* cq) { tq_ = cq; } - inline char const* get_name() { return name_; } - }; - - using region_type = NS_MEMORY::memory_handle; - using endpoint_context_pool = - boost::lockfree::queue>; - - struct stack_endpoint - { - endpoint_wrapper endpoint_; - endpoint_context_pool* pool_; - // - stack_endpoint() - : endpoint_() - , pool_(nullptr) - { - } - // - stack_endpoint( - fid_ep* ep, fid_cq* rq, fid_cq* tq, char const* name, endpoint_context_pool* pool) - : endpoint_(ep, rq, tq, name) - , pool_(pool) - { - } - // - 
stack_endpoint& operator=(stack_endpoint&& other) - { - endpoint_ = std::move(other.endpoint_); - pool_ = std::exchange(other.pool_, nullptr); - return *this; - } - - ~stack_endpoint() - { - if (!pool_) return; - LF_DEB(cnb_deb, - trace(str<>("Scalable Ep"), "used push", "ep", hptr(get_ep()), "tx cq", - hptr(get_tx_cq()), "rx cq", hptr(get_rx_cq()))); - pool_->push(endpoint_); - } - - inline fid_ep* get_ep() { return endpoint_.get_ep(); } - - inline fid_cq* get_rx_cq() { return endpoint_.get_rx_cq(); } - - inline fid_cq* get_tx_cq() { return endpoint_.get_tx_cq(); } - }; - - struct endpoints_lifetime_manager - { - // threadlocal endpoints - static inline thread_local stack_endpoint tl_tx_; - static inline thread_local stack_endpoint tl_stx_; - static inline thread_local stack_endpoint tl_srx_; - // non threadlocal endpoints, tx/rx - endpoint_wrapper ep_tx_; - endpoint_wrapper ep_rx_; - }; - - template - class controller_base - { - public: - typedef std::mutex mutex_type; - typedef std::lock_guard scoped_lock; - typedef std::unique_lock unique_lock; - - protected: - // For threadlocal/scalable endpoints, - // we use a dedicated threadlocal endpoint wrapper - std::unique_ptr eps_; - - using endpoint_context_pool = - boost::lockfree::queue>; - endpoint_context_pool tx_endpoints_; - endpoint_context_pool rx_endpoints_; - - bool display_fabric_info_; // for debugging purposes, show fi_info hints - struct fi_info* fabric_info_; - struct fid_fabric* fabric_; - struct fid_domain* fabric_domain_; - struct fid_pep* ep_passive_; - - struct fid_av* av_; - endpoint_type endpoint_type_; - - locality here_; - locality root_; - - // used during queue creation setup and during polling - mutex_type controller_mutex_; - - // used to protect send/recv resources - alignas(64) mutex_type send_mutex_; - alignas(64) mutex_type recv_mutex_; - - std::size_t tx_inject_size_; - std::size_t tx_attr_size_; - std::size_t rx_attr_size_; - - uint32_t max_completions_per_poll_; - uint32_t 
msg_rendezvous_threshold_; - inline static constexpr uint32_t max_completions_array_limit_ = 256; - - static inline thread_local std::chrono::steady_clock::time_point send_poll_stamp; - static inline thread_local std::chrono::steady_clock::time_point recv_poll_stamp; - - // set if FI_MR_LOCAL is required (local access requires binding) - bool mrlocal = false; - // set if FI_MR_ENDPOINT is required (per endpoint memory binding) - bool mrbind = false; - // set if FI_MR_HRMEM provider requires heterogeneous memory registration - bool mrhmem = false; - - public: - bool get_mrbind() { return mrbind; } - - public: - NS_LIBFABRIC::simple_counter sends_posted_; - NS_LIBFABRIC::simple_counter recvs_posted_; - NS_LIBFABRIC::simple_counter sends_readied_; - NS_LIBFABRIC::simple_counter recvs_readied_; - NS_LIBFABRIC::simple_counter sends_complete; - NS_LIBFABRIC::simple_counter recvs_complete; - - void finvoke(char const* msg, char const* err, int ret) - { - LF_DEB(cnb_deb, trace(str<>(msg))); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, err); - } - - public: - // -------------------------------------------------------------------- - controller_base() - : eps_(nullptr) - , tx_endpoints_(1) - , rx_endpoints_(1) - , display_fabric_info_(false) - , fabric_info_(nullptr) - , fabric_(nullptr) - , fabric_domain_(nullptr) - , ep_passive_(nullptr) - , av_(nullptr) - , tx_inject_size_(0) - , tx_attr_size_(0) - , rx_attr_size_(0) - , max_completions_per_poll_(1) - , msg_rendezvous_threshold_(0x4000) - , sends_posted_(0) - , recvs_posted_(0) - , sends_readied_(0) - , recvs_readied_(0) - , sends_complete(0) - , recvs_complete(0) - { - } - - // -------------------------------------------------------------------- - // clean up all resources - ~controller_base() - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - unsigned int messages_handled_ = 0; - unsigned int rma_reads_ = 0; - unsigned int recv_deletes_ = 0; - - LF_DEB(cnb_deb, - 
debug(str<>("counters"), "Received messages", dec<>(messages_handled_), - "Total reads", dec<>(rma_reads_), "Total deletes", dec<>(recv_deletes_), - "deletes error", dec<>(messages_handled_ - recv_deletes_))); - - tx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); - rx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); - - // No cleanup threadlocals : done by consume_all cleanup above - // eps_->tl_tx_.endpoint_.cleanup(); - // eps_->tl_stx_.endpoint_.cleanup(); - // eps_->tl_srx_.endpoint_.cleanup(); - - // non threadlocal endpoints, tx/rx - eps_->ep_tx_.cleanup(); - eps_->ep_rx_.cleanup(); - - // Cleanup endpoints - eps_.reset(nullptr); - - // delete adddress vector - fidclose(&av_->fid, "Address Vector"); - - try - { - fidclose(&fabric_domain_->fid, "Domain"); - } - catch (fabric_error& e) - { - std::cout << "fabric domain close failed : Ensure all RMA " - "objects are freed before program termination" - << std::endl; - } - fidclose(&fabric_->fid, "Fabric"); - - // clean up - LF_DEB(cnb_deb, debug(str<>("freeing fabric_info"))); - - fi_freeinfo(fabric_info_); - } - - // -------------------------------------------------------------------- - // only used in check_libfabric quick test for helpful output - void enable_debug() { display_fabric_info_ = true; } - - // -------------------------------------------------------------------- - // setup an endpoint for receiving messages, - // usually an rx endpoint is shared by all threads - endpoint_wrapper create_rx_endpoint( - struct fid_domain* domain, struct fi_info* info, struct fid_av* av) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - auto ep_rx = new_endpoint_active(domain, info, false); - - // bind address vector - bind_address_vector_to_endpoint(ep_rx, av); - - // create a completion queue for the rx endpoint - info->rx_attr->op_flags |= FI_COMPLETION; - auto rx_cq = create_completion_queue(domain, info->rx_attr->size, "rx"); - - // bind CQ to endpoint - 
bind_queue_to_endpoint(ep_rx, rx_cq, FI_RECV, "rx"); - return endpoint_wrapper(ep_rx, rx_cq, nullptr, "rx"); - } - - // -------------------------------------------------------------------- - // initialize the basic fabric/domain/name - template - void initialize( - std::string const& provider, bool rootnode, int size, size_t threads, Args&&... args) - { - LF_DEB(cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - max_completions_per_poll_ = libfabric_completions_per_poll(); - LF_DEB(cnb_err, debug(str<>("Poll completions"), dec<3>(max_completions_per_poll_))); - - uint32_t default_val = (threads == 1) ? 0x400 : 0x4000; - msg_rendezvous_threshold_ = libfabric_rendezvous_threshold(default_val); - LF_DEB( - cnb_err, debug(str<>("Rendezvous threshold"), hex<4>(msg_rendezvous_threshold_))); - - endpoint_type_ = static_cast(libfabric_endpoint_type()); - LF_DEB(cnb_err, debug(str<>("Endpoints"), libfabric_endpoint_string())); - - eps_ = std::make_unique(); - - LF_DEB(cnb_deb, debug(str<>("Threads"), dec<3>(threads))); - - open_fabric(provider, threads, rootnode); - - // create an address vector that will be bound to (all) endpoints - av_ = create_address_vector(fabric_info_, size, threads); - - // we need an rx endpoint in all cases except scalable rx - if (endpoint_type_ != endpoint_type::scalableTxRx) - { - // setup an endpoint for receiving messages - // rx endpoint is typically shared by all threads - eps_->ep_rx_ = create_rx_endpoint(fabric_domain_, fabric_info_, av_); - } - - if (endpoint_type_ == endpoint_type::single) - { - // always bind a tx cq to the rx endpoint for single endpoint type - auto tx_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); - eps_->ep_rx_.set_tx_cq(tx_cq); - } - else if (endpoint_type_ != endpoint_type::scalableTxRx) - { -#if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || \ - 
defined(HAVE_LIBFABRIC_SHM) || defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_CXI) || \ - defined(HAVE_LIBFABRIC_EFA) - // it appears that the rx endpoint cannot be enabled if it does not - // have a Tx CQ (at least when using sockets), so we create a dummy - // Tx CQ and bind it just to stop libfabric from triggering an error. - // The tx_cq won't actually be used because the user will get the real - // tx endpoint which will have the correct cq bound to it - auto dummy_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); - eps_->ep_rx_.set_tx_cq(dummy_cq); -#endif - } - - if (endpoint_type_ == endpoint_type::multiple) - { - // create a separate Tx endpoint for sending messages - // note that the CQ needs FI_RECV even though its a Tx cq to keep - // some providers happy as they trigger an error if an endpoint - // has no Rx cq attached (appears to be a progress related bug) - auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - - // create a completion queue for tx endpoint - fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - auto tx_cq = create_completion_queue( - fabric_domain_, fabric_info_->tx_attr->size, "tx multiple"); - - bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx multiple"); - bind_address_vector_to_endpoint(ep_tx, av_); - enable_endpoint(ep_tx, "tx multiple"); - - // combine endpoints and CQ into wrapper for convenience - eps_->ep_tx_ = endpoint_wrapper(ep_tx, nullptr, tx_cq, "tx multiple"); - } - else if (endpoint_type_ == endpoint_type::threadlocalTx) - { - // each thread creates a Tx endpoint on first call to get_tx_endpoint() - } - else if (endpoint_type_ == endpoint_type::scalableTx || - endpoint_type_ == endpoint_type::scalableTxRx) - { - // setup tx contexts for each possible thread - size_t threads_allocated = 0; - auto ep_sx = new_endpoint_scalable( - fabric_domain_, fabric_info_, true /*Tx*/, threads, threads_allocated); - - LF_DEB(cnb_deb, - 
trace(str<>("scalable endpoint ok"), "Contexts allocated", - dec<4>(threads_allocated))); - - finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", - fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); - - // prepare the stack for insertions - tx_endpoints_.reserve(threads_allocated); - // - for (unsigned int i = 0; i < threads_allocated; i++) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope( - NS_DEBUG::hptr(this), "scalable", NS_DEBUG::dec<4>(i)); - - // For threadlocal/scalable endpoints, tx/rx resources - fid_ep* scalable_ep_tx; - fid_cq* scalable_cq_tx; - - // Create a Tx context, cq, bind and enable - finvoke("create tx context", "fi_tx_context", - fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); - scalable_cq_tx = create_completion_queue( - fabric_domain_, fabric_info_->tx_attr->size, "tx scalable"); - bind_queue_to_endpoint( - scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); - enable_endpoint(scalable_ep_tx, "tx scalable"); - - endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - LF_DEB(cnb_deb, - trace(str<>("Scalable Ep"), "initial tx push", "ep", hptr(tx.get_ep()), - "tx cq", hptr(tx.get_tx_cq()), "rx cq", hptr(tx.get_rx_cq()))); - tx_endpoints_.push(tx); - } - - eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); - } - - // once enabled we can get the address - enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); - here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); - LF_DEB(cnb_deb, debug(str<>("setting 'here'"), here_.to_str())); - - // // if we are using scalable endpoints, then setup tx/rx contexts - // // we will us a single endpoint for all Tx/Rx contexts - // if (endpoint_type_ == endpoint_type::scalableTx || - // endpoint_type_ == endpoint_type::scalableTxRx) - // { - - // // thread slots might not be same as what we asked for - // size_t threads_allocated = 0; - // auto ep_sx = new_endpoint_scalable(fabric_domain_, fabric_info_, true /*Tx*/, threads, - // 
threads_allocated); - // if (!ep_sx) - // throw NS_LIBFABRIC::fabric_error(FI_EOTHER, "fi_scalable endpoint creation failed"); - - // LF_DEB(cnb_deb, trace(str<>("scalable endpoint ok"), - // "Contexts allocated", dec<4>(threads_allocated))); - - // // prepare the stack for insertions - // tx_endpoints_.reserve(threads_allocated); - // rx_endpoints_.reserve(threads_allocated); - // // - // for (unsigned int i = 0; i < threads_allocated; i++) - // { - // [[maybe_unused]] auto scp = - // NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), "scalable", dec<4>(i)); - - // // For threadlocal/scalable endpoints, tx/rx resources - // fid_ep* scalable_ep_tx; - // fid_cq* scalable_cq_tx; - //// fid_ep* scalable_ep_rx; - //// fid_cq* scalable_cq_rx; - - // // Tx context setup - // finvoke("create tx context", "fi_tx_context", - // fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); - - // scalable_cq_tx = create_completion_queue(fabric_domain_, - // fabric_info_->tx_attr->size, "tx scalable"); - - // bind_queue_to_endpoint(scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); - - // enable_endpoint(scalable_ep_tx, "tx scalable"); - - // endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - // LF_DEB(cnb_deb, - // trace(str<>("Scalable Ep"), "initial tx push", "ep", - // NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", - // NS_DEBUG::ptr(tx.get_rx_cq()))); - // tx_endpoints_.push(tx); - - // // Rx contexts - //// finvoke("create rx context", "fi_rx_context", - //// fi_rx_context(ep_sx, i, NULL, &scalable_ep_rx, NULL)); - - //// scalable_cq_rx = - //// create_completion_queue(fabric_domain_, fabric_info_->rx_attr->size, "rx"); - - //// bind_queue_to_endpoint(scalable_ep_rx, scalable_cq_rx, FI_RECV, "rx scalable"); - - //// enable_endpoint(scalable_ep_rx, "rx scalable"); - - //// endpoint_wrapper rx(scalable_ep_rx, scalable_cq_rx, nullptr, "rx scalable"); - //// LF_DEB(cnb_deb, - //// trace(str<>("Scalable Ep"), "initial 
rx push", "ep", - //// NS_DEBUG::ptr(rx.get_ep()), "tx cq", NS_DEBUG::ptr(rx.get_tx_cq()), "rx cq", - //// NS_DEBUG::ptr(rx.get_rx_cq()))); - //// rx_endpoints_.push(rx); - // } - - // finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", - // fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); - - // eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); - - return static_cast(this)->initialize_derived( - provider, rootnode, size, threads, std::forward(args)...); - } - - // -------------------------------------------------------------------- - uint64_t caps_flags(uint64_t available_flags) const - { - char buf[1024]; - LF_DEB(cnb_err, - debug(str<>("caps available"), hex(available_flags), - fi_tostr_r(buf, 1024, &available_flags, FI_TYPE_CAPS))); - uint64_t required_flags = - static_cast(this)->caps_flags(available_flags); - // - uint64_t final_flags = required_flags; - for (uint64_t bit = 0; bit < 64; ++bit) - { - uint64_t f = (1ULL << bit); - if ((required_flags & f) && ((available_flags & f) == 0)) - { - LF_DEB(cnb_err, - error(str<>("caps flags unavailable"), - fi_tostr_r(buf, 1024, &f, FI_TYPE_CAPS))); - final_flags &= ~f; - } - } - LF_DEB(cnb_err, - debug(str<>("caps flags requested"), hex(final_flags), - fi_tostr_r(buf, 1024, &final_flags, FI_TYPE_CAPS))); - return final_flags; - } - - // -------------------------------------------------------------------- - constexpr fi_threading threadlevel_flags() - { - return static_cast(this)->threadlevel_flags(); - } - - // -------------------------------------------------------------------- - constexpr std::int64_t memory_registration_mode_flags() - { -#if defined(HAVE_LIBFABRIC_LNX) - return FI_MR_HMEM; -#endif - std::int64_t base_flags = FI_MR_ALLOCATED; // | FI_MR_VIRT_ADDR | FI_MR_PROV_KEY; -#if OOMPH_ENABLE_DEVICE - base_flags = base_flags | FI_MR_HMEM; -#endif - base_flags = base_flags | FI_MR_LOCAL; - -#if defined(HAVE_LIBFABRIC_CXI) - return base_flags | FI_MR_ENDPOINT; - -#elif 
defined(HAVE_LIBFABRIC_EFA) - return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; -#else - return base_flags; -#endif - } - - // -------------------------------------------------------------------- - uint32_t rendezvous_threshold() { return msg_rendezvous_threshold_; } - - // -------------------------------------------------------------------- - // initialize the basic fabric/domain/name - void open_fabric(std::string const& provider, int threads, bool rootnode) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - struct fi_info* fabric_hints_ = fi_allocinfo(); - if (!fabric_hints_) - { - throw NS_LIBFABRIC::fabric_error(-1, "Failed to allocate fabric hints"); - } - - // setup the provider we want to use before getting info - if ((provider.c_str() == std::string("tcp")) || - (provider.c_str() == std::string("verbs"))) - { - fabric_hints_->fabric_attr->prov_name = - strdup(std::string(provider + ";ofi_rxm").c_str()); - } - else { fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); } - LF_DEB(cnb_deb, debug(str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); - -#if defined(HAVE_LIBFABRIC_CXI) - // libfabric domain for multi-nic CXI provider - char const* cxi_domain = std::getenv("FI_CXI_DEVICE_NAME"); - if (cxi_domain == nullptr) - { - LF_DEB(cnb_err, error(str<>("Domain"), "FI_CXI_DEVICE_NAME not set")); - } - else { fabric_hints_->domain_attr->name = strdup(cxi_domain); } - LF_DEB( - NS_DEBUG::cnb_deb, debug(str<>("fabric domain"), fabric_hints_->domain_attr->name)); -#endif - - fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); - - // get an info object to see what might be available before we set any flags - uint64_t flags = 0; - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), - nullptr, nullptr, flags, fabric_hints_, &fabric_info_); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); - if 
(display_fabric_info_ && fabric_info_) - { - std::array buf; - LF_DEB(cnb_err, - trace(str<>("Fabric info"), "pre-check ->", - fabric_hints_->fabric_attr->prov_name, "\n", - fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO))); - } - - // set capabilities we want to request - uint64_t all_caps = - caps_flags(fabric_info_->rx_attr->caps | fabric_info_->tx_attr->caps); - - // fabric_hints_->caps = all_caps; - fabric_hints_->tx_attr->caps = fabric_info_->tx_attr->caps & all_caps; - fabric_hints_->rx_attr->caps = fabric_info_->rx_attr->caps & all_caps; - - if ((fabric_info_->mode & FI_CONTEXT) == 0) - { - std::array buf; - LF_DEB(cnb_err, - debug(str<>("mode FI_CONTEXT!=0"), - fi_tostr_r(buf.data(), buf.size(), &fabric_hints_->domain_attr->mode, - FI_TYPE_MODE))); - } - fabric_hints_->domain_attr->name = strdup(fabric_info_->domain_attr->name); - - // Enable/Disable the use of progress threads - auto progress = libfabric_progress_type(); - fabric_hints_->domain_attr->control_progress = progress; - fabric_hints_->domain_attr->data_progress = progress; - LF_DEB(cnb_err, debug(str<>("progress"), libfabric_progress_string())); - - if (threads > 1) - { - LF_DEB(cnb_deb, debug(str<>("Setting Threads>1 level"))); - // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; - // fabric_hints_->domain_attr->threading = FI_THREAD_FID; - fabric_hints_->domain_attr->threading = threadlevel_flags(); - } - else - { - LF_DEB(cnb_deb, debug(str<>("FI_THREAD_DOMAIN"))); - // we serialize everything - fabric_hints_->domain_attr->threading = FI_THREAD_DOMAIN; - } - - // Enable resource management - fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; - - LF_DEB(cnb_deb, debug(str<>("fabric endpoint"), "RDM")); - fabric_hints_->ep_attr->type = FI_EP_RDM; - - LF_DEB(cnb_deb, - debug(str<>("get fabric info"), "FI_VERSION", dec(LIBFABRIC_FI_VERSION_MAJOR), - dec(LIBFABRIC_FI_VERSION_MINOR))); - - ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, 
LIBFABRIC_FI_VERSION_MINOR), - nullptr, nullptr, flags, fabric_hints_, &fabric_info_); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); - - if (rootnode) - { - std::array buf; - LF_DEB(cnb_err, - trace(str<>("Fabric info"), "\n", - fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO))); - } - - int mrkey = (fabric_info_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; - LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_PROV_KEY"), mrkey)); - - bool context = (fabric_info_->mode & FI_CONTEXT) != 0; - LF_DEB(cnb_deb, debug(str<>("Requires FI_CONTEXT"), context)); - - mrlocal = (fabric_info_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; - LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_LOCAL"), mrlocal)); - - mrbind = (fabric_info_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; - LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ENDPOINT"), mrbind)); - - /* Check if provider requires heterogeneous memory registration */ - mrhmem = (fabric_info_->domain_attr->mr_mode & FI_MR_HMEM) != 0; - LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_HMEM"), mrhmem)); - - bool mrhalloc = (fabric_info_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; - LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); -#if (FI_MAJOR_VERSION > 1) || ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION >= 20) - int auth_key = (fabric_info_->domain_attr->max_ep_auth_key); - LF_DEB(cnb_deb, debug(str<>("Supported max_ep_auth_key"), auth_key)); - fabric_info_->domain_attr->max_ep_auth_key = 0; -#endif - LF_DEB(cnb_deb, debug(str<>("Creating fi_fabric"))); - ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); - - // Allocate a domain. 
- LF_DEB(cnb_deb, debug(str<>("Allocating domain"))); - ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_domain"); - -#if defined(HAVE_LIBFABRIC_GNI) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), "GNI memory registration block"); - - LF_DEB(cnb_err, debug(str<>("-------"), "GNI String values")); - // Dump out all vars for debug purposes - for (auto& gni_data : gni_strs) - { - _set_check_domain_op_value( - gni_data.first, 0, gni_data.second.c_str(), false); - } - LF_DEB(cnb_err, debug(str<>("-------"), "GNI Int values")); - for (auto& gni_data : gni_ints) - { - _set_check_domain_op_value( - gni_data.first, 0, gni_data.second.c_str(), false); - } - LF_DEB(cnb_err, debug(str<>("-------"))); - - // -------------------------- - // GNI_MR_CACHE - // set GNI mem reg to be either none, internal or udreg - // - _set_check_domain_op_value( - GNI_MR_CACHE, const_cast(OOMPH_GNI_REG), "GNI_MR_CACHE"); - - // -------------------------- - // GNI_MR_UDREG_REG_LIMIT - // Experiments showed default value of 2048 too high if - // launching multiple clients on one node - // - int32_t udreg_limit = 0x0800; // 0x0400 = 1024, 0x0800 = 2048 - _set_check_domain_op_value( - GNI_MR_UDREG_REG_LIMIT, udreg_limit, "GNI_MR_UDREG_REG_LIMIT"); - - // -------------------------- - // GNI_MR_CACHE_LAZY_DEREG - // Enable lazy deregistration in MR cache - // - int32_t enable = 1; - LF_DEB(cnb_deb, debug(str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); - _set_check_domain_op_value( - GNI_MR_CACHE_LAZY_DEREG, enable, "GNI_MR_CACHE_LAZY_DEREG"); - - // -------------------------- - // GNI_MSG_RENDEZVOUS_THRESHOLD (c.f. 
GNI_RMA_RDMA_THRESHOLD) - // - int32_t thresh = msg_rendezvous_threshold_; - _set_check_domain_op_value( - GNI_MSG_RENDEZVOUS_THRESHOLD, thresh, "GNI_MSG_RENDEZVOUS_THRESHOLD"); - } -#endif - tx_inject_size_ = fabric_info_->tx_attr->inject_size; - - // the number of preposted receives, and sender queue depth - // is set by querying the tx/tx attr sizes - tx_attr_size_ = std::min(size_t(512), fabric_info_->tx_attr->size / 2); - rx_attr_size_ = std::min(size_t(512), fabric_info_->rx_attr->size / 2); - // Print fabric info to a human-readable string if available - if (display_fabric_info_ && fabric_info_) - { - std::array buf; - std::cout << "Libfabric fabric info:\n" - << fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO) - << std::endl; - } - fi_freeinfo(fabric_hints_); - } - - // -------------------------------------------------------------------- - struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) - { - return static_cast(this)->set_src_dst_addresses(info, tx); - } - -#ifdef HAVE_LIBFABRIC_GNI - // -------------------------------------------------------------------- - // Special GNI extensions to disable memory registration cache - - // if set is false, the old value is returned and nothing is set - template - int _set_check_domain_op_value(int op, T value, char const* info, bool set = true) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - static struct fi_gni_ops_domain* gni_domain_ops = nullptr; - int ret = 0; - - if (gni_domain_ops == nullptr) - { - ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, - (void**) &gni_domain_ops, nullptr); - LF_DEB(cnb_deb, - debug(str<>("gni open ops"), (ret == 0 ? 
"OK" : "FAIL"), - NS_DEBUG::ptr(gni_domain_ops))); - } - - // if open was ok and set flag is present, then set value - if (ret == 0 && set) - { - ret = gni_domain_ops->set_val( - &fabric_domain_->fid, (dom_ops_val_t) (op), reinterpret_cast(&value)); - - LF_DEB(cnb_deb, debug(str<>("gni set ops val"), value, (ret == 0 ? "OK" : "FAIL"))); - } - - // Get the value (so we can check that the value we set is now returned) - T new_value; - ret = gni_domain_ops->get_val(&fabric_domain_->fid, (dom_ops_val_t) (op), &new_value); - if constexpr (std::is_integral::value) - { - LF_DEB(cnb_err, - debug( - str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, hex<8>(new_value))); - } - else - { - LF_DEB(cnb_err, - debug(str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, new_value)); - } - // - if (ret) throw NS_LIBFABRIC::fabric_error(ret, std::string("setting ") + info); - - return ret; - } -#endif - - // -------------------------------------------------------------------- - struct fid_ep* new_endpoint_active(struct fid_domain* domain, struct fi_info* info, bool tx) - { - // don't allow multiple threads to call endpoint create at the same time - scoped_lock lock(controller_mutex_); - - // make sure src_addr/dst_addr are set accordingly - // and we do not create two endpoint with the same src address - struct fi_info* hints = set_src_dst_addresses(info, tx); - - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - LF_DEB(cnb_deb, debug(str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); - - struct fid_ep* ep; - int ret = fi_endpoint(domain, hints, &ep, nullptr); - if (ret) - { - throw NS_LIBFABRIC::fabric_error( - ret, "fi_endpoint (too many threadlocal endpoints?)"); - } - fi_freeinfo(hints); - LF_DEB(cnb_deb, debug(str<>("new_endpoint_active"), hptr(ep))); - return ep; - } - - // -------------------------------------------------------------------- - struct fid_ep* new_endpoint_scalable(struct fid_domain* domain, struct fi_info* 
info, - bool tx, size_t threads, size_t& threads_allocated) - { - // don't allow multiple threads to call endpoint create at the same time - scoped_lock lock(controller_mutex_); - - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - LF_DEB(cnb_deb, debug(str<>("fi_dupinfo"))); - struct fi_info* hints = fi_dupinfo(info); - if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); - - int flags = 0; - struct fi_info* new_hints = nullptr; - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), - nullptr, nullptr, flags, hints, &new_hints); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_getinfo"); - - // Check the optimal number of TX/RX contexts supported by the provider - size_t context_count = 0; - if (tx) { context_count = std::min(new_hints->domain_attr->tx_ctx_cnt, threads); } - else { context_count = std::min(new_hints->domain_attr->rx_ctx_cnt, threads); } - - // clang-format off - LF_DEB(cnb_deb, - trace(str<>("scalable endpoint"), - "Tx", tx, - "Threads", dec<3>(threads), - "tx_ctx_cnt", dec<3>(new_hints->domain_attr->tx_ctx_cnt), - "rx_ctx_cnt", dec<3>(new_hints->domain_attr->rx_ctx_cnt), - "context_count", dec<3>(context_count))); - // clang-format on - - threads_allocated = context_count; - new_hints->ep_attr->tx_ctx_cnt = context_count; - new_hints->ep_attr->rx_ctx_cnt = context_count; - - struct fid_ep* ep; - ret = fi_scalable_ep(domain, new_hints, &ep, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_scalable_ep"); - LF_DEB(cnb_deb, debug(str<>("new_endpoint_scalable"), hptr(ep))); - fi_freeinfo(hints); - return ep; - } - - // -------------------------------------------------------------------- - endpoint_wrapper& get_rx_endpoint() - { - static auto rx = NS_DEBUG::cnb_deb.make_timer(1, NS_DEBUG::str<>("get_rx_endpoint")); - LF_DEB(cnb_deb, timed(rx)); - - if (endpoint_type_ == endpoint_type::scalableTxRx) - { - if (eps_->tl_srx_.get_ep() == nullptr) - { - 
endpoint_wrapper ep; - bool ok = rx_endpoints_.pop(ep); - if (!ok) - { - // clang-format off - LF_DEB(cnb_deb, error(str<>("Scalable Ep"), "pop rx", - "ep", hptr(ep.get_ep()), - "tx cq", hptr(ep.get_tx_cq()), - "rx cq", hptr(ep.get_rx_cq()))); - // clang-format on - throw std::runtime_error("rx endpoint wrapper pop fail"); - } - eps_->tl_srx_ = stack_endpoint( - ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &rx_endpoints_); - LF_DEB(cnb_deb, - trace(str<>("Scalable Ep"), "pop rx", "ep", hptr(eps_->tl_srx_.get_ep()), - "tx cq", hptr(eps_->tl_srx_.get_tx_cq()), "rx cq", - hptr(eps_->tl_srx_.get_rx_cq()))); - } - return eps_->tl_srx_.endpoint_; - } - // otherwise just return the normal Rx endpoint - return eps_->ep_rx_; - } - - // -------------------------------------------------------------------- - endpoint_wrapper& get_tx_endpoint() - { - if (endpoint_type_ == endpoint_type::threadlocalTx) - { - if (eps_->tl_tx_.get_ep() == nullptr) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, "threadlocal"); - - // create a completion queue for tx endpoint - fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - auto tx_cq = create_completion_queue( - fabric_domain_, fabric_info_->tx_attr->size, "tx threadlocal"); - - // setup an endpoint for sending messages - // note that the CQ needs FI_RECV even though its a Tx cq to keep - // some providers happy as they trigger an error if an endpoint - // has no Rx cq attached (progress bug) - auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx threadlocal"); - bind_address_vector_to_endpoint(ep_tx, av_); - enable_endpoint(ep_tx, "tx threadlocal"); - - // set threadlocal endpoint wrapper - LF_DEB(cnb_deb, - trace(str<>("Threadlocal Ep"), "create Tx", "ep", hptr(ep_tx), "tx cq", - hptr(tx_cq), "rx cq", hptr(nullptr))); - // for cleaning up at termination - endpoint_wrapper 
ep(ep_tx, nullptr, tx_cq, "tx threadlocal"); - tx_endpoints_.push(ep); - eps_->tl_tx_ = stack_endpoint(ep_tx, nullptr, tx_cq, "threadlocal", nullptr); - } - return eps_->tl_tx_.endpoint_; - } - else if (endpoint_type_ == endpoint_type::scalableTx || - endpoint_type_ == endpoint_type::scalableTxRx) - { - if (eps_->tl_stx_.get_ep() == nullptr) - { - endpoint_wrapper ep; - bool ok = tx_endpoints_.pop(ep); - if (!ok) - { - LF_DEB(cnb_deb, - error(str<>("Scalable Ep"), "pop tx", "ep", hptr(ep.get_ep()), "tx cq", - hptr(ep.get_tx_cq()), "rx cq", hptr(ep.get_rx_cq()))); - throw std::runtime_error("tx endpoint wrapper pop fail"); - } - eps_->tl_stx_ = stack_endpoint( - ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &tx_endpoints_); - LF_DEB(cnb_deb, - trace(str<>("Scalable Ep"), "pop tx", "ep", hptr(eps_->tl_stx_.get_ep()), - "tx cq", hptr(eps_->tl_stx_.get_tx_cq()), "rx cq", - hptr(eps_->tl_stx_.get_rx_cq()))); - } - return eps_->tl_stx_.endpoint_; - } - else if (endpoint_type_ == endpoint_type::multiple) { return eps_->ep_tx_; } - // single : shared tx/rx endpoint - return eps_->ep_rx_; - } - - // -------------------------------------------------------------------- - void bind_address_vector_to_endpoint(struct fid_ep* endpoint, struct fid_av* av) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - LF_DEB(cnb_deb, debug(str<>("Binding AV"), "to", hptr(endpoint))); - int ret = fi_ep_bind(endpoint, &av->fid, 0); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind address_vector"); - } - - // -------------------------------------------------------------------- - void bind_queue_to_endpoint( - struct fid_ep* endpoint, struct fid_cq*& cq, uint32_t cqtype, char const* type) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); - - LF_DEB(cnb_deb, debug(str<>("Binding CQ"), "to", hptr(endpoint), type)); - int ret = fi_ep_bind(endpoint, &cq->fid, cqtype); - if (ret) throw 
NS_LIBFABRIC::fabric_error(ret, "bind cq"); - } - - // -------------------------------------------------------------------- - fid_cq* bind_tx_queue_to_rx_endpoint(struct fi_info* info, struct fid_ep* ep) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - info->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - fid_cq* tx_cq = create_completion_queue(fabric_domain_, info->tx_attr->size, "tx->rx"); - // shared send/recv endpoint - bind send cq to the recv endpoint - bind_queue_to_endpoint(ep, tx_cq, FI_TRANSMIT, "tx->rx bug fix"); - return tx_cq; - } - - // -------------------------------------------------------------------- - void enable_endpoint(struct fid_ep* endpoint, char const* type) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); - - LF_DEB(cnb_deb, debug(str<>("Enabling endpoint"), hptr(endpoint))); - int ret = fi_enable(endpoint); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_enable"); - } - - // -------------------------------------------------------------------- - locality get_endpoint_address(struct fid* id) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - locality::locality_data local_addr; - std::size_t addrlen = locality_defs::array_size; - int ret = fi_getname(id, local_addr.data(), &addrlen); - if (ret || (addrlen > locality_defs::array_size)) - { - std::string err = - std::to_string(addrlen) + "=" + std::to_string(locality_defs::array_size); - NS_LIBFABRIC::fabric_error(ret, "fi_getname - error (address size ?) 
" + err); - } - - // optimized out when debug logging is false - if constexpr (NS_DEBUG::cnb_deb.is_enabled()) - { - LF_DEB(cnb_deb, - debug(str<>("raw address data"), "size", dec<4>(addrlen), " : ", - locality(local_addr, av_).to_str())); - - std::stringstream temp2; - for (std::size_t i = 0; i < locality_defs::array_length; ++i) - { - temp2 << NS_DEBUG::hex<8>(local_addr[i]) << " - "; - } - LF_DEB(cnb_deb, debug(str<>("raw address data"), temp2.str().c_str())); - } - return locality(local_addr, av_); - } - - // -------------------------------------------------------------------- - fid_pep* create_passive_endpoint(struct fid_fabric* fabric, struct fi_info* info) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - struct fid_pep* ep; - int ret = fi_passive_ep(fabric, info, &ep, nullptr); - if (ret) { throw NS_LIBFABRIC::fabric_error(ret, "Failed to create fi_passive_ep"); } - return ep; - } - - // -------------------------------------------------------------------- - inline locality const& here() const { return here_; } - - // -------------------------------------------------------------------- - inline fi_addr_t const& fi_address() const { return here_.fi_address(); } - - // -------------------------------------------------------------------- - inline void setHere(locality const& val) { here_ = val; } - - // -------------------------------------------------------------------- - inline locality const& root() const { return root_; } - - // -------------------------------------------------------------------- - inline struct fid_domain* get_domain() const { return fabric_domain_; } - - // -------------------------------------------------------------------- - inline std::size_t get_rma_protocol_size() { return 65536; } -#ifdef DISABLE_FI_INJECT - // -------------------------------------------------------------------- - inline std::size_t get_tx_inject_size() { return 0; } -#else - // 
-------------------------------------------------------------------- - inline std::size_t get_tx_inject_size() { return tx_inject_size_; } -#endif - - // -------------------------------------------------------------------- - inline std::size_t get_tx_size() { return tx_attr_size_; } - - // -------------------------------------------------------------------- - inline std::size_t get_rx_size() { return rx_attr_size_; } - - // -------------------------------------------------------------------- - // returns true when all connections have been disconnected and none are active - inline bool isTerminated() - { - return false; - // return (qp_endpoint_map_.size() == 0); - } - - // -------------------------------------------------------------------- - void debug_print_av_vector(std::size_t N) - { - locality addr; - std::size_t addrlen = locality_defs::array_size; - for (std::size_t i = 0; i < N; ++i) - { - int ret = fi_av_lookup(av_, fi_addr_t(i), addr.fabric_data_writable(), &addrlen); - addr.set_fi_address(fi_addr_t(i)); - if ((ret == 0) && (addrlen <= locality_defs::array_size)) - { - LF_DEB(cnb_deb, debug(str<>("address vector"), dec<3>(i), addr.to_str())); - } - else - { - LF_DEB(cnb_err, - error(str<>("address length"), dec<3>(addrlen), - dec<3>(locality_defs::array_size))); - throw std::runtime_error("debug_print_av_vector : address vector " - "traversal failure"); - } - } - } - - // -------------------------------------------------------------------- - inline constexpr bool bypass_tx_lock() - { -#if defined(HAVE_LIBFABRIC_GNI) - return true; -#elif defined(HAVE_LIBFABRIC_LNX) - // @todo : provider is not yet thread safe using scalable endpoints - return false; -#else - return (threadlevel_flags() == FI_THREAD_SAFE || - endpoint_type_ == endpoint_type::threadlocalTx); -#endif - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return 
unique_lock(send_mutex_); - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_, std::try_to_lock_t{}); - } - - // -------------------------------------------------------------------- - inline constexpr bool bypass_rx_lock() - { -#ifdef HAVE_LIBFABRIC_GNI - return true; -#else - return (threadlevel_flags() == FI_THREAD_SAFE || - endpoint_type_ == endpoint_type::scalableTxRx); -#endif - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_); - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_, std::try_to_lock_t{}); - } - - // -------------------------------------------------------------------- - progress_status poll_for_work_completions(void* user_data) - { - progress_status p{0, 0}; - bool retry = false; - do { - // sends - uint32_t nsend = static_cast(this)->poll_send_queue( - get_tx_endpoint().get_tx_cq(), user_data); - p.m_num_sends += nsend; - retry = (nsend == max_completions_per_poll_); - // recvs - uint32_t nrecv = static_cast(this)->poll_recv_queue( - get_rx_endpoint().get_rx_cq(), user_data); - p.m_num_recvs += nrecv; - retry |= (nrecv == max_completions_per_poll_); - } while (retry); - return p; - } - - // -------------------------------------------------------------------- - inline int poll_send_queue(fid_cq* tx_cq, void* user_data) - { - return static_cast(this)->poll_send_queue(tx_cq, user_data); - } - - // -------------------------------------------------------------------- - inline int poll_recv_queue(fid_cq* rx_cq, void* user_data) - { - return 
static_cast(this)->poll_recv_queue(rx_cq, user_data); - } - - // -------------------------------------------------------------------- - struct fid_cq* create_completion_queue( - struct fid_domain* domain, size_t size, char const* type) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); - - struct fid_cq* cq; - fi_cq_attr cq_attr = {}; - cq_attr.format = FI_CQ_FORMAT_MSG; - cq_attr.wait_obj = FI_WAIT_NONE; - cq_attr.wait_cond = FI_CQ_COND_NONE; - cq_attr.size = size; - cq_attr.flags = 0 /*FI_COMPLETION*/; - LF_DEB(cnb_deb, trace(str<>("CQ size"), dec<4>(size))); - // open completion queue on fabric domain and set context to null - int ret = fi_cq_open(domain, &cq_attr, &cq, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_cq_open"); - return cq; - } - - // -------------------------------------------------------------------- - fid_av* create_address_vector(struct fi_info* info, int N, int num_rx_contexts) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - fid_av* av; - fi_av_attr av_attr = {fi_av_type(0), 0, 0, 0, nullptr, nullptr, 0}; - - // number of addresses expected - av_attr.count = N; - - // number of receive contexts used - int rx_ctx_bits = 0; -#ifdef RX_CONTEXTS_SUPPORT - while (num_rx_contexts >> ++rx_ctx_bits); - LF_DEB(cnb_deb, debug(str<>("rx_ctx_bits"), rx_ctx_bits)); -#endif - av_attr.rx_ctx_bits = rx_ctx_bits; - // if contexts is nonzero, then we are using a single scalable endpoint - av_attr.ep_per_node = (num_rx_contexts > 0) ? 
2 : 0; - - if (info->domain_attr->av_type != FI_AV_UNSPEC) - { - av_attr.type = info->domain_attr->av_type; - } - else - { - LF_DEB(cnb_deb, debug(str<>("map FI_AV_TABLE"))); - av_attr.type = FI_AV_TABLE; - } - - LF_DEB(cnb_deb, debug(str<>("Creating AV"))); - int ret = fi_av_open(fabric_domain_, &av_attr, &av, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_av_open"); - return av; - } - - // -------------------------------------------------------------------- - locality insert_address(locality const& address) { return insert_address(av_, address); } - - // -------------------------------------------------------------------- - locality insert_address(fid_av* av, locality const& address) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - - LF_DEB(cnb_deb, trace(str<>("inserting AV"), address.to_str(), hptr(av))); - fi_addr_t fi_addr = 0xffff'ffff; - int ret = fi_av_insert(av, address.fabric_data().data(), 1, &fi_addr, 0, nullptr); - if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } - else if (ret == 0) - { - LF_DEB(cnb_deb, error("fi_av_insert called with existing address")); - NS_LIBFABRIC::fabric_error(ret, "fi_av_insert did not return 1"); - } - // address was generated correctly, now update the locality with the fi_addr - locality new_locality(address, fi_addr, av); - LF_DEB(cnb_deb, - trace(str<>("AV add"), "rank", dec<>(fi_addr), new_locality.to_str(), "fi_addr", - hex<4>(fi_addr))); - return new_locality; - } - }; - -} // namespace NS_LIBFABRIC diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp deleted file mode 100644 index 84e43dd5..00000000 --- a/src/libfabric/fabric_error.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * ghex-org - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. 
- * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include -#include -// -#include -// -#include "oomph_libfabric_defines.hpp" - -namespace NS_DEBUG { - // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print err_deb("ERROR__"); -} // namespace NS_DEBUG - -namespace NS_LIBFABRIC { - - class fabric_error : public std::runtime_error - { - public: - // -------------------------------------------------------------------- - fabric_error(int err, std::string const& msg) - : std::runtime_error(std::string(fi_strerror(-err)) + msg) - , error_(err) - { - NS_DEBUG::err_deb.error(msg, ":", fi_strerror(-err)); - std::terminate(); - } - - fabric_error(int err) - : std::runtime_error(fi_strerror(-err)) - , error_(-err) - { - NS_DEBUG::err_deb.error(what()); - std::terminate(); - } - - int error_; - }; - -} // namespace NS_LIBFABRIC diff --git a/src/libfabric/libfabric_defines_template.hpp b/src/libfabric/libfabric_defines_template.hpp index ea2a105b..1f629903 100644 --- a/src/libfabric/libfabric_defines_template.hpp +++ b/src/libfabric/libfabric_defines_template.hpp @@ -4,39 +4,5 @@ // definitions that cmake generates from user options // clang-format off @oomph_config_defines@ -// clang-format on - -// ------------------------------------------------------------------ -// This section exists to make interoperabily/sharing of code -// between OOMPH/GHEX and HPX easier - there are some files that do -// the majority of libfabric initialization/setup and polling that -// are basically the same in many apps, these files can be reused provided -// some namespaces for the lib and for debugging are setup correctly - -#define NS_LIBFABRIC oomph::libfabric -#define NS_MEMORY oomph::libfabric -#define NS_DEBUG oomph::debug - -#ifndef LF_DEB -# define LF_DEB(printer, Expr) \ - { \ - using namespace NS_DEBUG; \ - if constexpr (printer.is_enabled()) { printer.Expr; }; \ - } -#endif - -#define LFSOURCE_DIR "@OOMPH_SRC_LIBFABRIC_DIR@" -#define LFPRINT_HPP 
"@OOMPH_SRC_LIBFABRIC_DIR@/print.hpp" -#define LFCOUNT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/simple_counter.hpp" - -// oomph has a debug print helper file in the main source tree -#if __has_include(LFPRINT_HPP) -# include LFPRINT_HPP -# define has_debug 1 -#endif - -#if __has_include(LFCOUNT_HPP) -# include LFCOUNT_HPP -#endif #endif diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp deleted file mode 100644 index 9e91cec1..00000000 --- a/src/libfabric/locality.hpp +++ /dev/null @@ -1,206 +0,0 @@ -/* - * ghex-org - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include -#include -#include -#include -#include -// -#include -#include -// -#include -#include -// -#include "oomph_libfabric_defines.hpp" - -// Different providers use different address formats that we must accommodate in our locality object. -#ifdef HAVE_LIBFABRIC_GNI -# define HAVE_LIBFABRIC_LOCALITY_SIZE 48 -#endif - -#ifdef HAVE_LIBFABRIC_CXI -# ifdef HAVE_LIBFABRIC_CXI_1_15 -# define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(int) -# else -# define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(long int) -# endif -#endif - -#ifdef HAVE_LIBFABRIC_EFA -# define HAVE_LIBFABRIC_LOCALITY_SIZE 32 -#endif - -#if defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_TCP) || \ - defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_PSM2) -# define HAVE_LIBFABRIC_LOCALITY_SIZE 16 -#endif - -#if defined(HAVE_LIBFABRIC_SHM) -# define HAVE_LIBFABRIC_LOCALITY_SIZE 24 -#endif - -#if defined(HAVE_LIBFABRIC_LNX) -# define HAVE_LIBFABRIC_LOCALITY_SIZE 512 -#endif - -namespace oomph { - // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print loc_deb("LOCALTY"); -} // namespace oomph - -namespace oomph { namespace libfabric { - - struct locality; - - // -------------------------------------------------------------------- - // Locality, in this structure 
we store the information required by - // libfabric to make a connection to another node. - // With libfabric 1.4.x the array contains the fabric ip address stored - // as the second uint32_t in the array. For this reason we use an - // array of uint32_t rather than uint8_t/char so we can easily access - // the ip for debug/validation purposes - // -------------------------------------------------------------------- - namespace locality_defs { - // the number of 32bit ints stored in our array - uint32_t const array_size = HAVE_LIBFABRIC_LOCALITY_SIZE; - uint32_t const array_length = HAVE_LIBFABRIC_LOCALITY_SIZE / 4; - } // namespace locality_defs - - struct locality - { - // array type of our locality data - typedef std::array locality_data; - - static char const* type() { return "libfabric"; } - - explicit locality(locality_data const& in_data, struct fid_av* av) - { - std::memcpy(&data_[0], &in_data[0], locality_defs::array_size); - fi_address_ = 0; - av_ = av; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), to_str())); - } - - locality() - { - std::memset(&data_[0], 0x00, locality_defs::array_size); - fi_address_ = 0; - av_ = nullptr; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), to_str())); - } - - locality(locality const& other) - : data_(other.data_) - , fi_address_(other.fi_address_) - , av_(other.av_) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), to_str())); - } - - locality(locality const& other, fi_addr_t addr, struct fid_av* av) - : data_(other.data_) - , fi_address_(addr) - , av_(av) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), to_str())); - } - - locality(locality&& other) - : data_(std::move(other.data_)) - , fi_address_(other.fi_address_) - , av_(other.av_) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), to_str())); - } - - // provided to support sockets mode bootstrap - explicit locality(std::string const& address, std::string const& portnum) - { - LF_DEB(loc_deb, 
trace(NS_DEBUG::str<>("explicit construct-2"), address, ":", portnum)); - // - struct sockaddr_in socket_data; - memset(&socket_data, 0, sizeof(socket_data)); - socket_data.sin_family = AF_INET; - socket_data.sin_port = htons(std::stol(portnum)); - inet_pton(AF_INET, address.c_str(), &(socket_data.sin_addr)); - // - std::memcpy(&data_[0], &socket_data, locality_defs::array_size); - fi_address_ = 0; - av_ = nullptr; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), to_str())); - } - - locality& operator=(locality const& other) - { - data_ = other.data_; - fi_address_ = other.fi_address_; - av_ = other.av_; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy operator"), to_str(), other.to_str())); - return *this; - } - - bool operator==(locality const& other) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("equality operator"), to_str(), other.to_str())); - return std::memcmp(&data_, &other.data_, locality_defs::array_size) == 0; - } - - inline fi_addr_t const& fi_address() const { return fi_address_; } - - inline void set_fi_address(fi_addr_t fi_addr) { fi_address_ = fi_addr; } - - inline uint16_t port() const - { - uint16_t port = 256 * reinterpret_cast(data_.data())[2] + - reinterpret_cast(data_.data())[3]; - return port; - } - - inline locality_data const& fabric_data() const { return data_; } - - inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } - - std::string to_str() const - { - size_t buflen = 1024; - std::array buf; - if (!av_) { return "No address vector"; } - char const* straddr_ret = fi_av_straddr(av_, data_.data(), buf.data(), &buflen); -#ifdef HAVE_LIBFABRIC_LNX - return "LNX does not yet support straddr"; -#else - std::string result = straddr_ret ? 
straddr_ret : "Address formatting Error"; - return result; -#endif - } - - private: - friend bool operator==(locality const& lhs, locality const& rhs) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("equality friend"), lhs.to_str(), rhs.to_str())); - return ((lhs.data_ == rhs.data_) && (lhs.fi_address_ == rhs.fi_address_)); - } - - friend std::ostream& operator<<(std::ostream& os, locality const& loc) - { - for (uint32_t i = 0; i < locality_defs::array_length; ++i) { os << loc.data_[i]; } - return os; - } - - private: - locality_data data_; - fi_addr_t fi_address_; - struct fid_av* av_; - }; - -}} // namespace oomph::libfabric diff --git a/src/libfabric/memory_region.hpp b/src/libfabric/memory_region.hpp deleted file mode 100644 index 2028fc41..00000000 --- a/src/libfabric/memory_region.hpp +++ /dev/null @@ -1,409 +0,0 @@ -/* - * ghex-org - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include -// -#include -#include -// -#include -#include - -#include "fabric_error.hpp" -#include "oomph_libfabric_defines.hpp" - -#ifdef OOMPH_ENABLE_DEVICE -# include -#endif -// ------------------------------------------------------------------ - -namespace NS_MEMORY { - - static NS_DEBUG::enable_print mrn_deb("REGION_"); - - /* -struct fi_mr_attr { - union { - const struct iovec *mr_iov; - const struct fi_mr_dmabuf *dmabuf; - }; - size_t iov_count; - uint64_t access; - uint64_t offset; - uint64_t requested_key; - void *context; - size_t auth_key_size; - uint8_t *auth_key; - enum fi_hmem_iface iface; - union { - uint64_t reserved; - int cuda; - int ze; - int neuron; - int synapseai; - } device; - void *hmem_data; - size_t page_size; - const struct fid_mr *base_mr; - size_t sub_mr_cnt; -}; - -*/ - - // This is the only part of the code that actually - // calls libfabric functions - struct region_provider - { - // The internal memory 
region handle - using provider_region = struct fid_mr; - using provider_domain = struct fid_domain; - - // register region - static inline int fi_register_memory(provider_domain* pd, int device_id, void const* buf, - size_t len, uint64_t access_flags, uint64_t offset, uint64_t request_key, - struct fid_mr** mr) - { - [[maybe_unused]] auto scp = NS_MEMORY::mrn_deb.scope( - __func__, NS_DEBUG::hptr(buf), NS_DEBUG::dec<>(len), device_id); - // - struct iovec addresses = {/*.iov_base = */ const_cast(buf), /*.iov_len = */ len}; - fi_mr_attr attr = { - /*.mr_iov = */ {&addresses}, - /*.iov_count = */ 1, - /*.access = */ access_flags, - /*.offset = */ offset, - /*.requested_key = */ request_key, - /*.context = */ nullptr, - /*.auth_key_size = */ 0, - /*.auth_key = */ nullptr, - /*.iface = */ FI_HMEM_SYSTEM, - /*.device = */ {0}, -#if (FI_MAJOR_VERSION > 1) || ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION > 17) - /*.hmem_data = */ nullptr, -#endif -#if (FI_MAJOR_VERSION >= 2) - /*page_size = */ static_cast(sysconf(_SC_PAGESIZE)), - /*base_mr = */ nullptr, - /*sub_mr_cnt = */ 0, - }; -#else - }; -#endif - - if (device_id >= 0) - { -#ifdef OOMPH_ENABLE_DEVICE - attr.device.cuda = device_id; - int handle = hwmalloc::get_device_id(); - attr.device.cuda = handle; -# if defined(OOMPH_DEVICE_CUDA) - attr.iface = FI_HMEM_CUDA; - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("CUDA"), "set device id", device_id, handle)); -# elif defined(OOMPH_DEVICE_HIP) - attr.iface = FI_HMEM_ROCR; - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("HIP"), "set device id", device_id, handle)); -# endif -#endif - } - uint64_t flags = 0; - int ret = fi_mr_regattr(pd, &attr, flags, mr); - if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "register_memory"); } - return ret; - } - - // unregister region - static inline int unregister_memory(provider_region* region) - { - return fi_close(®ion->fid); - } - - // Default registration flags for this provider - static inline constexpr int 
access_flags() - { - return FI_READ | FI_WRITE | FI_RECV | FI_SEND /*| FI_REMOTE_READ | FI_REMOTE_WRITE*/; - } - - // Get the local descriptor of the memory region. - static inline void* get_local_key(provider_region* const region) - { - return fi_mr_desc(region); - } - - // Get the remote key of the memory region. - static inline uint64_t get_remote_key(provider_region* const region) - { - return fi_mr_key(region); - } - }; - - // -------------------------------------------------------------------- - // This is a handle to a small chunk of memory that has been registered - // as part of a much larger allocation (a memory_segment) - struct memory_handle - { - // -------------------------------------------------------------------- - using provider_region = region_provider::provider_region; - - // -------------------------------------------------------------------- - // Default constructor creates unusable handle(region) - memory_handle() - : address_{nullptr} - , region_{nullptr} - , size_{0} - , used_space_{0} - { - } - memory_handle(memory_handle const&) noexcept = default; - memory_handle& operator=(memory_handle const&) noexcept = default; - - memory_handle(provider_region* region, unsigned char* addr, - std::size_t size /*, uint32_t flags*/) noexcept - : address_{addr} - , region_{region} - , size_{uint32_t(size)} - , used_space_{0} - { - // LF_DEB(NS_MEMORY::mrn_deb, - // trace(NS_DEBUG::str<>("memory_handle"), *this)); - } - - // -------------------------------------------------------------------- - // move constructor, clear other region so that it is not unregistered twice - memory_handle(memory_handle&& other) noexcept - : address_{other.address_} - , region_{std::exchange(other.region_, nullptr)} - , size_{other.size_} - , used_space_{other.used_space_} - { - } - - // -------------------------------------------------------------------- - // move assignment, clear other region so that it is not unregistered twice - memory_handle& operator=(memory_handle&& 
other) noexcept - { - address_ = other.address_; - region_ = std::exchange(other.region_, nullptr); - size_ = other.size_; - used_space_ = other.used_space_; - return *this; - } - - // -------------------------------------------------------------------- - // Return the address of this memory region block. - inline unsigned char* get_address(void) const { return address_; } - - // -------------------------------------------------------------------- - // Get the local descriptor of the memory region. - inline void* get_local_key(void) const { return region_provider::get_local_key(region_); } - - // -------------------------------------------------------------------- - // Get the remote key of the memory region. - inline uint64_t get_remote_key(void) const - { - return region_provider::get_remote_key(region_); - } - - // -------------------------------------------------------------------- - // Get the size of the memory chunk usable by this memory region, - // this may be smaller than the value returned by get_length - // if the region is a sub region (partial region) within another block - inline uint64_t get_size(void) const { return size_; } - - // -------------------------------------------------------------------- - // Get the size used by a message in the memory region. - inline uint32_t get_message_length(void) const { return used_space_; } - - // -------------------------------------------------------------------- - // Set the size used by a message in the memory region. 
- inline void set_message_length(uint32_t length) { used_space_ = length; } - - // -------------------------------------------------------------------- - void release_region() noexcept { region_ = nullptr; } - - // -------------------------------------------------------------------- - // return the underlying libfabric region handle - inline provider_region* get_region() const { return region_; } - - // -------------------------------------------------------------------- - // Deregister the memory region. - // returns 0 when successful, -1 otherwise - int deregister(void) const - { - if (region_ /*&& !get_user_region()*/) - { - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("release"), region_)); - // - if (region_provider::unregister_memory(region_)) - { - LF_DEB(NS_MEMORY::mrn_deb, error("fi_close mr failed")); - return -1; - } - else - { - LF_DEB( - NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("de-Registered region"), *this)); - } - region_ = nullptr; - } - return 0; - } - - // -------------------------------------------------------------------- - friend std::ostream& operator<<(std::ostream& os, memory_handle const& region) - { - (void) region; -#if 1 || has_debug - using namespace NS_DEBUG; - os << "region " - << hptr(®ion) - //<< " fi_region " << hptr(region.region_) - << " address " << hptr(region.address_) << " size " - << hex<6>(region.size_) - //<< " used_space " << hex<6>(region.used_space_/*size_*/) - << " loc key " - << hptr(region.region_ ? region_provider::get_local_key(region.region_) : nullptr) - << " rem key " - << hptr(region.region_ ? region_provider::get_remote_key(region.region_) : 0); - ///// clang-format off - ///// clang-format on -#endif - return os; - } - - protected: - // This gives the start address of this region. 
- // This is the address that should be used for data storage - unsigned char* address_; - - // The hardware level handle to the region (as returned from libfabric fi_mr_reg) - mutable provider_region* region_; - - // The (maximum available) size of the memory buffer - uint32_t size_; - - // Space used by a message in the memory region. - // This may be smaller/less than the size available if more space - // was allocated than it turns out was needed - mutable uint32_t used_space_; - }; - - // -------------------------------------------------------------------- - // a memory segment is a pinned block of memory that has been specialized - // by a particular region provider. Each provider (infiniband, libfabric, - // other) has a different definition for the object and the protection - // domain used to limit access. - // -------------------------------------------------------------------- - struct memory_segment : public memory_handle - { - using provider_domain = region_provider::provider_domain; - using provider_region = region_provider::provider_region; - using handle_type = memory_handle; - - // -------------------------------------------------------------------- - memory_segment(provider_region* region, unsigned char* address, unsigned char* base_address, - uint64_t size) - : memory_handle(region, address, size) - , base_addr_(base_address) - { - } - - // -------------------------------------------------------------------- - // move constructor, clear other region - memory_segment(memory_segment&& other) noexcept - : memory_handle(std::move(other)) - , base_addr_{std::exchange(other.base_addr_, nullptr)} - { - } - - // -------------------------------------------------------------------- - // move assignment, clear other region - memory_segment& operator=(memory_segment&& other) noexcept - { - memory_handle(std::move(other)); - region_ = std::exchange(other.region_, nullptr); - return *this; - } - - // 
-------------------------------------------------------------------- - // construct a memory region object by registering an existing address buffer - // we do not cache local/remote keys here because memory segments are only - // used by the heap to store chunks and the user will always receive - // a memory_handle - which does have keys cached - memory_segment(provider_domain* pd, void const* buffer, uint64_t const length, bool bind_mr, - void* ep, int device_id) - { - // an rma key counter to keep some providers (CXI) happy - static std::atomic key = 0; - // - address_ = static_cast(const_cast(buffer)); - size_ = length; - used_space_ = length; - region_ = nullptr; - // - base_addr_ = memory_handle::address_; - LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("memory_segment"), *this, device_id)); - - int ret = region_provider::fi_register_memory(pd, device_id, buffer, length, - region_provider::access_flags(), 0, key++, &(region_)); - if (!ret) - { - LF_DEB(NS_MEMORY::mrn_deb, - trace(str<>("Registered region"), "device", device_id, *this)); - } - - if (bind_mr) - { - ret = fi_mr_bind(region_, (struct fid*) ep, 0); - if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_bind"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("Bound region"), *this)); } - - ret = fi_mr_enable(region_); - if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_enable"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("Enabled region"), *this)); } - } - } - - // -------------------------------------------------------------------- - // destroy the region and memory according to flag settings - ~memory_segment() { deregister(); } - - handle_type get_handle(std::size_t offset, std::size_t size) const noexcept - { - return memory_handle(region_, base_addr_ + offset, size); - } - - // -------------------------------------------------------------------- - // Get the address of the base memory region. 
- // This is the address of the memory allocated from the system - inline unsigned char* get_base_address(void) const { return base_addr_; } - - // -------------------------------------------------------------------- - friend std::ostream& operator<<(std::ostream& os, memory_segment const& region) - { - (void) region; -#if has_debug - // clang-format off - os << *static_cast(®ion) - << " base address " << NS_DEBUG::hptr(region.base_addr_); - // clang-format on -#endif - return os; - } - - public: - // this is the base address of the memory registered by this segment - // individual memory_handles are offset from this address - unsigned char* base_addr_; - }; - -} // namespace NS_MEMORY diff --git a/src/libfabric/operation_context.cpp b/src/libfabric/operation_context.cpp index 0f6de97a..69de9a10 100644 --- a/src/libfabric/operation_context.cpp +++ b/src/libfabric/operation_context.cpp @@ -13,47 +13,50 @@ #include #include -namespace oomph::libfabric { - void operation_context::handle_cancelled() +namespace oomph::libfabric +{ +void +operation_context::handle_cancelled() +{ + LIBFATBAT_SCOPE(opctx_deb, "{} {} request {}", static_cast(this), __func__, + static_cast(&m_req)); + // enqueue the cancelled/callback + if (std::holds_alternative(m_req)) { - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::hptr(this), __func__); - // enqueue the cancelled/callback - if (std::holds_alternative(m_req)) - { - // regular (non-shared) recv - auto s = std::get(m_req); - while (!(s->m_comm->m_recv_cb_cancel.push(s))) {} - } - else if (std::holds_alternative(m_req)) - { - // shared recv - auto s = std::get(m_req); - while (!(s->m_ctxt->m_recv_cb_cancel.push(s))) {} - } - else { throw std::runtime_error("Request state invalid in handle_cancelled"); } + // regular (non-shared) recv + auto s = std::get(m_req); + while (!(s->m_comm->m_recv_cb_cancel.push(s))) {} + } + else if (std::holds_alternative(m_req)) + { + // shared recv + auto s = std::get(m_req); + while 
(!(s->m_ctxt->m_recv_cb_cancel.push(s))) {} } + else + { + throw std::runtime_error("Request state invalid in handle_cancelled"); + } +} + +int +operation_context::handle_tagged_recv_completion_impl(void* user_data) +{ + LIBFATBAT_SCOPE(opctx_deb, "{} {} request {}", static_cast(this), __func__, + static_cast(&m_req)); - int operation_context::handle_tagged_recv_completion_impl(void* user_data) + if (std::holds_alternative(m_req)) { - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::hptr(this), __func__); - if (std::holds_alternative(m_req)) + // regular (non-shared) recv + auto s = std::get(m_req); + //if (std::this_thread::get_id() == thread_id_) + if (reinterpret_cast(user_data) == s->m_comm) { - // regular (non-shared) recv - auto s = std::get(m_req); - //if (std::this_thread::get_id() == thread_id_) - if (reinterpret_cast(user_data) == s->m_comm) + if (!s->m_comm->has_reached_recursion_depth()) { - if (!s->m_comm->has_reached_recursion_depth()) - { - auto inc = s->m_comm->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); - } - else - { - // enqueue the callback - while (!(s->m_comm->m_recv_cb_queue.push(s))) {} - } + auto inc = s->m_comm->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); } else { @@ -61,52 +64,52 @@ namespace oomph::libfabric { while (!(s->m_comm->m_recv_cb_queue.push(s))) {} } } - else if (std::holds_alternative(m_req)) + else { - // shared recv - auto s = std::get(m_req); - if (!s->m_comm->m_context->has_reached_recursion_depth()) - { - auto inc = s->m_comm->m_context->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); - } - else - { - // enqueue the callback - while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} - } + // enqueue the callback + while (!(s->m_comm->m_recv_cb_queue.push(s))) {} + } + } + else if (std::holds_alternative(m_req)) + { + // shared recv + auto s = std::get(m_req); + if (!s->m_comm->m_context->has_reached_recursion_depth()) + { + auto inc = 
s->m_comm->m_context->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); } else { - detail::request_state** req = reinterpret_cast(&m_req); - LF_DEB(NS_MEMORY::opctx_deb<9>, - error( - str<>("invalid request_state"), this, "request", hptr(req))); - throw std::runtime_error("Request state invalid in handle_tagged_recv"); + // enqueue the callback + while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} } - return 1; } + else + { + detail::request_state** req = reinterpret_cast(&m_req); + LIBFATBAT_DEBUG(opctx_deb, "{:<20} tagged recv completion handler : context {} request {}", + "invalid request state", static_cast(this), static_cast(req)); + throw std::runtime_error("Request state invalid in handle_tagged_recv"); + } + return 1; +} - int operation_context::handle_tagged_send_completion_impl(void* user_data) +int +operation_context::handle_tagged_send_completion_impl(void* user_data) +{ + if (std::holds_alternative(m_req)) { - if (std::holds_alternative(m_req)) + // regular (non-shared) recv + auto s = std::get(m_req); + if (reinterpret_cast(user_data) == s->m_comm) { - // regular (non-shared) recv - auto s = std::get(m_req); - if (reinterpret_cast(user_data) == s->m_comm) + if (!s->m_comm->has_reached_recursion_depth()) { - if (!s->m_comm->has_reached_recursion_depth()) - { - auto inc = s->m_comm->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); - } - else - { - // enqueue the callback - while (!(s->m_comm->m_send_cb_queue.push(s))) {} - } + auto inc = s->m_comm->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); } else { @@ -114,23 +117,32 @@ namespace oomph::libfabric { while (!(s->m_comm->m_send_cb_queue.push(s))) {} } } - else if (std::holds_alternative(m_req)) + else { - // shared recv - auto s = std::get(m_req); - if (!s->m_comm->m_context->has_reached_recursion_depth()) - { - auto inc = s->m_comm->m_context->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); - } - else - { - // enqueue 
the callback - while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} - } + // enqueue the callback + while (!(s->m_comm->m_send_cb_queue.push(s))) {} + } + } + else if (std::holds_alternative(m_req)) + { + // shared recv + auto s = std::get(m_req); + if (!s->m_comm->m_context->has_reached_recursion_depth()) + { + auto inc = s->m_comm->m_context->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); } - else { throw std::runtime_error("Request state invalid in handle_tagged_send"); } - return 1; + else + { + // enqueue the callback + while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} + } + } + else + { + throw std::runtime_error("Request state invalid in handle_tagged_send"); } -} // namespace oomph::libfabric + return 1; +} +} // namespace oomph::libfabric diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index faed3d70..c16c1505 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -13,40 +13,41 @@ // #include // -#include "operation_context_base.hpp" +#include +#include // -namespace oomph::libfabric { +namespace oomph::libfabric +{ + +inline auto opctx_deb = libfatbat::log::create("OP__CXT"); + +// This struct holds the ready state of a future +// we must also store the context used in libfabric, in case +// a request is cancelled - fi_cancel(...) 
needs it +struct operation_context : public libfatbat::operation_context_base +{ + std::variant m_req; + + template + operation_context(RequestState* req) + : operation_context_base() + , m_req{req} + { + LIBFATBAT_SCOPE(opctx_deb, "{} {} request {}", static_cast(this), __func__, + static_cast(req)); + } - template - inline NS_DEBUG::print_threshold opctx_deb("OP__CXT"); + // -------------------------------------------------------------------- + // When a completion returns FI_ECANCELED, this is called + void handle_cancelled(); - // This struct holds the ready state of a future - // we must also store the context used in libfabric, in case - // a request is cancelled - fi_cancel(...) needs it - struct operation_context : public operation_context_base - { - std::variant m_req; - - template - operation_context(RequestState* req) - : operation_context_base() - , m_req{req} - { - [[maybe_unused]] auto scp = - opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__, "request", req); - } - - // -------------------------------------------------------------------- - // When a completion returns FI_ECANCELED, this is called - void handle_cancelled(); - - // -------------------------------------------------------------------- - // Called when a tagged recv completes - int handle_tagged_recv_completion_impl(void* user_data); - - // -------------------------------------------------------------------- - // Called when a tagged send completes - int handle_tagged_send_completion_impl(void* user_data); - }; - -} // namespace oomph::libfabric + // -------------------------------------------------------------------- + // Called when a tagged recv completes + int handle_tagged_recv_completion_impl(void* user_data); + + // -------------------------------------------------------------------- + // Called when a tagged send completes + int handle_tagged_send_completion_impl(void* user_data); +}; + +} // namespace oomph::libfabric diff --git a/src/libfabric/operation_context_base.hpp 
b/src/libfabric/operation_context_base.hpp deleted file mode 100644 index 462c79b5..00000000 --- a/src/libfabric/operation_context_base.hpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * ghex-org - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include -#include "oomph_libfabric_defines.hpp" - -namespace NS_LIBFABRIC { - - class controller; - - static NS_DEBUG::enable_print ctx_bas("CTXBASE"); - - // This struct holds the ready state of a future - // we must also store the context used in libfabric, in case - // a request is cancelled - fi_cancel(...) needs it - template - struct operation_context_base - { - private: - // libfabric requires some space for it's internal bookkeeping - // so the first member of this struct must be fi_context - fi_context context_reserved_space; - - public: - operation_context_base() - : context_reserved_space() - { - [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::hptr(this), __func__); - } - - // error - void handle_error(struct fi_cq_err_entry& err) - { - static_cast(this)->handle_error_impl(err); - } - void handle_error_impl(struct fi_cq_err_entry& /*err*/) { std::terminate(); } - - void handle_cancelled() { static_cast(this)->handle_cancelled_impl(); } - void handle_cancelled_impl() { std::terminate(); } - - // send - int handle_send_completion() - { - return static_cast(this)->handle_send_completion_impl(); - } - int handle_send_completion_impl() { return 0; } - - // tagged send - int handle_tagged_send_completion(void* user_data) - { - return static_cast(this)->handle_tagged_send_completion_impl(user_data); - } - int handle_tagged_send_completion_impl(void* /*user_data*/) { return 0; } - - // recv - int handle_recv_completion(std::uint64_t len) - { - return static_cast(this)->handle_recv_completion_impl(len); - } - int handle_recv_completion_impl(std::uint64_t /*len*/) { return 0; } - 
- // tagged recv - int handle_tagged_recv_completion(void* user_data) - { - return static_cast(this)->handle_tagged_recv_completion_impl(user_data); - } - int handle_tagged_recv_completion_impl(bool /*threadlocal*/) { return 0; } - - void handle_rma_read_completion() - { - static_cast(this)->handle_rma_read_completion_impl(); - } - void handle_rma_read_completion_impl() {} - - // unknown sender = new connection - int handle_new_connection(controller* ctrl, std::uint64_t len) - { - return static_cast(this)->handle_new_connection_impl(ctrl, len); - } - int handle_new_connection_impl(controller*, std::uint64_t) { return 0; } - }; - - // provided so that a pointer can be cast to this and the operation_context_type queried - struct unspecialized_context : public operation_context_base - { - }; -} // namespace NS_LIBFABRIC diff --git a/src/libfabric/print.hpp b/src/libfabric/print.hpp deleted file mode 100644 index 04364b98..00000000 --- a/src/libfabric/print.hpp +++ /dev/null @@ -1,705 +0,0 @@ -/* - * ghex-org - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -// -#if defined(__linux) || defined(linux) || defined(__linux__) -# include -# include -#elif defined(__APPLE__) -# include -# include -# define environ (*_NSGetEnviron()) -#else -extern char** environ; -#endif - -#include - -// ------------------------------------------------------------ -// This file provides a simple to use printf style debugging -// tool that can be used on a per file basis to enable output. -// It is not intended to be exposed to users, but rather as -// an aid for development. 
-// ------------------------------------------------------------ -// Usage: Instantiate a debug print object at the top of a file -// using a template param of true/false to enable/disable output. -// When the template parameter is false, the optimizer will -// not produce code and so the impact is nil. -// -// static debug::enable_print spq_deb("SUBJECT"); -// -// Later in code you may print information using -// -// spq_deb.debug(str<16>("cleanup_terminated"), "v1" -// , "D" , dec<2>(domain_num) -// , "Q" , dec<3>(q_index) -// , "thread_num", dec<3>(local_num)); -// -// various print formatters (dec/hex/str) are supplied to make -// the output regular and aligned for easy parsing/scanning. -// -// In tight loops, huge amounts of debug information might be -// produced, so a simple timer based output is provided -// To instantiate a timed output -// static auto getnext = spq_deb.make_timer(1 -// , str<16>("get_next_thread")); -// then inside a tight loop -// spq_deb.timed(getnext, dec<>(thread_num)); -// The output will only be produced every N seconds -// ------------------------------------------------------------ - -#define NS_DEBUG oomph::debug -#ifndef LF_DEB -# define LF_DEB(printer, Expr) \ - { \ - using namespace NS_DEBUG; \ - if constexpr (printer.is_enabled()) { printer.Expr; }; \ - } -#endif - -// ------------------------------------------------------------ -/// \cond NODETAIL -namespace NS_DEBUG { - - // ------------------------------------------------------------------ - // format as zero padded int - // ------------------------------------------------------------------ - namespace detail { - - template - struct dec - { - constexpr dec(T const& v) - : data_(v) - { - } - - T const& data_; - - friend std::ostream& operator<<(std::ostream& os, dec const& d) - { - os << std::right << std::setfill('0') << std::setw(N) << std::noshowbase << std::dec - << d.data_; - return os; - } - }; - } // namespace detail - - template - constexpr detail::dec dec(T const& 
v) - { - return detail::dec(v); - } - - // ------------------------------------------------------------------ - // format as pointer - // ------------------------------------------------------------------ - struct hptr - { - hptr(void const* v) - : data_(v) - { - } - hptr(std::uintptr_t const v) - : data_(reinterpret_cast(v)) - { - } - void const* data_; - friend std::ostream& operator<<(std::ostream& os, hptr const& d) - { - os << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase - << std::hex << reinterpret_cast(d.data_); - return os; - } - }; - - // ------------------------------------------------------------------ - // format as zero padded hex - // ------------------------------------------------------------------ - namespace detail { - - template - struct hex; - - template - struct hex::value>::type> - { - constexpr hex(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, hex const& d) - { - os << std::right << "0x" << std::setfill('0') << std::setw(N) << std::noshowbase - << std::hex << d.data_; - return os; - } - }; - - template - struct hex::value>::type> - { - constexpr hex(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, hex const& d) - { - os << std::right << std::setw(N) << std::noshowbase << std::hex << d.data_; - return os; - } - }; - } // namespace detail - - template - constexpr detail::hex hex(T const& v) - { - return detail::hex(v); - } - - // ------------------------------------------------------------------ - // format as binary bits - // ------------------------------------------------------------------ - namespace detail { - - template - struct bin - { - constexpr bin(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, bin const& d) - { - os << std::bitset(d.data_); - return os; - } - }; - } // namespace detail - - template - constexpr detail::bin bin(T const& v) - 
{ - return detail::bin(v); - } - - // ------------------------------------------------------------------ - // format as padded string - // ------------------------------------------------------------------ - template - struct str - { - constexpr str(char const* v) - : data_(v) - { - } - - char const* data_; - - friend std::ostream& operator<<(std::ostream& os, str const& d) - { - os << std::left << std::setfill(' ') << std::setw(N) << d.data_; - return os; - } - }; - - // ------------------------------------------------------------------ - // helper fuction for printing CRC32 - // ------------------------------------------------------------------ - inline uint32_t crc32(void const* address, size_t length) - { - boost::crc_32_type result; - result.process_bytes(address, length); - return result.checksum(); - } - - // ------------------------------------------------------------------ - // helper fuction for printing short memory dump and crc32 - // useful for debugging corruptions in buffers during - // rma or other transfers - // ------------------------------------------------------------------ - struct mem_crc32 - { - mem_crc32(void const* a, std::size_t len, char const* txt) - : addr_(reinterpret_cast(a)) - , len_(len) - , txt_(txt) - { - } - std::uint8_t const* addr_; - std::size_t const len_; - char const* txt_; - friend std::ostream& operator<<(std::ostream& os, mem_crc32 const& p) - { - using namespace NS_DEBUG; - std::uint8_t const* byte = static_cast(p.addr_); - os << "Memory:"; - os << " address " << hptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) - << " CRC32:" << hex<8, std::size_t>(crc32(p.addr_, p.len_)) << "\n"; - size_t i = 0; - while (i < std::min(size_t(128), p.len_)) - { - os << "0x"; - for (int j = 7; j >= 0; j--) - { - os << std::hex << std::setfill('0') << std::setw(2) - << (((i + j) > p.len_) ? 
(int) 0 : (int) byte[i + j]); - } - i += 8; - if (i % 32 == 0) - os << std::endl; - else - os << " "; - } - os << ": " << p.txt_; - return os; - } - }; - - namespace detail { - - template - void tuple_print(std::ostream& os, TupleType const& t, std::index_sequence) - { - (..., (os << (I == 0 ? "" : " ") << std::get(t))); - } - - template - void tuple_print(std::ostream& os, std::tuple const& t) - { - tuple_print(os, t, std::make_index_sequence()); - } - } // namespace detail - - namespace detail { - - // ------------------------------------------------------------------ - // helper class for printing thread ID - // ------------------------------------------------------------------ - struct current_thread_print_helper - { - }; - - inline std::ostream& operator<<(std::ostream& os, current_thread_print_helper const&) - { - os << hex<12, std::thread::id>(std::this_thread::get_id()) -#ifdef DEBUGGING_PRINT_LINUX - << " cpu " << debug::dec<3, int>(sched_getcpu()) << " "; -#else - << " cpu " - << "--- "; -#endif - return os; - } - - // ------------------------------------------------------------------ - // helper class for printing time since start - // ------------------------------------------------------------------ - struct hostname_print_helper - { - char const* get_hostname() const - { - static bool initialized = false; - static char hostname_[20]; - if (!initialized) - { - initialized = true; - gethostname(hostname_, std::size_t(12)); - std::string temp = "(" + std::to_string(guess_rank()) + ")"; - std::strcat(hostname_, temp.c_str()); - } - return hostname_; - } - - int guess_rank() const - { - std::vector env_strings{"_RANK=", "_NODEID="}; - for (char** current = environ; *current; current++) - { - auto e = std::string(*current); - for (auto s : env_strings) - { - auto pos = e.find(s); - if (pos != std::string::npos) - { - //std::cout << "Got a rank string : " << e << std::endl; - return std::stoi(e.substr(pos + s.size(), 5)); - } - } - } - return -1; - } - }; - 
- inline std::ostream& operator<<(std::ostream& os, hostname_print_helper const& h) - { - os << debug::str<13>(h.get_hostname()) << " "; - return os; - } - - // ------------------------------------------------------------------ - // helper class for printing time since start - // ------------------------------------------------------------------ - struct current_time_print_helper - { - }; - - inline std::ostream& operator<<(std::ostream& os, current_time_print_helper const&) - { - using namespace std::chrono; - static steady_clock::time_point log_t_start = steady_clock::now(); - // - auto now = steady_clock::now(); - auto nowt = duration_cast(now - log_t_start).count(); - // - os << debug::dec<10>(nowt) << " "; - return os; - } - - template - void display(char const* prefix, Args const&... args) - { - // using a temp stream object with a single copy to cout at the end - // prevents multiple threads from injecting overlapping text - std::stringstream tempstream; - tempstream << prefix << detail::current_time_print_helper() - << detail::current_thread_print_helper() << detail::hostname_print_helper(); - ((tempstream << args << " "), ...); - tempstream << "\n"; - std::cout << tempstream.str() << std::flush; - } - - template - void debug(Args const&... args) - { - display(" ", args...); - } - - template - void warning(Args const&... args) - { - display(" ", args...); - } - - template - void error(Args const&... args) - { - display(" ", args...); - } - - template - void scope(Args const&... args) - { - display(" ", args...); - } - - template - void trace(Args const&... args) - { - display(" ", args...); - } - - template - void timed(Args const&... args) - { - display(" ", args...); - } - } // namespace detail - - template - struct scoped_var - { - // capture tuple elements by reference - no temp vars in constructor please - char const* prefix_; - std::tuple const message_; - std::string buffered_msg; - - // - scoped_var(char const* p, Args const&... 
args) - : prefix_(p) - , message_(args...) - { - std::stringstream tempstream; - detail::tuple_print(tempstream, message_); - buffered_msg = tempstream.str(); - detail::display(" ", prefix_, debug::str<>(">> enter <<"), tempstream.str()); - } - - ~scoped_var() - { - detail::display(" ", prefix_, debug::str<>("<< leave >>"), buffered_msg); - } - }; - - template - struct timed_var - { - mutable std::chrono::steady_clock::time_point time_start_; - double const delay_; - std::tuple const message_; - // - timed_var(double const& delay, Args const&... args) - : time_start_(std::chrono::steady_clock::now()) - , delay_(delay) - , message_(args...) - { - } - - bool elapsed(std::chrono::steady_clock::time_point const& now) const - { - double elapsed_ = - std::chrono::duration_cast>(now - time_start_) - .count(); - - if (elapsed_ > delay_) - { - time_start_ = now; - return true; - } - return false; - } - - friend std::ostream& operator<<(std::ostream& os, timed_var const& ti) - { - detail::tuple_print(os, ti.message_); - return os; - } - }; - - /////////////////////////////////////////////////////////////////////////// - template - struct enable_print; - - // when false, debug statements should produce no code - template <> - struct enable_print - { - constexpr enable_print(char const*) {} - - constexpr bool is_enabled() const { return false; } - - template - constexpr void debug(Args const&...) const - { - } - - template - constexpr void warning(Args const&...) const - { - } - - template - constexpr void trace(Args const&...) const - { - } - - template - constexpr void error(Args const&...) const - { - } - - template - constexpr void timed(Args const&...) 
const - { - } - - template - constexpr void array(std::string const&, std::vector const&) const - { - } - - template - constexpr void array(std::string const&, std::array const&) const - { - } - - template - constexpr void array(std::string const&, Iter, Iter) const - { - } - - template - constexpr bool scope(Args const&...) - { - return true; - } - - template - constexpr bool declare_variable(Args const&...) const - { - return true; - } - - template - constexpr void set(T&, V const&) - { - } - - // @todo, return void so that timers have zero footprint when disabled - template - constexpr int make_timer(double const, Args const&...) const - { - return 0; - } - - template - constexpr bool eval(Expr const&) - { - return true; - } - }; - - // when true, debug statements produce valid output - template <> - struct enable_print - { - private: - char const* prefix_; - - public: - constexpr enable_print() - : prefix_("") - { - } - - constexpr enable_print(char const* p) - : prefix_(p) - { - } - - constexpr bool is_enabled() const { return true; } - - template - constexpr void debug(Args const&... args) const - { - detail::debug(prefix_, args...); - } - - template - constexpr void warning(Args const&... args) const - { - detail::warning(prefix_, args...); - } - - template - constexpr void trace(Args const&... args) const - { - detail::trace(prefix_, args...); - } - - template - constexpr void error(Args const&... args) const - { - detail::error(prefix_, args...); - } - - template - scoped_var scope(Args const&... args) - { - return scoped_var(prefix_, args...); - } - - template - void timed(timed_var const& init, Args const&... 
args) const - { - auto now = std::chrono::steady_clock::now(); - if (init.elapsed(now)) { detail::timed(prefix_, init, args...); } - } - - template - void array(std::string const& name, std::vector const& v) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; - std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); - std::cout << "\n"; - } - - template - void array(std::string const& name, std::array const& v) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; - std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); - std::cout << "\n"; - } - - template - void array(std::string const& name, Iter begin, Iter end) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(std::distance(begin, end)) - << "} : "; - std::copy(begin, end, - std::ostream_iterator::value_type>( - std::cout, ", ")); - std::cout << std::endl; - } - - template - T declare_variable(Args const&... args) const - { - return T(args...); - } - - template - void set(T& var, V const& val) - { - var = val; - } - - template - timed_var make_timer(double const delay, Args const... 
args) const - { - return timed_var(delay, args...); - } - - template - auto eval(Expr const& e) - { - return e(); - } - }; - - // ------------------------------------------------------------------ - // helper for N>M true/false - // ------------------------------------------------------------------ - template - struct check_level : std::integral_constant - { - }; - - template - struct print_threshold : enable_print::value> - { - using base_type = enable_print::value>; - // inherit constructor - using base_type::base_type; - }; - -} // namespace NS_DEBUG -/// \endcond diff --git a/src/libfabric/request_state.hpp b/src/libfabric/request_state.hpp index 74958fc5..816a1050 100644 --- a/src/libfabric/request_state.hpp +++ b/src/libfabric/request_state.hpp @@ -13,88 +13,91 @@ #include "../request_state_base.hpp" #include "./operation_context.hpp" -namespace oomph { namespace detail { +namespace oomph +{ +namespace detail +{ +inline auto req_deb = libfatbat::log::create("Request"); + +struct request_state +: public util::enable_shared_from_this +, public request_state_base +{ + using base = request_state_base; + using shared_ptr_t = util::unsafe_shared_ptr; + using operation_context = libfabric::operation_context; + + operation_context m_operation_context; + util::unsafe_shared_ptr m_self_ptr; + + request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, std::size_t* scheduled, + rank_type rank, tag_type tag, cb_type&& cb) + : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} + , m_operation_context{this} + { + } + + void progress(); + + bool cancel(); + + void create_self_ref() + { + // create a self-reference cycle!! 
+ // this is useful if we only keep a raw pointer around internally, which still is supposed + // to keep the object alive + m_self_ptr = shared_from_this(); + } + + shared_ptr_t release_self_ref() noexcept + { + assert(((bool)m_self_ptr) && "doesn't own a self-reference!"); + return std::move(m_self_ptr); + } +}; + +struct shared_request_state +: public std::enable_shared_from_this +, public request_state_base +{ + using base = request_state_base; + using shared_ptr_t = std::shared_ptr; + using operation_context = libfabric::operation_context; + + operation_context m_operation_context; + std::shared_ptr m_self_ptr; + + shared_request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, + std::atomic* scheduled, rank_type rank, tag_type tag, cb_type&& cb) + : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} + , m_operation_context{this} + { + LIBFATBAT_SCOPE(req_deb, "{} {}", static_cast(this), __func__); + } - struct request_state - : public util::enable_shared_from_this - , public request_state_base + ~shared_request_state() { - using base = request_state_base; - using shared_ptr_t = util::unsafe_shared_ptr; - using operation_context = libfabric::operation_context; - - operation_context m_operation_context; - util::unsafe_shared_ptr m_self_ptr; - - request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, - std::size_t* scheduled, rank_type rank, tag_type tag, cb_type&& cb) - : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} - , m_operation_context{this} - { - } - - void progress(); - - bool cancel(); - - void create_self_ref() - { - // create a self-reference cycle!! 
- // this is useful if we only keep a raw pointer around internally, which still is supposed - // to keep the object alive - m_self_ptr = shared_from_this(); - } - - shared_ptr_t release_self_ref() noexcept - { - assert(((bool) m_self_ptr) && "doesn't own a self-reference!"); - return std::move(m_self_ptr); - } - }; - - struct shared_request_state - : public std::enable_shared_from_this - , public request_state_base + LIBFATBAT_SCOPE(req_deb, "{} {}", static_cast(this), __func__); + } + + void progress(); + + bool cancel(); + + void create_self_ref() { - using base = request_state_base; - using shared_ptr_t = std::shared_ptr; - using operation_context = libfabric::operation_context; - - operation_context m_operation_context; - std::shared_ptr m_self_ptr; - - shared_request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, - std::atomic* scheduled, rank_type rank, tag_type tag, cb_type&& cb) - : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} - , m_operation_context{this} - { - [[maybe_unused]] auto scp = - libfabric::opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - } - - ~shared_request_state() - { - [[maybe_unused]] auto scp = - libfabric::opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__); - } - - void progress(); - - bool cancel(); - - void create_self_ref() - { - // create a self-reference cycle!! - // this is useful if we only keep a raw pointer around internally, which still is supposed - // to keep the object alive - m_self_ptr = shared_from_this(); - } - - shared_ptr_t release_self_ref() noexcept - { - assert(((bool) m_self_ptr) && "doesn't own a self-reference!"); - return std::move(m_self_ptr); - } - }; - -}} // namespace oomph::detail + // create a self-reference cycle!! 
+ // this is useful if we only keep a raw pointer around internally, which still is supposed + // to keep the object alive + m_self_ptr = shared_from_this(); + } + + shared_ptr_t release_self_ref() noexcept + { + assert(((bool)m_self_ptr) && "doesn't own a self-reference!"); + return std::move(m_self_ptr); + } +}; + +} // namespace detail +} // namespace oomph diff --git a/src/libfabric/test/check_libfabric.cpp b/src/libfabric/test/check_libfabric.cpp index 11d9788e..d91569b2 100644 --- a/src/libfabric/test/check_libfabric.cpp +++ b/src/libfabric/test/check_libfabric.cpp @@ -10,22 +10,24 @@ #include #include -#include "../benchmarks/mpi_environment.hpp" +#include +#include // +#include "../benchmarks/mpi_environment.hpp" #include "../communicator.hpp" #include "../context.hpp" -#include - -int main(int argc, char** argv) +int +main(int argc, char** argv) { + libfatbat::log::init_from_env(); using namespace oomph; - bool const message_pool_never_free = false; + bool const message_pool_never_free = false; std::size_t const message_pool_reserve = 1024 * 1024 * 128; - bool const multi_threaded = true; - bool debug = true; + bool const multi_threaded = true; + bool debug = true; // - mpi_environment env(multi_threaded, argc, argv); + mpi_environment env(multi_threaded, argc, argv); hwmalloc::heap_config const& default_heap = hwmalloc::get_default_heap_config(); auto ctxt = context_impl(MPI_COMM_WORLD, true, default_heap /*, debug*/); } From e002c847fbaa9e4d6a40127984ed89ff20a5ca5a Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Sat, 18 Apr 2026 10:39:52 +0200 Subject: [PATCH 20/35] Make sure logging is initialized --- src/libfabric/CMakeLists.txt | 29 +++++++++++++++++++++++++++++ src/libfabric/context.cpp | 2 ++ src/libfabric/controller.hpp | 2 +- src/libfabric/operation_context.hpp | 2 +- 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index a69ab5d3..b105376d 100644 --- 
a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -16,6 +16,21 @@ message( "✅ libFatbat building: (${libFatbat_VERSION}) in ${libFatbat_BINARY_DIR}" ) +# ------------------------------------------------------------------------------ +# spdlog logging library +# ------------------------------------------------------------------------------ +string(TOUPPER ${FATBAT_LOG_LEVEL} FATBAT_LOG_LEVEL_UPPERCASE) +if(NOT "${FATBAT_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + find_package(spdlog REQUIRED) + message(STATUS "✅ spdlog found ${spdlog_VERSION}") +endif() + +# ------------------------------------------------------------------------------ +# fmt library (for nice formatting of messages) +# ------------------------------------------------------------------------------ +find_package(fmt REQUIRED) +message(STATUS "✅ fmt found ${fmt_VERSION}") + # ------------------------------------------------------------------------------ # # ------------------------------------------------------------------------------ @@ -47,6 +62,20 @@ target_sources(oomph_libfabric PRIVATE ${oomph_sources_libfabric}) target_sources(oomph_libfabric PRIVATE context.cpp) target_sources(oomph_libfabric PRIVATE operation_context.cpp) +if(NOT "${FATBAT_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + target_compile_definitions( + oomph_libfabric INTERFACE LIBFATBAT_LOGGING_ENABLED=1 + ) + target_compile_definitions( + oomph_libfabric + INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${FATBAT_LOG_LEVEL_UPPERCASE} + ) + target_link_libraries( + oomph_libfabric INTERFACE spdlog::spdlog $<$:ws2_32> + ) + target_link_libraries(oomph_libfabric INTERFACE fmt::fmt) +endif() + # if we are using GPU, then the libfabric library was probably built with gpu # support, and we should link to cuda to prevent link errors if(HWMALLOC_ENABLE_DEVICE) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index f47671c6..b4de1b6e 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -29,6 +29,8 
@@ context_impl::context_impl(MPI_Comm comm, bool thread_safe, , m_recv_cb_queue(128) , m_recv_cb_cancel(8) { + libfatbat::log::init_from_env(); + // int rank, size; OOMPH_CHECK_MPI_RESULT(MPI_Comm_rank(comm, &rank)); OOMPH_CHECK_MPI_RESULT(MPI_Comm_size(comm, &size)); diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index d04acc87..61b6bf7c 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -173,7 +173,7 @@ class controller : public libfatbat::controller_base MPI_Comm_rank(mpi_comm, &rank); MPI_Comm_size(mpi_comm, &size); - LIBFATBAT_DEBUG(ctrl_log, "{:<20} size {}", "initialize_localities", size); + LIBFATBAT_DEBUG(ctrl_log, "{:<20} size {}", "init_localities", size); MPI_exchange_localities(av, mpi_comm, rank, size); #ifndef HAVE_LIBFABRIC_LNX // address stuff not yet supported diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index c16c1505..7b59c250 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -19,7 +19,7 @@ namespace oomph::libfabric { -inline auto opctx_deb = libfatbat::log::create("OP__CXT"); +inline auto opctx_deb = libfatbat::log::create("opctxt"); // This struct holds the ready state of a future // we must also store the context used in libfabric, in case From 798a9ee0bbcc29779c42f25a08bcb475ad5b15ef Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Sat, 18 Apr 2026 11:20:10 +0200 Subject: [PATCH 21/35] Remove duplicated functions from base controller --- src/libfabric/controller.hpp | 57 ++---------------------------------- 1 file changed, 2 insertions(+), 55 deletions(-) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 61b6bf7c..41366584 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -41,12 +41,12 @@ namespace oomph::libfabric { inline auto ctrl_log = libfatbat::log::create("Ctrl"); -class controller : public libfatbat::controller_base +class 
controller : public libfatbat::controller_base { public: // -------------------------------------------------------------------- controller() - : libfatbat::controller_base() + : libfatbat::controller_base() { } @@ -182,59 +182,6 @@ class controller : public libfatbat::controller_base LIBFATBAT_DEBUG(ctrl_log, "{:<20} size {}", "Done localities", size); } - // -------------------------------------------------------------------- - inline constexpr bool bypass_tx_lock() - { -#if defined(HAVE_LIBFABRIC_GNI) - return true; -#elif defined(HAVE_LIBFABRIC_LNX) - // @todo : cxi provider is not yet thread safe using scalable endpoints - return false; -#else - return (threadlevel_flags() == FI_THREAD_SAFE || - endpoint_type_ == endpoint_type::threadlocalTx); -#endif - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_); - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_, std::try_to_lock_t{}); - } - - // -------------------------------------------------------------------- - inline constexpr bool bypass_rx_lock() - { -#ifdef HAVE_LIBFABRIC_GNI - return true; -#else - return ( - threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::scalableTxRx); -#endif - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_); - } - - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_, std::try_to_lock_t{}); - } - // 
-------------------------------------------------------------------- int poll_send_queue(fid_cq* send_cq, void* user_data) { From 45f5b841cde12f10a97a6f2eb777427191095d6d Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Sat, 18 Apr 2026 11:39:03 +0200 Subject: [PATCH 22/35] reuse controller base polling --- src/libfabric/controller.hpp | 194 +---------------------------------- 1 file changed, 2 insertions(+), 192 deletions(-) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 41366584..75392c63 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -185,203 +185,13 @@ class controller : public libfatbat::controller_base(now - send_poll_stamp).count() < - EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - send_poll_stamp = now; -#endif - int ret; - fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = try_tx_lock(); - - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } - - // static auto polling = - // NS_DEBUG::cnt_deb<9>.make_timer(1, NS_DEBUG::str<>("poll send queue")); - // LF_DEB(cnt_deb<9>, timed(polling, static_cast(send_cq))); - - // poll for completions - { - ret = fi_cq_read(send_cq, &entry[0], max_completions_per_poll_); - } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) - { - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(send_cq, &e, 0); - (void)err_sz; - - // flags might not be set correctly - if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) - { - LIBFATBAT_ERROR(ctrl_log, - "{:<20} Error FI_EAVAIL for FI_SEND with len {:#06x} context {} errcode {:3} flags {:16b} error {}", - "txcq", e.len, static_cast(e.op_context), e.err, e.flags, - fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); - } - else if ((e.flags & FI_RMA) != 0) - { - 
LIBFATBAT_ERROR(ctrl_log, - "{:<20} Error FI_EAVAIL for FI_RMA with len {:#06x} context {} errcode {:3} flags {:16b} error {}", - "txcq", e.len, static_cast(e.op_context), e.err, e.flags, - fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); - } - operation_context* handler = reinterpret_cast(e.op_context); - handler->handle_error(e); - return 0; - } - } - // - // exit possibly locked region and process each completion - // - if (ret > 0) - { - [[maybe_unused]] std::array buf; - int processed = 0; - for (int i = 0; i < ret; ++i) - { - ++sends_complete_; - LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} length {:#06x}", "Completion", i, - static_cast(entry[i].op_context), entry[i].len); - if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) - { - LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", - "txcq tagged send completion", static_cast(entry[i].op_context), ""); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_send_completion(user_data); - } - else - { - LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", - "unknown txcq completion", static_cast(entry[i].op_context), ""); - std::terminate(); - } - } - return processed; - } - else if (ret == 0 || ret == -FI_EAGAIN) - { - // do nothing, we will try again on the next check - } - else - { - LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", - "unknown error in completion txcq read", static_cast(entry[0].op_context), - ""); - } - return 0; + return static_cast(this)->poll_send_queue_default(send_cq, user_data); } // -------------------------------------------------------------------- int poll_recv_queue(fid_cq* rx_cq, void* user_data) { -#ifdef EXCESSIVE_POLLING_BACKOFF_MICRO_S - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); - if (std::chrono::duration_cast(now - recv_poll_stamp).count() < - EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - recv_poll_stamp = now; -#endif - int ret; 
- fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = get_rx_lock(); - - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } - - // static auto polling = - // NS_DEBUG::cnt_deb<2>.make_timer(1, NS_DEBUG::str<>("poll recv queue")); - // LF_DEB(cnt_deb<2>, timed(polling, static_cast(rx_cq))); - - // poll for completions - { - ret = fi_cq_read(rx_cq, &entry[0], max_completions_per_poll_); - } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) - { - // read the full error status - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(rx_cq, &e, 0); - (void)err_sz; - // from the manpage 'man 3 fi_cq_readerr' - if (e.err == FI_ECANCELED) - { - LIBFATBAT_DEBUG(ctrl_log, "{:<20} flags {:#06x} len {:#06x} context {}", - "rxcq Cancelled", e.flags, e.len, static_cast(e.op_context)); - // the request was cancelled, we can simply exit - // as the canceller will have doone any cleanup needed - operation_context* handler = reinterpret_cast(e.op_context); - handler->handle_cancelled(); - return 0; - } - else if (e.err != FI_SUCCESS) - { - LIBFATBAT_DEBUG(ctrl_log, - "{:<20} error code {} flags {:#06x} len {:#06x} context {} error msg {}", - "poll_recv_queue", -e.err, e.flags, e.len, static_cast(e.op_context), - fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); - } - operation_context* handler = reinterpret_cast(e.op_context); - if (handler) handler->handle_error(e); - return 0; - } - } - // - // release the lock and process each completion - // - if (ret > 0) - { - std::array buf; - int processed = 0; - for (int i = 0; i < ret; ++i) - { - ++recvs_complete_; - LIBFATBAT_DEBUG(ctrl_log, - "{:<20} {:02} {} flags {} ({:#06x}) context {} length {:#06x}", - "Completion txcq", i, - fi_tostr_r(buf.data(), buf.size(), &entry[i].flags, 
FI_TYPE_CQ_EVENT_FLAGS), - entry[i].flags, static_cast(entry[i].op_context), entry[i].len); - - if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) - { - LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", i, - static_cast(entry[i].op_context), "rxcq tagged recv completion"); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_recv_completion(user_data); - } - else - { - LIBFATBAT_DEBUG(ctrl_log, "{:<20} {} {} {}", "Completion", i, - static_cast(entry[i].op_context), - "Received an unknown rxcq completion"); - std::terminate(); - } - } - return processed; - } - else if (ret == 0 || ret == -FI_EAGAIN) - { - // do nothing, we will try again on the next check - } - else - { - LIBFATBAT_ERROR(ctrl_log, "{:<20} unknown error in completion rxcq read", "Completion"); - } - return 0; + return static_cast(this)->poll_recv_queue_default(rx_cq, user_data); } // Jobs started using mpi don't have this info From fe8884c27b5487488f5f7e681fe178edbe840d71 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 20 Apr 2026 10:43:38 +0200 Subject: [PATCH 23/35] Revert "Bump hwmalloc submodule" This reverts commit 6c8bd05563869a132e78253aef64fe7293233562. 
--- ext/hwmalloc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/hwmalloc b/ext/hwmalloc index eb9484b0..762dfd8a 160000 --- a/ext/hwmalloc +++ b/ext/hwmalloc @@ -1 +1 @@ -Subproject commit eb9484b0c7a2a1a7122975c1ff1b76d7e61f230d +Subproject commit 762dfd8a47dee7b7843da78760f0e35174682a7c From 764f4394ce2d9ad4ca5bcff792f435ba541e9f03 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 20 Apr 2026 12:47:08 +0200 Subject: [PATCH 24/35] Disable fortran temporarily for ci/cd --- CMakeLists.txt | 4 ++-- test/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3db53422..1bc87548 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,8 +93,8 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_config.inc.in # --------------------------------------------------------------------- # fortan bindings # --------------------------------------------------------------------- -include(oomph_fortran) -add_subdirectory(bindings) +#include(oomph_fortran) +#add_subdirectory(bindings) # --------------------------------------------------------------------- # testing diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 39affd0e..2c23633a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -129,4 +129,4 @@ if(OOMPH_WITH_LIBFABRIC) endforeach() endif() -add_subdirectory(bindings) +#add_subdirectory(bindings) From b6a05eb2b595f4c7d04aec60ecc84891cea54322 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 20 Apr 2026 11:56:26 +0200 Subject: [PATCH 25/35] Use macros for log creation to avoid instantiating void types when disabled --- src/libfabric/communicator.hpp | 2 +- src/libfabric/context.cpp | 2 +- src/libfabric/context.hpp | 2 +- src/libfabric/controller.hpp | 2 +- src/libfabric/operation_context.cpp | 3 ++- src/libfabric/operation_context.hpp | 2 +- src/libfabric/request_state.hpp | 2 +- 7 files changed, 8 insertions(+), 7 deletions(-) diff --git 
a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index ddd369b2..60c8b4be 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -28,7 +28,7 @@ namespace oomph { -inline auto comm_log = libfatbat::log::create("OomphCom"); +MAKE_LOGGER(comm_log, "OomphCom") using operation_context = libfabric::operation_context; diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index b4de1b6e..dd5d31e6 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -18,7 +18,7 @@ namespace oomph { -inline auto src_log = libfatbat::log::create("SRC"); +MAKE_LOGGER(src_log, "SRC") using controller_type = libfabric::controller; diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index 11e6788a..d312ce37 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -25,7 +25,7 @@ namespace oomph { -inline auto ctxt_log = libfatbat::log::create("Context"); +MAKE_LOGGER(ctxt_log, "Context") using controller_type = libfabric::controller; diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 75392c63..275d5352 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -39,7 +39,7 @@ namespace oomph::libfabric { -inline auto ctrl_log = libfatbat::log::create("Ctrl"); +MAKE_LOGGER(ctrl_log, "Ctrl") class controller : public libfatbat::controller_base { diff --git a/src/libfabric/operation_context.cpp b/src/libfabric/operation_context.cpp index 69de9a10..8dad1211 100644 --- a/src/libfabric/operation_context.cpp +++ b/src/libfabric/operation_context.cpp @@ -88,7 +88,8 @@ operation_context::handle_tagged_recv_completion_impl(void* user_data) } else { - detail::request_state** req = reinterpret_cast(&m_req); + [[maybe_unused]] detail::request_state** req = + reinterpret_cast(&m_req); LIBFATBAT_DEBUG(opctx_deb, "{:<20} tagged recv completion handler : context {} request {}", "invalid request state", static_cast(this), static_cast(req)); throw 
std::runtime_error("Request state invalid in handle_tagged_recv"); diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index 7b59c250..e7a92346 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -19,7 +19,7 @@ namespace oomph::libfabric { -inline auto opctx_deb = libfatbat::log::create("opctxt"); +MAKE_LOGGER(opctx_deb, "opctxt") // This struct holds the ready state of a future // we must also store the context used in libfabric, in case diff --git a/src/libfabric/request_state.hpp b/src/libfabric/request_state.hpp index 816a1050..7fd6ea82 100644 --- a/src/libfabric/request_state.hpp +++ b/src/libfabric/request_state.hpp @@ -17,7 +17,7 @@ namespace oomph { namespace detail { -inline auto req_deb = libfatbat::log::create("Request"); +MAKE_LOGGER(req_deb, "Request") struct request_state : public util::enable_shared_from_this From bf657240f8cf0891acbdd55180632353a2724071 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Tue, 21 Apr 2026 14:23:04 +0200 Subject: [PATCH 26/35] Use std::hardware_concurrency in place of boost::physical_concurrency --- src/libfabric/context.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index dd5d31e6..7dbc9e4c 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -9,7 +9,7 @@ */ #include // -#include +#include // paths relative to backend #include #include @@ -50,10 +50,10 @@ context_impl::context_impl(MPI_Comm comm, bool thread_safe, cpu_set_t cpuset; CPU_ZERO(&cpuset); if (sched_getaffinity(0, sizeof(cpuset), &cpuset) == 0) threads = CPU_COUNT(&cpuset); - else - threads = boost::thread::physical_concurrency(); + else // threads = boost::thread::physical_concurrency(); + threads = std::thread::hardware_concurrency(); #else - threads = boost::thread::physical_concurrency(); + threads = std::thread::hardware_concurrency(); #endif m_controller = 
init_libfabric_controller(this, comm, rank, size, threads, debug); m_domain = m_controller->get_domain(); From 9b62a2c2f920e75a6234c79a0a44d3111fcd2d27 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 20 Apr 2026 10:50:40 +0200 Subject: [PATCH 27/35] Add fmt FetchContent to remove dependency and allow CI/CD to build correctly --- src/libfabric/CMakeLists.txt | 8 +++++++- src/libfabric/controller.hpp | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index b105376d..7b06690c 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -28,7 +28,13 @@ endif() # ------------------------------------------------------------------------------ # fmt library (for nice formatting of messages) # ------------------------------------------------------------------------------ -find_package(fmt REQUIRED) +find_package(fmt QUIET) +if(NOT fmt_FOUND) + message("Fetching fmt") + include(FetchContent) + FetchContent_Declare(fmt GIT_REPOSITORY https://github.com/fmtlib/fmt GIT_TAG 12.1.0) + FetchContent_MakeAvailable(fmt) +endif() message(STATUS "✅ fmt found ${fmt_VERSION}") # ------------------------------------------------------------------------------ diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 275d5352..3ab0be4f 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -1,3 +1,6 @@ + + + /* * ghex-org * From 611b6342dbe8c51fd782a5580e49e585eea1f253 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Tue, 21 Apr 2026 14:27:06 +0200 Subject: [PATCH 28/35] Cleanup dependencies --- src/libfabric/CMakeLists.txt | 45 +++++++++++++++++------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index 7b06690c..33999f80 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -8,7 +8,7 @@ if(EXISTS 
${PROJECT_SOURCE_DIR}/ext/libfatbat) endif() fetchcontent_declare( libFatbat GIT_REPOSITORY "https://github.com/biddisco/libfatbat.git" - GIT_TAG "main" + GIT_TAG "develop" ) fetchcontent_makeavailable(libFatbat) message( @@ -21,38 +21,34 @@ message( # ------------------------------------------------------------------------------ string(TOUPPER ${FATBAT_LOG_LEVEL} FATBAT_LOG_LEVEL_UPPERCASE) if(NOT "${FATBAT_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") - find_package(spdlog REQUIRED) - message(STATUS "✅ spdlog found ${spdlog_VERSION}") + find_package(spdlog QUIET) + if(spdlog_FOUND) + message(STATUS "✅ spdlog found ${spdlog_VERSION}") + message(STATUS "spdlog include dirs: ${spdlog_INCLUDE_DIRS}") + else() + message("Fetching spdlog") + include(FetchContent) + fetchcontent_declare( + spdlog GIT_REPOSITORY https://github.com/gabime/spdlog.git GIT_TAG v2.x + ) + fetchcontent_makeavailable(spdlog) + message(STATUS "✅ spdlog found ${spdlog_VERSION}") + endif() endif() # ------------------------------------------------------------------------------ -# fmt library (for nice formatting of messages) -# ------------------------------------------------------------------------------ -find_package(fmt QUIET) -if(NOT fmt_FOUND) - message("Fetching fmt") - include(FetchContent) - FetchContent_Declare(fmt GIT_REPOSITORY https://github.com/fmtlib/fmt GIT_TAG 12.1.0) - FetchContent_MakeAvailable(fmt) -endif() -message(STATUS "✅ fmt found ${fmt_VERSION}") - +# interface/dummy library of our private headers # ------------------------------------------------------------------------------ -# -# ------------------------------------------------------------------------------ -find_package(Boost REQUIRED CONFIG COMPONENTS thread) - -# dummy library of our private headers add_library(oomph_private_libfabric_headers INTERFACE) target_include_directories( oomph_private_libfabric_headers INTERFACE "$" ) -# actual library (created in oomph_libfabric.cmake) source files, depends on -# dummy library +# 
------------------------------------------------------------------------------ +# actual library (created in oomph_libfabric.cmake) source files, depends on dummy library +# ------------------------------------------------------------------------------ target_link_libraries(oomph_libfabric PRIVATE oomph_private_libfabric_headers) -target_link_libraries(oomph_libfabric PRIVATE Boost::thread) target_link_libraries(oomph_libfabric PRIVATE libfatbat) # we need to include a binary dir for the oomph_config_defines.hpp file @@ -76,10 +72,11 @@ if(NOT "${FATBAT_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") oomph_libfabric INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${FATBAT_LOG_LEVEL_UPPERCASE} ) + target_link_libraries( - oomph_libfabric INTERFACE spdlog::spdlog $<$:ws2_32> + oomph_libfabric INTERFACE $ + $ ) - target_link_libraries(oomph_libfabric INTERFACE fmt::fmt) endif() # if we are using GPU, then the libfabric library was probably built with gpu From 8f23d486189ee30999dddfdb53efead71c8c2176 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Tue, 21 Apr 2026 15:30:08 +0200 Subject: [PATCH 29/35] Switch libfabric install to newer source compiled version --- .github/workflows/CI.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index cec31f8f..b7edb383 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -14,7 +14,12 @@ jobs: - name: install libfabric run: | apt update - apt-get -y install libfabric-dev + git clone https://github.com/ofiwg/libfabric + cd libfabric + ./autogen.sh + ./configure --enable-debug --enable-tcp=yes --enable-lnx=no --enable-rxm=yes --enable-shm=yes --enable-cxi=no --enable-efa=no --enable-mrail=no --enable-opx=no --enable-psm2=no --enable-sm2=no --enable-psm3=no --enable-rxd=no --enable-sockets=no --enable-ucx=no --enable-udp=no --enable-usnic=no --enable-verbs=no --enable-xpmem=no + make -j install + # apt-get -y install libfabric-dev - uses: 
actions/checkout@v3 with: submodules: recursive @@ -48,7 +53,9 @@ jobs: - name: Build run: cmake --build build --parallel 2 - name: Execute tests - run: cd build && export SHLVL=1 && export OMPI_ALLOW_RUN_AS_ROOT=1 && export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 && export CTEST_OUTPUT_ON_FAILURE=1 && env && ctest + run: cd build && export SHLVL=1 && export OMPI_ALLOW_RUN_AS_ROOT=1 && export + OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 && export CTEST_OUTPUT_ON_FAILURE=1 + && env && ctest build-gpu-hip: runs-on: ubuntu-latest From 47c6bcca04127946d013fb8ac50d166a8858b0d7 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Tue, 21 Apr 2026 16:06:20 +0200 Subject: [PATCH 30/35] Enable libfabric testing on alps machines --- .cscs-ci/default.yaml | 87 ++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 0d6ba1fc..c8dd0cc1 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -1,5 +1,5 @@ include: - - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' + - remote: "https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml" variables: BASE_IMAGE: jfrog.svc.cscs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps4-dev @@ -10,16 +10,22 @@ variables: .build_deps_template: timeout: 1 hour before_script: - - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true - - export DOCKERFILE_SHA=`sha256sum .cscs-ci/container/deps.Containerfile | head -c 16` + - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME + --password-stdin || true + - export DOCKERFILE_SHA=`sha256sum .cscs-ci/container/deps.Containerfile | + head -c 16` - export ENV_FILE_SHA=`sha256sum ${SPACK_ENV_FILE} | head -c 16` - - export CONFIG_TAG=`echo $DOCKERFILE_SHA-$BASE_IMAGE-$SPACK_SHA-$SPACK_PACKAGES_SHA-$ENV_FILE_SHA | sha256sum - | head -c 16` - - export 
PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-spack-deps-$BACKEND:$CONFIG_TAG + - export CONFIG_TAG=`echo + $DOCKERFILE_SHA-$BASE_IMAGE-$SPACK_SHA-$SPACK_PACKAGES_SHA-$ENV_FILE_SHA | + sha256sum - | head -c 16` + - export + PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-spack-deps-$BACKEND:$CONFIG_TAG - echo -e "CONFIG_TAG=$CONFIG_TAG" >> base-${BACKEND}.env - echo -e "DEPS_IMAGE=$PERSIST_IMAGE_NAME" >> base-${BACKEND}.env variables: DOCKERFILE: .cscs-ci/container/deps.Containerfile - DOCKER_BUILD_ARGS: '["BASE_IMAGE", "SPACK_SHA", "SPACK_PACKAGES_SHA", "SPACK_ENV_FILE"]' + DOCKER_BUILD_ARGS: "[\"BASE_IMAGE\", \"SPACK_SHA\", \"SPACK_PACKAGES_SHA\", + \"SPACK_ENV_FILE\"]" SPACK_ENV_FILE: .cscs-ci/spack/$BACKEND.yaml artifacts: reports: @@ -48,23 +54,25 @@ build_deps_ucx: - .build_deps_template # TODO: Libfabric tests are currently failing on Alps and need to be fixed. -# build_deps_libfabric: -# variables: -# BACKEND: libfabric -# extends: -# - .container-builder-cscs-gh200 -# - .build_deps_template +build_deps_libfabric: + variables: + BACKEND: libfabric + extends: + - .container-builder-cscs-gh200 + - .build_deps_template .build_template: extends: .container-builder-cscs-gh200 timeout: 15 minutes before_script: - - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true - - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-build-$BACKEND:$CI_COMMIT_SHA + - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME + --password-stdin || true + - export + PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-build-$BACKEND:$CI_COMMIT_SHA - echo -e "BUILD_IMAGE=$PERSIST_IMAGE_NAME" >> build-${BACKEND}.env variables: DOCKERFILE: .cscs-ci/container/build.Containerfile - DOCKER_BUILD_ARGS: '["DEPS_IMAGE", "BACKEND"]' + DOCKER_BUILD_ARGS: "[\"DEPS_IMAGE\", \"BACKEND\"]" artifacts: reports: dotenv: build-${BACKEND}.env @@ -95,20 +103,20 @@ build_ucx: artifacts: true # TODO: Libfabric tests are currently 
failing on Alps and need to be fixed. -# build_libfabric: -# variables: -# BACKEND: libfabric -# extends: .build_template -# needs: -# - job: build_deps_libfabric -# artifacts: true +build_libfabric: + variables: + BACKEND: libfabric + extends: .build_template + needs: + - job: build_deps_libfabric + artifacts: true .test_template_base: extends: .container-runner-clariden-gh200 variables: SLURM_JOB_NUM_NODES: 1 SLURM_GPUS_PER_TASK: 1 - SLURM_TIMELIMIT: '5:00' + SLURM_TIMELIMIT: "5:00" SLURM_PARTITION: normal SLURM_MPI_TYPE: pmix SLURM_NETWORK: disable_rdzv_get @@ -123,7 +131,8 @@ build_ucx: variables: SLURM_NTASKS: 1 script: - - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8 + - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 + --parallel 8 .test_parallel_template: extends: .test_template_base @@ -132,9 +141,11 @@ build_ucx: script: # All ranks write to ctest files in Testing, but this can deadlock when # writing inside the container. - - if [[ "${SLURM_PROCID}" == 0 ]]; then rm -rf /oomph/build/Testing; mkdir /tmp/Testing; ln -s /tmp/Testing /oomph/build/Testing; fi + - if [[ "${SLURM_PROCID}" == 0 ]]; then rm -rf /oomph/build/Testing; mkdir + /tmp/Testing; ln -s /tmp/Testing /oomph/build/Testing; fi - sleep 1 - - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 + - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure + --timeout 60 # TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: @@ -180,16 +191,16 @@ test_parallel_ucx: image: $BUILD_IMAGE # TODO: Libfabric tests are currently failing on Alps and need to be fixed. 
-# test_serial_libfabric: -# extends: .test_serial_template -# needs: -# - job: build_libfabric -# artifacts: true -# image: $BUILD_IMAGE +test_serial_libfabric: + extends: .test_serial_template + needs: + - job: build_libfabric + artifacts: true + image: $BUILD_IMAGE -# test_parallel_libfabric: -# extends: .test_parallel_template -# needs: -# - job: build_libfabric -# artifacts: true -# image: $BUILD_IMAGE +test_parallel_libfabric: + extends: .test_parallel_template + needs: + - job: build_libfabric + artifacts: true + image: $BUILD_IMAGE From 76f5b6e0b9bf93398122c7e55702b268e61d182e Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Tue, 21 Apr 2026 18:36:49 +0200 Subject: [PATCH 31/35] Add a var for PMI library to help with libfabric find --- cmake/FindPMIx.cmake | 31 ++++++++++++++++--------------- src/libfabric/CMakeLists.txt | 5 +++-- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/cmake/FindPMIx.cmake b/cmake/FindPMIx.cmake index d585edae..54c58256 100644 --- a/cmake/FindPMIx.cmake +++ b/cmake/FindPMIx.cmake @@ -1,30 +1,31 @@ find_package(PkgConfig QUIET) pkg_check_modules(PC_PMIX QUIET pmix) -find_path(PMIX_INCLUDE_DIR pmix.h - HINTS - ${PMIX_ROOT} ENV PMIX_ROOT - ${PMIX_DIR} ENV PMIX_DIR - PATH_SUFFIXES include) +find_path(PMIX_INCLUDE_DIR pmix.h HINTS ${PMIX_ROOT} ENV PMIX_ROOT ${PMIX_DIR} + ENV PMIX_DIR PATH_SUFFIXES include +) -find_library(PMIX_LIBRARY HINT ${PMIX_DIR} NAMES pmix - HINTS - ${PMIX_ROOT} ENV PMIX_ROOT - ${PMIX_DIR} ENV PMIX_DIR - PATH_SUFFIXES lib lib64) +find_library( + PMIX_LIBRARY HINT ${PMIX_DIR} NAMES pmix HINTS ${PMIX_ROOT} ENV PMIX_ROOT + ${PMIX_DIR} ENV PMIX_DIR + PATH_SUFFIXES lib lib64 +) -set(PMIX_LIBRARIES ${PMIX_LIBRARY} CACHE INTERNAL "") +set(PMIX_LIBRARIES ${PMIX_LIBRARY} CACHE INTERNAL "") set(PMIX_INCLUDE_DIRS ${PMIX_INCLUDE_DIR} CACHE INTERNAL "") include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PMIx DEFAULT_MSG PMIX_LIBRARY PMIX_INCLUDE_DIR) 
+find_package_handle_standard_args( + PMIx DEFAULT_MSG PMIX_LIBRARY PMIX_INCLUDE_DIR +) mark_as_advanced(PMIX_ROOT PMIX_LIBRARY PMIX_INCLUDE_DIR) if(NOT TARGET PMIx::libpmix AND PMIx_FOUND) add_library(PMIx::libpmix SHARED IMPORTED) - set_target_properties(PMIx::libpmix PROPERTIES - IMPORTED_LOCATION ${PMIX_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${PMIX_INCLUDE_DIR} + set_target_properties( + PMIx::libpmix PROPERTIES IMPORTED_LOCATION ${PMIX_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${PMIX_INCLUDE_DIR} ) + set(PMI_LIBRARY_TARGET PMIx::libpmix) endif() diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index 33999f80..c996f558 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -13,7 +13,7 @@ fetchcontent_declare( fetchcontent_makeavailable(libFatbat) message( STATUS - "✅ libFatbat building: (${libFatbat_VERSION}) in ${libFatbat_BINARY_DIR}" + "✅ libFatbat building: (${libFatbat_VERSION}) from ${libFatbat_SOURCE_DIR} in ${libFatbat_BINARY_DIR}" ) # ------------------------------------------------------------------------------ @@ -46,7 +46,8 @@ target_include_directories( ) # ------------------------------------------------------------------------------ -# actual library (created in oomph_libfabric.cmake) source files, depends on dummy library +# actual library (created in oomph_libfabric.cmake) source files, depends on +# dummy library # ------------------------------------------------------------------------------ target_link_libraries(oomph_libfabric PRIVATE oomph_private_libfabric_headers) target_link_libraries(oomph_libfabric PRIVATE libfatbat) From 86b3ef1f70a79570837cdc3359cbb138f1941dee Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Fri, 17 Apr 2026 15:09:15 +0200 Subject: [PATCH 32/35] Fix a debug format error and MPI_Comm type format for mpich/openmpi --- src/libfabric/communicator.hpp | 35 ++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git 
a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index 60c8b4be..2ab567ff 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -11,6 +11,7 @@ #include #include +#include #include @@ -51,6 +52,31 @@ class communicator_impl : public communicator_base callback_queue m_recv_cb_queue; callback_queue m_recv_cb_cancel; + template::type>::value, + bool IsIntegral = std::is_integral::type>::value> + struct mpi_format_helper + { + static char const* cast(T const&) { return "[opaque]"; } + }; + + template + struct mpi_format_helper + { + static void const* cast(T value) { return static_cast(value); } + }; + + template + struct mpi_format_helper + { + static unsigned long long cast(T value) { return static_cast(value); } + }; + + template + static auto mpi_format(T value) -> decltype(mpi_format_helper::cast(value)) + { + return mpi_format_helper::cast(value); + } + // -------------------------------------------------------------------- communicator_impl(context_impl* ctxt) : communicator_base(ctxt) @@ -59,7 +85,8 @@ class communicator_impl : public communicator_base , m_recv_cb_queue(128) , m_recv_cb_cancel(8) { - LIBFATBAT_DEBUG(comm_log, "{:<20} MPI_comm {} ", "Construct", (mpi_comm())); + LIBFATBAT_DEBUG(comm_log, "{:<20} MPI_comm {} ", "Construct", + mpi_format_helper::cast(mpi_comm())); m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); } @@ -112,7 +139,7 @@ class communicator_impl : public communicator_base uint64_t tag_, operation_context* ctxt) { LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); - LIBFATBAT_DEBUG(comm_log, "{:<20} -> {:02} {} {} tag {:#12x} context {} tx endpoint {}", + LIBFATBAT_DEBUG(comm_log, "{:<20} -> {:02} {} tag {:#12x} context {} tx endpoint {}", "send_tagged_region", dst_addr_, send_region, tag_, static_cast(ctxt), static_cast(m_tx_endpoint.get_ep())); execute_fi_function(fi_tsend, "fi_tsend", 
m_tx_endpoint.get_ep(), send_region.get_address(), @@ -125,7 +152,7 @@ class communicator_impl : public communicator_base uint64_t tag_) { LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); - LIBFATBAT_DEBUG(comm_log, "{:<20} -> {:02} {} {} tag {:#12x} tx endpoint {}", + LIBFATBAT_DEBUG(comm_log, "{:<20} -> {:02} {} tag {} tx endpoint {}", "inject_tagged_region", dst_addr_, send_region, tag_, static_cast(m_tx_endpoint.get_ep())); execute_fi_function(fi_tinject, "fi_tinject", m_tx_endpoint.get_ep(), @@ -140,7 +167,7 @@ class communicator_impl : public communicator_base uint64_t tag_, operation_context* ctxt) { LIBFATBAT_SCOPE(comm_log, "{} {}", (void*)(this), __func__); - LIBFATBAT_DEBUG(comm_log, "{:<20} <- {:02} {} {} tag {:#12x} context {} rx endpoint {}", + LIBFATBAT_DEBUG(comm_log, "{:<20} <- {:02} {} tag {} context {} rx endpoint {}", "recv_tagged_region", src_addr_, recv_region, tag_, static_cast(ctxt), static_cast(m_rx_endpoint.get_ep())); constexpr uint64_t ignore = 0; From b18c4f6de7668cca22fbf77040d37abde91fa63c Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 22 Apr 2026 13:03:25 +0200 Subject: [PATCH 33/35] Enable CI/CD libfabric testing and set env var to disable MR Cache --- .cscs-ci/default.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index c8dd0cc1..abbfb9ba 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -53,7 +53,6 @@ build_deps_ucx: - .container-builder-cscs-gh200 - .build_deps_template -# TODO: Libfabric tests are currently failing on Alps and need to be fixed. build_deps_libfabric: variables: BACKEND: libfabric @@ -102,7 +101,6 @@ build_ucx: - job: build_deps_ucx artifacts: true -# TODO: Libfabric tests are currently failing on Alps and need to be fixed. 
build_libfabric: variables: BACKEND: libfabric @@ -190,9 +188,10 @@ test_parallel_ucx: artifacts: true image: $BUILD_IMAGE -# TODO: Libfabric tests are currently failing on Alps and need to be fixed. test_serial_libfabric: extends: .test_serial_template + variables: + FI_MR_CACHE_MAX_COUNT: 0 # Disables the MR cache to prevent the teardown segfault needs: - job: build_libfabric artifacts: true @@ -200,6 +199,8 @@ test_serial_libfabric: test_parallel_libfabric: extends: .test_parallel_template + variables: + FI_MR_CACHE_MAX_COUNT: 0 # Disables the MR cache to prevent the teardown segfault needs: - job: build_libfabric artifacts: true From de07829933f285e4ab7bfbbacb64b922a263f85c Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 22 Apr 2026 15:32:50 +0200 Subject: [PATCH 34/35] experimenting with FI_MR_xxx for CI/CD --- .cscs-ci/default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index abbfb9ba..6ae13795 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -199,8 +199,8 @@ test_serial_libfabric: test_parallel_libfabric: extends: .test_parallel_template - variables: - FI_MR_CACHE_MAX_COUNT: 0 # Disables the MR cache to prevent the teardown segfault + # variables: + # FI_MR_CACHE_MAX_COUNT: 0 # Disables the MR cache to prevent the teardown segfault needs: - job: build_libfabric artifacts: true From 91ec19d930d5e9e94cca02a5ddcbd663d5a35506 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Fri, 24 Apr 2026 09:07:34 +0200 Subject: [PATCH 35/35] Apply a mem reg shutdown patch to the libfabric code for CI testing --- .cscs-ci/container/deps.Containerfile | 19 +++++++++++++++++++ .cscs-ci/spack/mr_unsubscribe.patch | 13 +++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 .cscs-ci/spack/mr_unsubscribe.patch diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile index f5867ac5..88b84222 100644 --- 
a/.cscs-ci/container/deps.Containerfile +++ b/.cscs-ci/container/deps.Containerfile @@ -14,6 +14,25 @@ RUN mkdir -p /opt/spack-packages && \ RUN spack repo remove --scope defaults:base builtin && \ spack repo add --scope site /opt/spack-packages/repos/spack_repo/builtin + +RUN cat <<EOF > /opt/spack-packages/repos/spack_repo/builtin/packages/libfabric/mr_unsubscribe.patch +diff --git a/prov/util/src/import_mem_monitor.c b/prov/util/src/import_mem_monitor.c +index e7be581526f0..1f1f4a971099 100644 +--- a/prov/util/src/import_mem_monitor.c ++++ b/prov/util/src/import_mem_monitor.c +@@ -111,7 +111,7 @@ static void ofi_import_monitor_unsubscribe(struct ofi_mem_monitor *notifier, + const void *addr, size_t len, + union ofi_mr_hmem_info *hmem_info) + { +- assert(impmon.impfid); ++ if (!impmon.impfid) return; + impmon.impfid->export_ops->unsubscribe(impmon.impfid, addr, len); + } +EOF + +RUN sed -i '/patch("nvhpc-symver.patch", when="@1.6.0:1.14.0 %nvhpc")/a\ patch("mr_unsubscribe.patch")' /opt/spack-packages/repos/spack_repo/builtin/packages/libfabric/package.py + + ARG SPACK_ENV_FILE COPY $SPACK_ENV_FILE /spack_environment/spack.yaml diff --git a/.cscs-ci/spack/mr_unsubscribe.patch b/.cscs-ci/spack/mr_unsubscribe.patch new file mode 100644 index 00000000..6416c4f7 --- /dev/null +++ b/.cscs-ci/spack/mr_unsubscribe.patch @@ -0,0 +1,13 @@ +diff --git a/prov/util/src/import_mem_monitor.c b/prov/util/src/import_mem_monitor.c +index e7be581526f0..1f1f4a971099 100644 +--- a/prov/util/src/import_mem_monitor.c ++++ b/prov/util/src/import_mem_monitor.c +@@ -111,7 +111,7 @@ static void ofi_import_monitor_unsubscribe(struct ofi_mem_monitor *notifier, + const void *addr, size_t len, + union ofi_mr_hmem_info *hmem_info) + { +- assert(impmon.impfid); ++ if (!impmon.impfid) return; + impmon.impfid->export_ops->unsubscribe(impmon.impfid, addr, len); + } +