From 1ceda8026ba05a731f541c9b94d725dca2a83e37 Mon Sep 17 00:00:00 2001
From: Ben Lasscock
Date: Fri, 26 Jun 2026 19:40:02 +0000
Subject: [PATCH] Add opt-in monolithic shared library for ODR-safe plugin co-loading

MDIO is consumed header-only, so every translation unit that touches it
statically links its own copy of tensorstore and its vendored Abseil. That
is harmless for a single executable, but a process that dlopen()s more than
one such plugin ends up with several copies of Abseil's global state and
aborts at runtime with the infamous "ODR violation in Cord".

This adds an opt-in MDIO_BUILD_MONOLITHIC_SHARED (default OFF, so static
consumers are untouched) that emits one libmdio_monolith.so, exposed via
the mdio::monolith alias. Plugins link that single object instead of the
tensorstore::* static deps, so the dynamic linker maps tensorstore and
Abseil exactly once per process and the globals are singletons again.

Getting a self-contained shared object right took two non-obvious steps:

- WHOLE_ARCHIVE on the tensorstore::* targets is a no-op because they are
  INTERFACE aggregators. A small recursive collector walks the link closure
  down to the concrete STATIC_LIBRARY targets (unwrapping the
  $<LINK_LIBRARY:WHOLE_ARCHIVE,...> genexes tensorstore uses for its
  alwayslink driver libs) and whole-archives those, so the explicit
  template instantiations (Spec / Zarr metadata JSON binders) and the
  driver self-registration objects are all force-included.

- The interface deps are re-exposed to consumers as $<COMPILE_ONLY:...>
  usage requirements, propagating the transitive
  tensorstore/Abseil/nlohmann include dirs and defines without dragging
  the static archives back into the consumer (which would re-duplicate
  Abseil and defeat the whole exercise).

Co-authored-by: Cursor
---
 mdio/CMakeLists.txt | 177 ++++++++++++++++++++++++++++++++++++++++++++
 mdio/monolith.cc    |  30 ++++++++
 2 files changed, 207 insertions(+)
 create mode 100644 mdio/monolith.cc

diff --git a/mdio/CMakeLists.txt b/mdio/CMakeLists.txt
index 21e7599..ec896a0 100644
--- a/mdio/CMakeLists.txt
+++ b/mdio/CMakeLists.txt
@@ -110,6 +110,183 @@ install(EXPORT mdioTargets
 
 # ============ End installable library ============
 
+# ============ Optional monolithic shared library ============
+#
+# By default mdio is consumed as a header-only INTERFACE target and each
+# consumer statically links its own copy of tensorstore + Abseil. That is fine
+# for a single executable, but a process that dynamically loads more than one
+# such consumer (e.g. several plugins via dlopen) ends up with multiple copies
+# of Abseil's global state and aborts at runtime ("ODR violation in Cord").
+#
+# When MDIO_BUILD_MONOLITHIC_SHARED is enabled we additionally emit a single
+# shared object that bundles tensorstore (and its vendored Abseil) once.
+# Consumers that need ODR-safe co-loading link `mdio::monolith` instead of
+# `mdio` + the tensorstore::* internal deps, so the dynamic linker maps
+# tensorstore/Abseil exactly once per process.
+option(MDIO_BUILD_MONOLITHIC_SHARED
+  "Also build libmdio_monolith.so bundling tensorstore+Abseil for ODR-safe co-loading of multiple mdio-linked plugins in one process"
+  OFF)
+
+# Recursively walk the link closure of the given targets and collect every
+# concrete STATIC_LIBRARY target. The tensorstore::* entries are INTERFACE
+# aggregators, so $<LINK_LIBRARY:WHOLE_ARCHIVE,...> applied to them is a no-op;
+# we need the real archives underneath so that explicit template instantiations
+# (e.g. the tensorstore Spec / Zarr metadata JSON binders) and the driver
+# self-registration objects are all force-included into the shared object.
+# Dependency entries can be plain names, namespaced aliases (foo::bar), or
+# genex-wrapped libraries such as
+#   $<LINK_LIBRARY:WHOLE_ARCHIVE,some_target>; we tokenize on
+# genex punctuation (protecting :: in namespaced names) and keep whatever
+# resolves to a real target. Genex conditions are not evaluated, so a target
+# named inside a conditional expression is collected even when the condition
+# would be false.
+#
+# Arguments:
+#   outvar - variable set in the caller's scope to the collected targets
+#            (ALIAS targets are reported under their real target name).
+#   ARGN   - root targets to walk; names that are not targets are skipped.
+function(_mdio_collect_static_archives outvar)
+  set(_result "")
+  set(_seen "")
+  set(_stack ${ARGN})
+  # Compare against the empty string rather than testing truthiness, so a lone
+  # entry that spells a false constant cannot end the walk early.
+  while(NOT "${_stack}" STREQUAL "")
+    list(POP_FRONT _stack _t)
+    if(NOT TARGET "${_t}")
+      continue()
+    endif()
+    # Resolve ALIAS targets first so an alias and its real target are one node
+    # and the same archive is never listed under two names.
+    get_target_property(_aliased "${_t}" ALIASED_TARGET)
+    if(_aliased)
+      set(_t "${_aliased}")
+    endif()
+    if(_t IN_LIST _seen)
+      continue()
+    endif()
+    list(APPEND _seen "${_t}")
+    get_target_property(_type "${_t}" TYPE)
+    if("${_type}" STREQUAL "STATIC_LIBRARY")
+      list(APPEND _result "${_t}")
+    endif()
+    set(_deps "")
+    get_target_property(_il "${_t}" INTERFACE_LINK_LIBRARIES)
+    if(_il)
+      list(APPEND _deps ${_il})
+    endif()
+    # LINK_LIBRARIES is only read for non-INTERFACE targets.
+    if(NOT "${_type}" STREQUAL "INTERFACE_LIBRARY")
+      get_target_property(_ll "${_t}" LINK_LIBRARIES)
+      if(_ll)
+        list(APPEND _deps ${_ll})
+      endif()
+    endif()
+    foreach(_d ${_deps})
+      # Hide :: so splitting on ':' keeps namespaced target names intact.
+      string(REPLACE "::" "@@NS@@" _d "${_d}")
+      string(REGEX REPLACE "[$<>:,]" ";" _toks "${_d}")
+      foreach(_tok ${_toks})
+        string(REPLACE "@@NS@@" "::" _tok "${_tok}")
+        if(TARGET "${_tok}")
+          list(APPEND _stack "${_tok}")
+        endif()
+      endforeach()
+    endforeach()
+  endwhile()
+  list(REMOVE_DUPLICATES _result)
+  # Quote the value so an empty result sets the caller's variable to an empty
+  # list (an unquoted empty expansion would unset it instead).
+  set(${outvar} "${_result}" PARENT_SCOPE)
+endfunction()
+
+if(MDIO_BUILD_MONOLITHIC_SHARED)
+  # $<COMPILE_ONLY:...> requires CMake >= 3.27 (and the LINK_LIBRARY genex
+  # >= 3.24); stop early with a clear message on older versions.
+  if(CMAKE_VERSION VERSION_LESS 3.27)
+    message(FATAL_ERROR
+      "MDIO_BUILD_MONOLITHIC_SHARED requires CMake >= 3.27 (found ${CMAKE_VERSION})")
+  endif()
+
+  # The tensorstore drivers self-register through static initializers, so they
+  # must be whole-archived into the shared object or the registrations get
+  # stripped (and zarr/s3/gcs stores fail to open at runtime). Every static
+  # archive the collector finds behind these deps is whole-archived, Abseil
+  # archives included when they are CMake targets in that closure.
+  # NOTE(review): tensorstore::util_status_testutil looks like a test-only
+  # helper -- confirm it is meant to ship inside the shared object.
+  set(mdio_MONOLITH_DEPS
+    tensorstore::driver_array
+    tensorstore::driver_zarr
+    tensorstore::driver_zarr3
+    tensorstore::driver_json
+    tensorstore::kvstore_file
+    tensorstore::kvstore_s3
+    tensorstore::kvstore_gcs
+    tensorstore::stack
+    tensorstore::tensorstore
+    tensorstore::index_space_dim_expression
+    tensorstore::index_space_index_transform
+    tensorstore::util_status_testutil
+    nlohmann_json_schema_validator::nlohmann_json_schema_validator
+  )
+
+  add_library(mdio_monolith SHARED ${CMAKE_CURRENT_SOURCE_DIR}/monolith.cc)
+  set_target_properties(mdio_monolith PROPERTIES
+    OUTPUT_NAME mdio_monolith
+    POSITION_INDEPENDENT_CODE ON
+  )
+  # Resolve the interface deps down to the concrete static archives and
+  # whole-archive every one of them so the shared object is self-contained.
+  _mdio_collect_static_archives(mdio_MONOLITH_ARCHIVES ${mdio_MONOLITH_DEPS})
+  list(LENGTH mdio_MONOLITH_ARCHIVES _n_arch)
+  if(_n_arch EQUAL 0)
+    message(FATAL_ERROR
+      "MDIO monolith: no static archives found behind the monolith deps; "
+      "the shared library would bundle nothing")
+  endif()
+  message(STATUS "MDIO monolith: whole-archiving ${_n_arch} static archive(s)")
+
+  # The archives are whole-archived into this .so (PRIVATE link) so it is
+  # self-contained. The interface deps are ALSO re-exposed to consumers as
+  # COMPILE_ONLY usage requirements ($<COMPILE_ONLY:...> requires CMake >= 3.27):
+  # this propagates the transitive tensorstore/Abseil/nlohmann include
+  # directories and compile definitions to anything linking mdio::monolith,
+  # WITHOUT pulling the static archives back into the consumer (which would
+  # re-duplicate Abseil globals and defeat the purpose).
+  target_link_libraries(mdio_monolith
+    PRIVATE
+      "$<LINK_LIBRARY:WHOLE_ARCHIVE,${mdio_MONOLITH_ARCHIVES}>"
+    PUBLIC
+      "$<COMPILE_ONLY:${mdio_MONOLITH_DEPS}>"
+  )
+  # NOTE(review): both project include genexes were reduced to a bare "$" in
+  # the extracted patch; the BUILD_INTERFACE/INSTALL_INTERFACE paths below are
+  # a reconstruction -- confirm them against the header-only mdio target.
+  # TENSORSTORE_INCLUDE_DIRS is wrapped in BUILD_INTERFACE so those paths stay
+  # out of the exported (installed) usage requirements.
+  target_include_directories(mdio_monolith PUBLIC
+    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>"
+    "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
+    "$<BUILD_INTERFACE:${TENSORSTORE_INCLUDE_DIRS}>"
+  )
+  target_compile_definitions(mdio_monolith PUBLIC MAX_NUM_SLICES=${MAX_NUM_SLICES})
+  target_compile_features(mdio_monolith PUBLIC cxx_std_17)
+
+  # mdio::monolith is the public alias consumers link against.
+  add_library(mdio::monolith ALIAS mdio_monolith)
+
+  install(TARGETS mdio_monolith EXPORT mdioTargets
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  )
+
+  message(STATUS "MDIO monolithic shared library enabled --> mdio::monolith")
+endif()
+
+# ============ End optional monolithic shared library ============
+
 
 mdio_cc_test(
   NAME
diff --git a/mdio/monolith.cc b/mdio/monolith.cc
new file mode 100644
index 0000000..efd2251
--- /dev/null
+++ b/mdio/monolith.cc
@@ -0,0 +1,30 @@
+// Copyright 2026 TGS
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Translation-unit anchor for the optional monolithic shared library
+// (MDIO_BUILD_MONOLITHIC_SHARED). The heavy tensorstore/Abseil objects -- and
+// the driver self-registration initializers -- are force-included via
+// WHOLE_ARCHIVE in CMake; this file just gives the shared object a concrete,
+// exported symbol and pulls the public header so the include graph is built.
+//
+// Why this library exists: a process that loads more than one plugin which
+// each statically embed tensorstore/Abseil ends up with multiple copies of
+// Abseil's global state (e.g. the Cord registry), which aborts at runtime
+// ("ODR violation in Cord"). Linking every such plugin against this single
+// shared library instead means the dynamic linker maps tensorstore/Abseil
+// exactly once, so those globals are singletons again.
+ +#include + +extern "C" const char* mdio_monolith_version() { return "1.0.0"; }