From 4148a9abe0f91160e39120755b3ffd3f33a2c1b8 Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Mon, 4 May 2026 09:03:08 +0200
Subject: [PATCH 01/24] Fix H5Fflush failure in hdf5_v1 printer with MultiNest

The hdf5_v1 printer aborted with "H5Fflush returned error value (-1)... not a file or file object" whenever a scanner explicitly called flush() on an auxiliary print stream. MultiNest hits this on its very first dumper callback, where it flushes the "txt" (Posterior) and "live" (LastLive) RA streams.

Root cause: only the primary HDF5Printer opens the HDF5 file and sets its file_id member; auxiliary printers inherit location_id, RA_location_id,and metadata_location_id from the primary but never initialise their own file_id. HDF5Printer::empty_sync_buffers() called H5Fflush(file_id, ...)unconditionally, so on an aux printer it was flushing an uninitialised hid_t.

Fix: route the flush through primary_printer->file_id. primary_printer defaults to "this" for the primary and is set to the actual primary in the aux constructor, so the same call is correct in both cases. This also makes the dumper's flush() calls actually useful (they now push HDF5's internal buffers to disk) instead of erroring out.

Closes #495.
---
 Printers/src/printers/hdf5printer/hdf5printer.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/Printers/src/printers/hdf5printer/hdf5printer.cpp b/Printers/src/printers/hdf5printer/hdf5printer.cpp
index cd32d6eac6..0f861b3f7c 100644
--- a/Printers/src/printers/hdf5printer/hdf5printer.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5printer.cpp
@@ -1377,8 +1377,11 @@ namespace Gambit
         printer_error().raise(LOCAL_INFO, errmsg.str());
       }
 
-      // Tell the HDF5 library to flush everything to disk
-      herr_t err = H5Fflush(file_id, H5F_SCOPE_GLOBAL);
+      // Tell the HDF5 library to flush everything to disk.
+      // Auxiliary printers don't own a file handle (only the primary opens
+      // the HDF5 file in its constructor), so always flush via the primary's
+      // file_id. For the primary itself, primary_printer == this.
+      herr_t err = H5Fflush(primary_printer->file_id, H5F_SCOPE_GLOBAL);
       if(err<0)
       {
         std::ostringstream errmsg;

From 343026c87f8af5c4409ffb285ec74247b42d0d10 Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Mon, 4 May 2026 10:54:59 +0200
Subject: [PATCH 02/24] Added a CLAUDE.md and MINIMAL_BUILD.md

---
 CLAUDE.md        |  17 ++++++
 MINIMAL_BUILD.md | 140 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 CLAUDE.md
 create mode 100644 MINIMAL_BUILD.md

diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000000..3b25d89c2f
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,17 @@
+# Build
+
+If you need to build/rebuild GAMBIT to perform some test, you can check 
+**[MINIMAL_BUILD.md](MINIMAL_BUILD.md)** for an end-to-end build recipe.
+It covers system packages, Python setup, cmake flags, backend ordering, 
+smoke test, and timing.
+
+
+# Run example
+
+```
+OMP_NUM_THREADS=1 ./gambit -rf yaml_files/spartan.yaml
+```
+
+(For multi-process MPI runs, re-configure cmake with `-DWITH_MPI=True`
+and use `mpiexec -np <N> ./gambit ...`.)
+
diff --git a/MINIMAL_BUILD.md b/MINIMAL_BUILD.md
new file mode 100644
index 0000000000..dad6c35562
--- /dev/null
+++ b/MINIMAL_BUILD.md
@@ -0,0 +1,140 @@
+# Minimal GAMBIT build
+
+End-to-end recipe for a minimal GAMBIT build: ditching all the physics "Bits", not building any backends and only building a couple of scanners. 
+
+The resulting `./gambit` binary can still be large and take several minutes to build
+from scratch on 16 cores (the bottleneck is the single huge `Core/src/gambit.cpp`
+translation unit).
+
+## 1. System packages
+
+```
+sudo apt-get update
+sudo apt-get install -y \
+    gfortran g++-12 gcc-12 \
+    libeigen3-dev libgsl-dev libboost-all-dev libhdf5-dev \
+    libblas-dev liblapack-dev pkg-config
+```
+
+Why each:
+- `gfortran` — required (Fortran backends, BLAS interfaces).
+- `g++-12 / gcc-12` — `cmake/backends.cmake` hard-codes
+  `--castxml-cc=g++-12` for the Prob3++ BOSS step (workaround for a
+  g++-13/clang-castxml conflict). Without g++-12 the prob3pp build
+  aborts at the BOSS/CastXML stage.
+- `libeigen3-dev` — provides `/usr/include/eigen3`, used in the cmake
+  invocation below.
+- `libgsl-dev`, `libboost-all-dev`, `libhdf5-dev` — runtime / build deps.
+- `libblas-dev liblapack-dev` — without LAPACK, cmake's optional.cmake
+  errors out with "LAPACK shared library not found".
+
+## 2. Python packages
+
+By default cmake picks a different interpreter (e.g. `/usr/local/bin/python3`,
+3.11 in this sandbox) and a different runtime libpython (the system one,
+e.g. `libpython3.10`). That mismatch forces you to install runtime Python
+packages (numpy, numba, pandas, scipy, mpmath) **twice** — once for each
+interpreter — or `./gambit -h` aborts with `ModuleNotFoundError: numpy`.
+
+Avoid this by pointing cmake at a single Python via `PYTHON_EXECUTABLE`,
+`PYTHON_INCLUDE_DIR` and `PYTHON_LIBRARY` (see the cmake step below). Then
+install the packages once for **that** interpreter:
+
+```
+pip install --user numpy numba pandas scipy mpmath
+```
+
+(In the Claude Code sandbox the chosen interpreter is
+`/usr/local/bin/python3` = 3.11.15, so this `pip --user` install is the
+one needed.)
+
+## 3. Configure (cmake)
+
+```
+mkdir -p build && cd build
+cmake -DPYTHON_EXECUTABLE=/usr/local/bin/python3 \
+      -DPYTHON_INCLUDE_DIR=/usr/include/python3.11 \
+      -DPYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.11.so \
+      -DEIGEN3_INCLUDE_DIR=/usr/include/eigen3 \
+      -DWITH_MPI=False \
+      -DWITH_ROOT=False \
+      -Ditch="ColliderBit;CosmoBit;DarkBit;NeutrinoBit;ObjectivesBit;FlavBit;DecayBit;SpecBit;PrecisionBit;Mathematica" \
+      ..
+```
+
+Notes:
+- `PYTHON_EXECUTABLE`/`PYTHON_INCLUDE_DIR`/`PYTHON_LIBRARY` make cmake use
+  the same Python (3.11 here) for both build-time scripts and the runtime
+  libpython — eliminating the dual pip install. Confirm by looking for
+  matching versions in the cmake output:
+  ```
+  Using Python interpreter version 3.11.15 for build.
+  Using Python libraries version 3.11.15 for Python backend support.
+  ```
+  If you see a `NOTE: You are using different Python versions for the
+  interpreter and the libraries!` message, the flags weren't picked up
+  (likely because `CMakeCache.txt` from a previous run is sticky — wipe
+  the build dir and rerun).
+- `itch` lists modules to **exclude** from the build.
+- `WITH_MPI=False` and `WITH_ROOT=False` keep the build minimal.
+- This first cmake call takes ~75 seconds (it clones pybind11, runs many
+  feature checks).
+
+## 4. Build scanners, then re-cmake, then build gambit
+
+```
+make diver
+make multinest
+cmake ..                  # re-detect newly built scanner libraries
+make gambit -j$(nproc)
+```
+
+Expected timings on 16 cores:
+- `diver`: ~1 min (mostly download)
+- `multinest`: ~1 min (mostly download)
+- `cmake ..`: ~75 s
+- `make gambit -j16`: Can take ~25 min (gambit.cpp.o alone can take ~20 min)
+
+The whole chain runs cleanly start-to-finish if build is done in the order above.
+
+### Long-running build tip (Claude Code)
+
+The `Bash` tool defaults to a 10-minute timeout, but exceeding it does
+**not** kill the command — the tool returns "running in background" and
+the build continues. You can poll with subsequent `ps`/`tail` calls. Or
+raise the cap explicitly via the `BASH_MAX_TIMEOUT_MS` env var in
+`~/.claude/settings.json` (e.g. set to `3600000` for 60 min):
+
+```json
+{
+  "env": { "BASH_MAX_TIMEOUT_MS": "3600000" }
+}
+```
+
+To watch a verbose log instead of the silent default, prepend `VERBOSE=1`:
+
+```
+make gambit -j$(nproc) VERBOSE=1
+```
+
+## 5. Smoke test
+
+```
+./gambit --version
+./gambit -h
+./gambit -d -f yaml_files/spartan.yaml   # dry-run, prints functor tree
+```
+
+A successful dry-run prints the resolved dependency tree.
+
+## 6. Run example
+
+Without MPI:
+
+```
+OMP_NUM_THREADS=1 ./gambit -rf yaml_files/spartan.yaml
+```
+
+(The original `mpiexec -np N ./gambit ...` form requires `WITH_MPI=True`
+at configure time — re-run cmake with that flag if needed.)
+

From 4d6eebcc7b1574a28e5099eedbd382b4307e870c Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Mon, 4 May 2026 10:55:45 +0200
Subject: [PATCH 03/24] Added file with list of hdf5 printer issues

---
 hdf5_printer_issues.md | 195 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 hdf5_printer_issues.md

diff --git a/hdf5_printer_issues.md b/hdf5_printer_issues.md
new file mode 100644
index 0000000000..77a2da70f0
--- /dev/null
+++ b/hdf5_printer_issues.md
@@ -0,0 +1,195 @@
+# HDF5 printer audit — synthesized findings
+
+Audit of the v1 (`Printers/src/printers/hdf5printer/`) and v2
+(`Printers/src/printers/hdf5printer_v2/`) HDF5 printer implementations,
+performed after the file_id fix for issue #495. Findings sorted by
+severity and grouped where v1 and v2 share a class of bug.
+
+## Critical (likely to bite real runs)
+
+### [v2] `final_size` is uninitialised on workers in `HDF5Printer2::finalise()`
+`hdf5printer_v2.cpp:1958-2010`. `final_size` is set only inside
+`if(myRank==0)` at lines 1959-1967, but the loop at 1996-2010 runs on
+**every** rank and calls `(*it)->extend_all_datasets_to(final_size)` at
+line 2008. Workers pass garbage. There's no rank-0 guard around the
+loop. This will misbehave any time a worker has a non-synchronised aux
+buffer registered (which any scanner using RA aux streams over MPI
+produces). The whole 1996-2010 block belongs inside `if(myRank==0)`,
+alongside the matching block at 1930-1952.
+
+### [v1] `errorsOff()` / `errorsOn()` nesting permanently silences HDF5 errors
+`hdf5tools.cpp:369-386`. `errorsOff()` saves the old handler into
+module-globals `old_func` / `old_client_data`. The function at line 377
+calls `errorsOff()` again *while errors are already off* — so it saves
+NULL into those globals. The matching `errorsOn()` at line 386 then
+restores NULL, leaving HDF5 error reporting permanently disabled for the
+rest of the process. Same pattern at lines 395/408 (less reachable). Any
+subsequent HDF5 problem will fail silently.
+
+### [v1] Aux printers' uninitialised `hid_t` members beyond `file_id`
+`hdf5printer.hpp:336-345`. None of `file_id`, `group_id`, `RA_group_id`,
+`metadata_id` (and arguably the `*_location_id` set) have default
+initialisers. The aux constructor only assigns `location_id` /
+`RA_location_id` / `metadata_location_id`. The `H5Fflush` issue we just
+fixed (#495) is one example — others are latent. Suggests a wider fix:
+either default-init all to `-1` (HDF5's invalid handle), or always go
+through `primary_printer->X` in code that runs on aux instances.
+
+### [v1] `openGroup` returns after creating only the first non-existent path component
+`hdf5tools.cpp:540-574`. On the first iteration where `H5Gopen2` fails
+and `H5Gcreate2` succeeds, the inner `else` branch at line 564 returns
+immediately, exiting the loop before processing the remaining
+components. Latent because v1 only ever opens single-level paths
+(`/data`, `/data/RA`, etc. as separate calls), but the function would
+silently mis-create any user-supplied deeper `group:` option.
+
+### [v1] Buffer destructors do not close their HDF5 datasets
+`VertexBufferNumeric1D_HDF5.hpp:294-303`,
+`DataSetInterfaceBase.hpp:189-204`. Destructors are deliberately empty
+(close calls commented out, with TODO about copy semantics). Cleanup
+relies entirely on `finalise()` running. Any abnormal exit before
+finalise — a constructor exception elsewhere, an error during
+synchronisation, an uncaught signal — leaks dataset handles and likely
+loses unflushed data when `H5Fclose` auto-closes them later.
+
+### [v2] `H5Tclose` missing in `HDF5DataSet<T>::write_buffer` — leaks one type handle per flush
+`hdf5printer_v2.hpp:304-311`. `hid_t dtype = H5Dget_type(get_dset_id())`
+is opened but never closed. The companion `write_RA_buffer` at lines
+438/467 *does* close it. Long scans with many datasets and many flushes
+accumulate handles until HDF5's table or the OS file-handle limit gives.
+
+### [v2] Stack-local `RAbuffer` added to member `aux_buffers`, then destroyed
+`hdf5printer_v2.cpp:1973, 1994`. `HDF5MasterBuffer RAbuffer(...)` is a
+local; `add_aux_buffer(RAbuffer)` pushes its address into the
+`aux_buffers` vector. When `finalise` returns, the pointer dangles.
+Currently dormant (no second `finalise` call, empty destructor at 1844)
+but a bug-magnet for any future change.
+
+## Major
+
+### [v1] `masterWaitForAll(FINAL_SYNC)` deadlocks if any worker dies before reaching it
+`hdf5printer.cpp:987` plus `Utils/src/mpiwrapper.cpp:163-188`. The
+barrier is plain blocking `Recv`s on rank 0, no Iprobe-with-timeout
+(unlike `allWaitForMasterWithFunc`). The TODO comment at line 988
+acknowledges this. Same risk on the resume-mode `myComm.Barrier()` at
+line 610 if `prepare_and_combine_tmp_files` raises on rank 0.
+
+### [v1] `exit(1)` from inside the constructor after `closeFile`
+`hdf5printer.cpp:467-469`. On any restart-without-`-r` against an
+existing output file with the requested group missing — a normal user
+mistake — rank 0 calls `exit(1)`, bypassing MPI shutdown. Workers
+blocked on the upcoming `myComm.Barrier()` at line 610 hang forever.
+
+### [v2] `MPI_flush_to_rank` / `MPI_recv_all_buffers` / `MPI_request_buffer_data` are dead code
+`hdf5printer_v2.cpp:664-810`, `hdf5printer_v2.hpp:910-1001`, including
+`recv_counter` / `send_counter` debug scaffolding. No callers.
+`finalise()` uses the `gather_and_print` / `Gatherv` path instead.
+Either half-finished refactor leftover or a missed wiring that should be
+using this faster path. Recommend either deleting or wiring up — the
+deadlock-warning comment is currently misleading.
+
+### [v2] `HDF5Printer2::flush()` does not gather over MPI; only the calling rank's buffermaster is touched
+`hdf5printer_v2.cpp:1876-1879`. So between scanner-driven `flush()`
+calls, RA aux data on workers stays in worker memory until `finalise()`.
+Different from v1's effective semantics. Any scanner that calls
+`aux_stream->flush()` for durability (MultiNest's dumper at
+`multinest_3.12/multinest.cpp:362-363` is exactly this) will *not* get
+RA data from non-rank-0 ranks to disk until the scan ends.
+
+### [v1] `synchronise_buffers` underflows if `sync_pos == 0`
+`hdf5printer.cpp:1307`. `const unsigned long sync_pos = get_sync_pos()-1;`
+becomes `ULONG_MAX`. Currently never reached because `check_for_new_point`
+only triggers it after a real point has been seen, but the invariant is
+not enforced anywhere.
+
+### [v1] Unbounded growth of `postpone_write_queue_and_locs`
+`VertexBufferNumeric1D_HDF5.hpp:63, 541-544`. RA writes that can't yet
+be matched to a sync point get pushed without a cap. The MPI-based
+queue-sending that was supposed to bound this has been disabled
+(commented out) and the comments weren't updated.
+
+### [v1] `_print_metadata` always passes `resume=false`
+`hdf5printer.cpp:1681-1691`. Re-creates datasets on every call.
+Currently only called once, at `finalise()`, so dormant — but any new
+caller will trip the "Dataset with same name may already exist" path
+immediately.
+
+### [v1] Aux constructor doesn't validate `dynamic_cast` result
+`hdf5printer.cpp:683`. If the cast fails (HDF5 aux attached to non-HDF5
+primary), `primary_printer` becomes nullptr and the very next
+`set_resume(primary_printer->get_resume())` segfaults.
+
+### [v2] `print_metadata` underflow
+`hdf5printer_v2.cpp:609-616`. `std::size_t size = get_next_metadata_position(); if(sameset) size --;`
+— if the GAMBIT dataset doesn't exist yet, `get_next_metadata_position()`
+returns 0, `--` underflows to `SIZE_MAX`. Currently protected by the
+call-site `if (use_metadata)` at 1948, but the function is unsafe.
+
+### [v1] Iterators decremented past `begin()` in combine size-trim loops
+`hdf5_combine_tools.cpp:539-545`, `807-813`. Technically UB on
+`std::vector::iterator` when all entries are false. Works on most STLs.
+
+### [v1] `H5Dget_type` not closed in combine tools
+`hdf5_combine_tools.cpp:949-950, 955-956, 1144-1145, 1273, 1278`.
+Type-handle leak per file/parameter combined; contributes to "too many
+open objects" warnings on big merges.
+
+## Minor / Notes
+
+A handful more across both files:
+
+- v1 dead/stale state members (`current_dset_position`, `previous_points`,
+  `global=false` field never assigned).
+- v1 `errorsOff` / `errorsOn` use static globals → fundamentally not
+  thread-safe.
+- v1 hardcoded `BUFFERLENGTH=100` and `MAX_PPIDPAIRS=1000`.
+- v1 `combine_output_py` is dead code (uses a Python helper that may not
+  exist).
+- v2 constructor's resource-leak risk if `printer_error()` raises
+  mid-setup (no RAII).
+- v2 generous `std::cout` / `std::cerr` use where `logger()` would be
+  appropriate (e.g. lines 1514, 1539, 1543, 1705, 1713, 1716, 1745,
+  1762, 1965, 2018).
+- v2 narrowing conversions between `std::size_t` and `int` in MPI size
+  handling.
+- Both v1 and v2 shell out to `popen("rm ...")` and `popen("cp ...")`
+  rather than calling `std::remove` / filesystem APIs. Brittle on paths
+  with shell metacharacters.
+- v2 `MAX_BUFFER_SIZE = 100000` `T buffer[MAX_BUFFER_SIZE]` is a stack
+  array (~800 KB for `T=double`). Fine on Linux defaults, fragile
+  elsewhere.
+- v2 `H5Tequal(in-memory, on-disk)` check can fire spuriously on resume
+  because on-disk types aren't normalised through `H5Tget_native_type`
+  like v1 does.
+- v2 `_isvalid` companion existence not separately tracked from value
+  dataset (TODO at `hdf5printer_v2.hpp:888`).
+
+## What to prioritize fixing
+
+1. **v2 `final_size` / `extend_all_datasets_to` on workers** — same shape
+   as the bug we just fixed: a finalise-time member-not-set-on-aux-path
+   issue that hides because some scenarios mask the UB. Will hit any MPI
+   scan with v2 + RA aux streams.
+2. **v1 `errorsOff` / `errorsOn` nesting** — silently disables your
+   safety net for HDF5 errors. Trivial to fix (save state in a local).
+3. **v1 uninit `hid_t` cluster** — same root cause as #495. A small
+   refactor to default-init or to collapse aux/primary handles into a
+   single struct would prevent another year of "another field wasn't set
+   on aux printers" bugs.
+4. **v2 `H5Tclose` leak in `write_buffer`** — slow-burn but unbounded.
+5. **v1 buffer-destructor cleanup** — would matter on any abnormal
+   shutdown, including signal-driven ones from a scheduler killing a
+   job.
+
+Everything below "Major" is good to file as low-priority issues but
+probably not urgent.
+
+## Theme
+
+A lot of the fragility across both printers comes from sharing state
+between primary and auxiliary printer instances via raw pointers and
+ad-hoc "use this one, not that one" rules. A refactor that makes the
+auxiliary's role purely a *view* over a shared `HDF5File` object (so
+there are no per-printer file/group handles to forget to initialise)
+would eliminate this whole class of bug. Big change, but probably worth
+scoping if you're touching this code anyway.

From cef688aca483efb15a9dec37303a1042d806df44 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 09:17:56 +0000
Subject: [PATCH 04/24] [hdf5_v2] Guard finalise() RA-flush block with
 myRank==0 to fix uninit final_size on workers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue
-----
In HDF5Printer2::finalise() (Printers/src/printers/hdf5printer_v2/
hdf5printer_v2.cpp), the local `std::size_t final_size` was declared
unconditionally but only assigned inside an `if(myRank==0)` block (via
`buffermaster.get_next_free_position()`). Immediately after, the loop
that flushes unsynchronised (random-access) aux buffers and calls
`(*it)->extend_all_datasets_to(final_size)` ran on **every** rank.
Worker ranks therefore passed an uninitialised value into
`extend_all_datasets_to`, which is undefined behaviour.

In practice this hit any MPI scan using the v2 hdf5 printer with
auxiliary RA streams (i.e. anything beyond a single-rank run): worker
ranks with non-empty `aux_buffers` would attempt local writes / dataset
extensions that they have no business performing — only rank 0 owns
the output file. The matching synchronised-buffer flush block earlier
in finalise() is already correctly gated by `if(myRank==0)`, so the
fix is to apply the same guard symmetrically.

Fix
---
- Move the `final_size` query and the entire RA-flush /
  extend_all_datasets_to loop inside `if(myRank==0)`. Worker ranks
  have already shipped their RA buffer contents to rank 0 via the
  preceding `gather_and_print(...)` call, which remains unguarded
  (it is a collective and must be called by all ranks).
- Tighten the scope of the `final_size` declaration so it lives only
  inside the rank-0 block where it is meaningful.
- Move the `add_aux_buffer(RAbuffer)` call inside the rank-0 block
  too, since the RAbuffer only contains gathered data on rank 0 and
  the loop that consumes `aux_buffers` is now rank-0-only. (Note:
  RAbuffer being a stack local appended to the member `aux_buffers`
  vector is a separate latent issue, tracked in
  `hdf5_printer_issues.md`.)
- Added a comment explaining why the block is rank-0-only.

Verification
------------
- Built GAMBIT with `-DWITH_MPI=True`.
- Ran the spartan example single-rank
  (`OMP_NUM_THREADS=1 ./gambit -rf yaml_files/spartan.yaml`):
  successful, "Final dataset size is 9003".
- Ran spartan with 2 MPI ranks
  (`mpiexec -np 2 ./gambit -rf yaml_files/spartan.yaml`):
  successful, "Final dataset size is 9001". Inspected the output
  HDF5: all 34 datasets under /data have identical length (9001),
  confirming sync and RA datasets were extended consistently and no
  output was corrupted.

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .../hdf5printer_v2/hdf5printer_v2.cpp         | 50 +++++++++++--------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp b/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
index 410b27abe3..7f40fea80d 100644
--- a/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
+++ b/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
@@ -1955,16 +1955,6 @@ namespace Gambit
 
             std::ostringstream buffer_nonempty_report;
             bool buffer_nonempty_warn(false);
-            std::size_t final_size;
-            if(myRank==0)
-            {
-                /// Need to know final nominal dataset size to ensure unsynchronised datasets match synchronised ones.
-                buffermaster.lock_and_open_file();
-                final_size = buffermaster.get_next_free_position();
-                buffermaster.close_and_unlock_file();
-                std::cout<<"Final dataset size is "<<final_size<<std::endl;
-                logger()<< LogTags::printers << LogTags::info << "Final dataset size is "<<final_size<<EOM;
-            }
 
             #ifdef WITH_MPI
             // Gather RA print buffer data from all other processes
@@ -1989,23 +1979,39 @@ namespace Gambit
                 // So just stick any buffermanager object as the argument to satisfy the function signature requirements.
                 gather_and_print(buffermaster,RA_buffers,false);
             }
-
-            // Try to flush everything left to disk
-            add_aux_buffer(RAbuffer); // Make sure to include 'gathered' RA data, if there is any.
             #endif
-            for(auto it=aux_buffers.begin(); it!=aux_buffers.end(); ++it)
+
+            // Only rank 0 owns the output file, so only rank 0 should flush
+            // RA buffers to disk and extend the unsynchronised datasets to the
+            // final nominal size. Worker ranks have already shipped their RA
+            // data to rank 0 via gather_and_print above.
+            if(myRank==0)
             {
-                if(not (*it)->is_synchronised())
+                /// Need to know final nominal dataset size to ensure unsynchronised datasets match synchronised ones.
+                buffermaster.lock_and_open_file();
+                std::size_t final_size = buffermaster.get_next_free_position();
+                buffermaster.close_and_unlock_file();
+                std::cout<<"Final dataset size is "<<final_size<<std::endl;
+                logger()<< LogTags::printers << LogTags::info << "Final dataset size is "<<final_size<<EOM;
+
+                #ifdef WITH_MPI
+                // Try to flush everything left to disk
+                add_aux_buffer(RAbuffer); // Make sure to include 'gathered' RA data, if there is any.
+                #endif
+                for(auto it=aux_buffers.begin(); it!=aux_buffers.end(); ++it)
                 {
-                    (*it)->flush();
-                    // Check if everything managed to flush!
-                    if(not (*it)->all_buffers_empty())
+                    if(not (*it)->is_synchronised())
                     {
-                        buffer_nonempty_report<<(*it)->buffer_status();
-                        buffer_nonempty_warn = true;
+                        (*it)->flush();
+                        // Check if everything managed to flush!
+                        if(not (*it)->all_buffers_empty())
+                        {
+                            buffer_nonempty_report<<(*it)->buffer_status();
+                            buffer_nonempty_warn = true;
+                        }
+                        // Make sure final dataset size is correct for the unsynchronised buffers
+                        (*it)->extend_all_datasets_to(final_size);
                     }
-                    // Make sure final dataset size is correct for the unsynchronised buffers
-                    (*it)->extend_all_datasets_to(final_size);
                 }
             }
 

From 896c3b27a5258acdc14ec04b2370a35c07ca7474 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 09:18:03 +0000
Subject: [PATCH 05/24] Document optional MPI install in MINIMAL_BUILD.md

Add an "Optional: MPI" subsection to step 1 covering the
`libopenmpi-dev openmpi-bin` install and the corresponding
`-DWITH_MPI=True` cmake flag, plus an MPI run example in step 6
(including the `--allow-run-as-root --oversubscribe` flags that are
typically needed in containerised dev environments).

Needed for testing any printer / scanner code paths gated on
`#ifdef WITH_MPI` (e.g. the hdf5_v2 finalise rank-0 guard fixed in
the preceding commit).

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 MINIMAL_BUILD.md | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/MINIMAL_BUILD.md b/MINIMAL_BUILD.md
index dad6c35562..469875dc66 100644
--- a/MINIMAL_BUILD.md
+++ b/MINIMAL_BUILD.md
@@ -28,6 +28,25 @@ Why each:
 - `libblas-dev liblapack-dev` — without LAPACK, cmake's optional.cmake
   errors out with "LAPACK shared library not found".
 
+### Optional: MPI
+
+To build GAMBIT with MPI support (required to run multi-process scans
+with `mpiexec -np N ./gambit ...`, and to exercise any printer / scanner
+code paths gated on `#ifdef WITH_MPI`), additionally install OpenMPI:
+
+```
+sudo apt-get install -y libopenmpi-dev openmpi-bin
+```
+
+This provides `mpiexec`, `mpicc`, `mpic++`, and the headers/libs that
+cmake's MPI detection picks up. Then add `-DWITH_MPI=True` to the cmake
+invocation in step 3. Verify the install with:
+
+```
+mpiexec --version    # should report Open MPI 4.x
+which mpic++         # /usr/bin/mpic++
+```
+
 ## 2. Python packages
 
 By default cmake picks a different interpreter (e.g. `/usr/local/bin/python3`,
@@ -76,7 +95,10 @@ Notes:
   (likely because `CMakeCache.txt` from a previous run is sticky — wipe
   the build dir and rerun).
 - `itch` lists modules to **exclude** from the build.
-- `WITH_MPI=False` and `WITH_ROOT=False` keep the build minimal.
+- `WITH_MPI=False` and `WITH_ROOT=False` keep the build minimal. Set
+  `-DWITH_MPI=True` instead if you installed OpenMPI per the optional
+  step in section 1; cmake will then pick up `mpic++` automatically and
+  compile the `#ifdef WITH_MPI` branches in the printer / scanner code.
 - This first cmake call takes ~75 seconds (it clones pybind11, runs many
   feature checks).
 
@@ -135,6 +157,18 @@ Without MPI:
 OMP_NUM_THREADS=1 ./gambit -rf yaml_files/spartan.yaml
 ```
 
-(The original `mpiexec -np N ./gambit ...` form requires `WITH_MPI=True`
-at configure time — re-run cmake with that flag if needed.)
+With MPI (build configured with `-DWITH_MPI=True`):
+
+```
+OMP_NUM_THREADS=1 mpiexec -np 2 ./gambit -rf yaml_files/spartan.yaml
+```
+
+If `mpiexec` complains about running as root in a container, add
+`--allow-run-as-root` (and, if you hit "not enough slots", also
+`--oversubscribe`):
+
+```
+OMP_NUM_THREADS=1 mpiexec --allow-run-as-root --oversubscribe -np 2 \
+    ./gambit -rf yaml_files/spartan.yaml
+```
 

From dd0109b7305e2f65ddd703b5e05c2a5318097c7e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 09:18:36 +0000
Subject: [PATCH 06/24] Ignore downloaded_tarball_paths.txt build artifact

Cmake writes downloaded_tarball_paths.txt at the repo root listing the
paths of scanner / backend tarballs it downloaded into
ScannerBit/downloaded/ and Backends/downloaded/ during a build. It is
purely a build-time bookkeeping file and should not be tracked.

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 59f3cd1142..5b5dddbd8b 100755
--- a/.gitignore
+++ b/.gitignore
@@ -85,6 +85,7 @@ Backends/installed/**
 ScannerBit/downloaded/**
 ScannerBit/installed/**
 ScannerBit/lib/plugin_libraries.list
+downloaded_tarball_paths.txt
 
 # GUM files
 *.mug

From 7bd5c1b4a9307e0a25bc708ff89a0592913cbf30 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 14:21:08 +0000
Subject: [PATCH 07/24] [hdf5_v1] Fix errorsOff/errorsOn nesting that
 permanently silenced HDF5 errors

Issue
-----
In Printers/src/printers/hdf5printer/hdf5tools.cpp, errorsOff() saved
the current HDF5 error handler into module-globals
old_func / old_client_data and then installed a NULL handler;
errorsOn() restored from those globals. This is fine for an
isolated pair, but it breaks when errorsOff() is called while errors
are already off:

  errorsOff();   // saves real handler -> globals; installs NULL
  ...
  errorsOff();   // saves NULL (current handler) -> globals; installs NULL
  ...
  errorsOn();    // restores globals = NULL  -> errors stay off forever

There is exactly one such nested case in the v1 printer code today
(hdf5tools.cpp lines 369/377/386 in the dataset corruption-recovery
helper), and it permanently disables HDF5 error reporting for the rest
of the process whenever it is hit. Any subsequent HDF5 problem then
fails silently. There are also ~25 paired call sites across
hdf5tools.cpp and hdf5_combine_tools.cpp that work fine in isolation
but would corrupt the global state if any of them were ever wrapped
in another errorsOff() / errorsOn() block.

Fix
---
Make errorsOff() / errorsOn() idempotent with a bool guard:
- errorsOff(): if errors are already off, do nothing (in particular,
  do not call H5Eget_auto2, which would overwrite the saved handler
  with the NULL we installed ourselves).
- errorsOn(): if errors are already on (i.e. unbalanced call), do
  nothing rather than blindly restoring whatever is in the saved
  slots.

The bool / handler state is moved into an unnamed namespace at file
scope, giving it internal linkage. Functionally equivalent to the
existing namespace-scope globals for this codebase (no other TU
references them), but it (a) makes intent explicit -- these are
strictly the implementation detail of the two functions just below
them -- and (b) prevents any future accidental ODR collision on the
short generic names.

A counter-based nesting scheme was considered but rejected: no real
caller pairs an inner errorsOn() with an inner errorsOff(), so all the
counter would do is silently absorb the one buggy nested call we
already have. The bool keeps the logic minimal and matches what every
existing caller actually wants.

Note: the underlying state is still a module global, so errorsOff /
errorsOn remain not thread-safe. That is a separate (lower-priority)
issue tracked in hdf5_printer_issues.md.

Verification
------------
- Rebuilt GAMBIT (-DWITH_MPI=True) cleanly.
- Single-rank v1 spartan run: success.
- 2-rank MPI v1 spartan run: success.
- Resume run via combine-routines path through hdf5_combine_tools.cpp
  (which has the bulk of the errorsOff / errorsOn call sites): success.
  No HDF5 warnings or errors in any of the runs.

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .../src/printers/hdf5printer/hdf5tools.cpp    | 24 ++++++++++++++-----
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/Printers/src/printers/hdf5printer/hdf5tools.cpp b/Printers/src/printers/hdf5printer/hdf5tools.cpp
index 346e4ebfa6..f47200a3ae 100644
--- a/Printers/src/printers/hdf5printer/hdf5tools.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5tools.cpp
@@ -668,9 +668,18 @@ namespace Gambit {
       /// Close hdf5 group
       SIMPLE_CALL(hid_t, closeGroup,  hid_t, H5Gclose, "close", "group", "group")
 
-      /// global error variables (handler)
-      H5E_auto2_t old_func;
-      void *old_client_data;
+      /// State for errorsOff() / errorsOn(). File-local: only the two
+      /// functions below should ever touch these. The bool guard ensures
+      /// that a redundant errorsOff() called while errors are already off
+      /// does NOT overwrite the saved handler with the NULL handler we
+      /// installed ourselves -- otherwise the matching errorsOn() would
+      /// "restore" NULL and silently disable HDF5 error reporting for
+      /// the rest of the process.
+      namespace {
+         bool errors_are_off = false;
+         H5E_auto2_t saved_func = nullptr;
+         void *saved_client_data = nullptr;
+      }
 
       // FIXME: This caused compile problems on LISA cluster (CW)
       /// Silence error report (e.g. while probing for file existence)
@@ -684,18 +693,21 @@ namespace Gambit {
       /// and let me know if it works :)
       void errorsOff()
       {
+         if (errors_are_off) return; // already off; don't clobber saved state
          /* Save old error handler */
-         H5Eget_auto2(H5E_DEFAULT, &old_func, &old_client_data);
-
+         H5Eget_auto2(H5E_DEFAULT, &saved_func, &saved_client_data);
          /* Turn off error handling */
          H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
+         errors_are_off = true;
       }
 
       /// Restore error report
       void errorsOn()
       {
+         if (!errors_are_off) return; // unbalanced / already on; no-op
          /* Restore previous error handler */
-         H5Eset_auto2(H5E_DEFAULT, old_func, old_client_data);
+         H5Eset_auto2(H5E_DEFAULT, saved_func, saved_client_data);
+         errors_are_off = false;
       }
 
       /// @{ Dataset manipulations

From 592ad6dfc4e1ae189a449d9f07191db7ea779db3 Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Mon, 4 May 2026 10:54:59 +0200
Subject: [PATCH 08/24] Added a CLAUDE.md and MINIMAL_BUILD.md

---
 MINIMAL_BUILD.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MINIMAL_BUILD.md b/MINIMAL_BUILD.md
index 469875dc66..2b2c58c7c7 100644
--- a/MINIMAL_BUILD.md
+++ b/MINIMAL_BUILD.md
@@ -95,10 +95,12 @@ Notes:
   (likely because `CMakeCache.txt` from a previous run is sticky — wipe
   the build dir and rerun).
 - `itch` lists modules to **exclude** from the build.
+
 - `WITH_MPI=False` and `WITH_ROOT=False` keep the build minimal. Set
   `-DWITH_MPI=True` instead if you installed OpenMPI per the optional
   step in section 1; cmake will then pick up `mpic++` automatically and
   compile the `#ifdef WITH_MPI` branches in the printer / scanner code.
+
 - This first cmake call takes ~75 seconds (it clones pybind11, runs many
   feature checks).
 

From 8d905d6179e39b2956dcb516989cd8977e5cc95b Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Mon, 4 May 2026 09:03:08 +0200
Subject: [PATCH 09/24] Fix H5Fflush failure in hdf5_v1 printer with MultiNest

The hdf5_v1 printer aborted with "H5Fflush returned error value (-1)... not a file or file object" whenever a scanner explicitly called flush() on an auxiliary print stream. MultiNest hits this on its very first dumper callback, where it flushes the "txt" (Posterior) and "live" (LastLive) RA streams.

Root cause: only the primary HDF5Printer opens the HDF5 file and sets its file_id member; auxiliary printers inherit location_id, RA_location_id,and metadata_location_id from the primary but never initialise their own file_id. HDF5Printer::empty_sync_buffers() called H5Fflush(file_id, ...)unconditionally, so on an aux printer it was flushing an uninitialised hid_t.

Fix: route the flush through primary_printer->file_id. primary_printer defaults to "this" for the primary and is set to the actual primary in the aux constructor, so the same call is correct in both cases. This also makes the dumper's flush() calls actually useful (they now push HDF5's internal buffers to disk) instead of erroring out.

Closes #495.
---
 yaml_files/spartan_issue_495.yaml | 456 ++++++++++++++++++++++++++++++
 1 file changed, 456 insertions(+)
 create mode 100644 yaml_files/spartan_issue_495.yaml

diff --git a/yaml_files/spartan_issue_495.yaml b/yaml_files/spartan_issue_495.yaml
new file mode 100644
index 0000000000..b87d435cb7
--- /dev/null
+++ b/yaml_files/spartan_issue_495.yaml
@@ -0,0 +1,456 @@
+##########################################################################
+## GAMBIT configuration for an ultra-minimal scan of a toy model.
+##
+## Only needs ExampleBit_A and a scanner (either one of the built-ins like
+## the random sampler or TWalk, or an external like Diver, MultiNest,
+## GreAT or Polychord).
+##########################################################################
+
+
+Parameters:
+  # In this example we will simply be fitting the mean and standard deviation of a normal distribution.
+  NormalDist:
+    # Example of using simple priors:
+    mu:
+      prior_type: flat
+      range: [15, 30]
+    sigma:
+      prior_type: flat
+      range: [0, 5]
+
+    # If using a more complicated prior specified in the 'Priors' section below,
+    # use the 'in_priors' keyword in this section:
+    # mu: in_priors
+    # sigma: in_priors
+
+
+Priors:
+
+  # None needed: simple priors are already specified in the 'Parameters' section above.
+
+  # If defining a more complicated prior, make sure to tag the relevant parameters with the
+  # keyword 'in_priors' in the 'Parameters' section above. Below is an example of a
+  # 2D gaussian prior for the two parameters 'mu' and 'sigma':
+  #
+  # my_prior:
+  #   parameters: ["NormalDist::mu", "NormalDist::sigma"]
+  #   prior_type: gaussian
+  #   mu: [18, 5]
+  #   cov_matrix: [[9.0, 0.0], [0.0, 1.0]]
+  #   # sigma: [3.0, 1]
+
+
+Printer:
+
+  # printer: sqlite
+  # options:
+  #   output_file: "results.sql"
+  #   table_name: "data"
+  #   buffer_length: 1000
+  #   delete_file_on_restart: true
+
+  # printer: hdf5
+  # options:
+  #   output_file: "results.hdf5"
+  #   group: "/data"
+  #   delete_file_on_restart: true
+  #   buffer_length: 1000
+  #   # disable_autorepair: true
+
+  printer: hdf5_v1
+  options:
+    output_file: "gambit_output.hdf5"
+    group: "/data"
+    delete_file_on_restart: true
+    disable_combine_routines: true
+
+  # printer: ascii
+  # options:
+  #   output_file: "results.dat"
+  #   buffer_length: 10
+  #   delete_file_on_restart: true
+  #   print_debug_data: true
+
+  # printer: none
+
+
+Scanner:
+
+  use_scanner: multinest
+
+  scanners:
+
+    random:
+      plugin: random
+      like: LogLike
+      point_number: 1000
+
+    grid:
+      # plugin: pygrid
+      plugin: grid
+      #version: ">=1.0"
+      like: LogLike
+      grid_pts: [5, 3]
+
+    de:
+      plugin: diver
+      like: LogLike
+      NP: 500
+      initial_guesses:
+        parameters: [NormalDist::sigma, NormalDist::mu]
+        values:
+        - [2.5, 25]
+        - [2, 24]
+        - [1.5, 22]
+
+    multinest:
+      plugin: multinest
+      like:  LogLike
+      nlive: 500
+      #tol: 0.0001
+      tol: 0.1
+
+    mcmc:
+      plugin: great
+      like: LogLike
+      nTrialLists: 5
+      nTrials: 10000
+
+    twalk:
+      plugin: twalk
+      like: LogLike
+      sqrtR: 1.001
+
+    polychord:
+      plugin: polychord
+      like:  LogLike
+      print_parameters_in_native_output: true
+      tol: 0.1
+
+    pso:
+      plugin: jswarm
+      like:  LogLike
+      NP: 400
+      adaptive_phi: true
+      adaptive_omega: true
+      verbosity: 2
+
+    minuit2:
+      plugin: minuit2
+      like: LogLike
+      tolerance: 0.0001
+      precision: 0.0001
+      max_loglike_calls: 100000
+      max_iterations: 100000
+      algorithm: combined # simplex, combined, scan, fumili, bfgs, migrad
+      print_level: 1
+      strategy: 2
+      start:
+        NormalDist::mu: 25.
+        NormalDist::sigma: 2.5
+      step:
+        NormalDist::mu: 5.
+        NormalDist::sigma: 1.
+
+    scipy_basin_hopping:
+      like: LogLike
+      plugin: scipy_basin_hopping
+      # The run arguments below have been tested with scipy v1.9.
+      # Other versions might expect different arguments.
+      run:
+        # n_runs: 4
+        x0:
+          NormalDist::mu: 25.
+          NormalDist::sigma: 2.5
+        niter: 100
+        T: 1.0
+        stepsize: 0.5
+        minimizer_kwargs:
+          method: "L-BFGS-B"
+        interval: 50
+        disp: true
+        target_accept_rate: 0.5
+        stepwise_factor: 0.9
+
+    scipy_differential_evolution:
+      like: LogLike
+      plugin: scipy_differential_evolution
+      # The run arguments below have been tested with scipy v1.9.
+      # Other versions might expect different arguments.
+      run:
+        # n_runs: 4
+        strategy: 'best1bin'
+        maxiter: 1000
+        popsize: 15
+        tol: 0.01
+        mutation: [0.5, 1]
+        recombination: 0.7
+        disp: false
+        polish: true
+        init: 'latinhypercube'
+        atol: 0
+        updating: 'immediate'
+        x0:
+          NormalDist::mu: 25.
+          NormalDist::sigma: 2.5
+
+    scipy_direct:
+      like: LogLike
+      plugin: scipy_direct
+      # The run arguments below have been tested with scipy v1.9.
+      # Other versions might expect different arguments.
+      run:
+        # n_runs: 4
+        eps: 0.0001
+        # maxfun: 2000
+        maxiter: 1000
+        locally_biased: true
+        vol_tol: 1e-16
+        len_tol: 1e-6
+
+    scipy_dual_annealing:
+      like: LogLike
+      plugin: scipy_dual_annealing
+      # The run arguments below have been tested with scipy v1.9.
+      # Other versions might expect different arguments.
+      run:
+        # n_runs: 4
+        maxiter: 1000
+        initial_temp: 5230.0
+        restart_temp_ratio: 2.0e-5
+        visit: 2.62
+        accept: -5.0
+        maxfun: 10000000.0
+        no_local_search: false
+        x0:
+          NormalDist::mu: 25.
+          NormalDist::sigma: 2.5
+
+    scipy_shgo:
+      like: LogLike
+      plugin: scipy_shgo
+      # The run arguments below have been tested with scipy v1.9.
+      # Other versions might expect different arguments.
+      run:
+        split_param_space:
+          NormalDist::mu: 2
+          NormalDist::sigma: 2
+        n: 100
+        iters: 1
+        sampling_method: "sobol"  # "simplicial", "halton", "sobol"
+        minimizer_kwargs:
+          method: "SLSQP" # "SLSQP" "L-BFGS-B"
+          options:
+            ftol: 1e-12
+        options:
+          # maxfev:
+          # f_min:
+          # f_tol:
+          # maxiter:
+          # maxev:
+          # maxtime:
+          # minhgrd:
+          # jac: false  # Buggy in some scipy versions: https://github.com/scipy/scipy/issues/14533
+          minimize_every_iter: true
+          local_iter: false
+          infty_constraints: true
+          disp: false
+
+    scipy_minimize:
+      like: LogLike
+      plugin: scipy_minimize
+      # The run arguments below have been tested with scipy v1.9.
+      # Other versions might expect different arguments.
+      run:
+        # n_runs: 5
+        x0:
+          NormalDist::mu: 25.
+          NormalDist::sigma: 2.5
+        method: "L-BFGS-B"  # "Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP", "trust-constr",
+        # jac: "2-point"      # "2-point", "3-point", "cs"
+        # hess: "2-point"     # "2-point", "3-point", "cs"
+        tol: 1e-6
+        options:
+          maxiter: 15000
+          disp: false
+
+    static_dynesty:
+      like: LogLike
+      pkg: gambit_dynesty
+      plugin: static_dynesty
+      pkl_name: "static_dynesty.pkl"
+      init:
+        nlive: 1000
+      run:
+        dlogz: 0.5
+        checkpoint_every: 60
+
+    dynamic_dynesty:
+      like: LogLike
+      plugin: dynamic_dynesty
+      pkg: gambit_dynesty
+      pkl_name: "dynamic_dynesty.pkl"
+      init:
+        nlive: 1000
+      run:
+        dlogz_init: 0.5
+        n_effective: 10000
+        checkpoint_every: 60
+
+    nessai:
+      like: LogLike
+      plugin: nessai_flow_sampler
+      pkg: gambit_nessai
+      #init:
+        #output: "nessai_log_dir"
+        #logger: true
+
+    nautilus:
+      like: LogLike
+      plugin: nautilus
+      #pkg: gambit_nautilus
+      run:
+        verbose: true
+
+    zeus:
+      like: LogLike
+      pkg: gambit_zeus
+      plugin: zeus
+      init:
+        nwalkers: 8
+      #run:
+        #nsteps: 20000
+        #filename: "zeus.h5"
+      SaveProgressCallback:
+        filename: zeus.h5
+        ncheck: 100
+
+    emcee:
+      like: LogLike
+      plugin: emcee
+      pkg: gambit_emcee
+      init:
+        nwalkers: 8
+      run:
+        nsteps: 20000
+        #filename: "emcee.h5"
+
+    pocomc:
+      like: LogLike
+      plugin: pocomc
+      #pkg: gambit_pocomc
+      #init:
+      #    nparticles: 1000
+
+    ultranest:
+      like: LogLike
+      pkg: gambit_ultranest
+      plugin: reactive_ultranest
+      #init:
+      run:
+        min_num_live_points: 1000
+        dlogz: 0.5
+
+    binminpy:
+      like: LogLike
+      plugin: binminpy
+      run:
+        n_bins:
+          # For any parameters not listed under 'n_bins' binminpy will assume a single bin
+          NormalDist::mu: 100
+          NormalDist::sigma: 100
+        # sampled_parameters: ["NormalDist::mu", "NormalDist::sigma"]  # By default parameters are optimized within each bin
+        sampler: "latinhypercube"
+        optimizer: "minimize"
+        optimizer_kwargs:
+          method: "L-BFGS-B"
+          tol: 1e-4
+        n_initial_points: 10
+        n_sampler_points_per_bin: 10
+        # accept_loglike_above: -30.0
+        accept_delta_loglike: 6.5
+        neighborhood_distance: 2
+        inherit_best_init_point_within_bin: False
+        n_optim_restarts_per_bin: 1
+        n_tasks_per_batch: 1
+        print_progress_every_n_batch: 100
+
+
+ObsLikes:
+
+  - purpose:      LogLike
+    capability:   normaldist_loglike
+    module:       ExampleBit_A
+    type:         double
+
+  #- purpose:      Observable
+  #  capability:   normaldist_loglike
+  #  module:       ExampleBit_A
+  #  type:         double
+  #  critical:     true
+
+Rules:
+
+  - if:
+      capability: normaldist_loglike
+    then:
+      options:
+        probability_of_validity: 1.0
+
+Logger:
+
+  redirection:
+    [Default]      : "default.log"
+    [ExampleBit_A] : "ExampleBit_A.log"
+    [Scanner]      : "Scanner.log"
+  debug: true
+
+KeyValues:
+
+  debug: false #true
+
+  default_output_path: "${PWD}/runs/spartan"
+
+  # An additional entry in the dataset and metadata, useful for identifying which
+  # points correspond to a given scan in combined datasets.
+  # The default is for print_scanID: true,
+  # and for it to print the date and time as an int of the form
+  # scanID: HourMinuteSecondMillisecond. This can be overwritten to any integer.
+  print_scanID: true
+  scanID: 1
+
+  rng:
+    generator: ranlux48
+    seed: -1
+
+  print_timing_data: true
+
+  print_unitcube: true
+
+  likelihood:
+    model_invalid_for_lnlike_below: -1e6
+
+    # A 'likelihood modifier function' recieves as input the total
+    # log-likelihood value and outputs a modified log-likelihood which
+    # is then passed to the scanner. This can be used to make an adaptive
+    # scanner explore specific ranges of the total log-likelihood, e.g.
+    # log-likelihood values corresponding to a given 1D/2D confidence region.
+    # The default is to use the 'identity' modifier, which does nothing.
+    use_lnlike_modifier: identity
+    lnlike_modifiers:
+      # Assuming that the best-fit log-likelihood value is 0.0,
+      # the 'gaussian' or 'gaussian_plateau' settings below
+      # will encourage the scanner to explore parameter regions
+      # at the border of the 2-sigma confidence region in 2D
+      # (Delta lnlike = -3.09).
+      gaussian:
+        mu: -3.1
+        sigma: 0.5
+        # use_limit: lower
+        use_delta_lnlike: false
+      gaussian_plateau:
+        mu_dn: -3.2
+        sigma_dn: 0.5
+        mu_up: -3.0
+        sigma_up: 3.0
+        use_delta_lnlike: false

From d292d2460555863ee85958d67a33f91c93bdcc4c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 16:27:29 +0000
Subject: [PATCH 10/24] [hdf5_v1] Default-initialise hid_t members to -1 to
 harden aux printer paths

The HDF5Printer class has seven hid_t members for HDF5 file / group /
location handles:

  file_id, group_id, RA_group_id, metadata_id,
  location_id, RA_location_id, metadata_location_id

The four file/group handles are only assigned by the *primary*
constructor; the aux constructor at hdf5printer.cpp:683-699 only sets
the three *_location_id members (inheriting them from
primary_printer). With no in-class initialisers, the four file/group
handles held indeterminate values on every auxiliary printer instance.

This was the root cause of issue #495 (H5Fflush(file_id, ...) firing
on aux printers when MultiNest's dumper called flush() on its 'txt' /
'live' aux streams). That specific call site is now correctly routed
through primary_printer->file_id (merge of upstream
fix_hdf5v1_and_multinest_issue), but the underlying class invariant
remained fragile: any future aux call site that forgets to go through
primary_printer would silently read uninitialised memory.

Fix: give all seven hid_t members an in-class default initialiser of
-1. This matches the sentinel value the class already treats as "unset
HDF5 handle" -- the existing get_location() / get_RA_location() /
get_metadata_location() checks at hdf5printer.cpp:1144,1155,1166
already raise a printer_error when they see -1. With this change the
class invariant becomes uniform: an unset HDF5 handle in this class
is -1, period. Any accidental HDF5 call on an aux's file/group handle
now passes H5I_INVALID_HID, which HDF5 reliably rejects -- failures
will surface deterministically as printer_errors instead of as
indeterminate behaviour or random "lucky" successes.

The bottom three *_location_id members are assigned by both
constructors today, so the = -1 there is purely defensive. Including
them keeps the rule uniform across all seven handles.

Verification
------------
Rebuilt and ran four scenarios cleanly (no HDF5 errors, all exit 0):
- v1 + Diver, single rank
- v1 + Diver, 2-rank MPI
- v1 + MultiNest, single rank (was the original #495 reproducer)
- v2 + Diver, single rank (regression check)

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .../gambit/Printers/printers/hdf5printer.hpp  | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer.hpp b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
index f6acf1b267..3008684bd1 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
@@ -332,17 +332,26 @@ namespace Gambit
         std::string group;     // HDF5 group location to store datasets
         std::string metadata_group; // HDF5 group location to store metadata
 
-        // Handles for HDF5 files and groups containing the datasets
-        hid_t file_id;
-        hid_t group_id;
-        hid_t RA_group_id;
-        hid_t metadata_id;
+        // Handles for HDF5 files and groups containing the datasets.
+        // Default-initialised to -1 (the convention this class already uses
+        // as the "unset HDF5 handle" sentinel; see e.g. the get_location() /
+        // get_RA_location() / get_metadata_location() checks). Only the
+        // primary printer's constructor assigns the file/group handles
+        // (file_id, group_id, RA_group_id, metadata_id); aux instances
+        // inherit the *_location_id values from the primary. The default
+        // initialisers ensure that any code path that mistakenly reads one
+        // of these on an aux instance gets a deterministic invalid handle
+        // rather than indeterminate memory.
+        hid_t file_id = -1;
+        hid_t group_id = -1;
+        hid_t RA_group_id = -1;
+        hid_t metadata_id = -1;
 
         // Handle to a location in a HDF5 to which the datasets will be written
         // i.e. a file or a group.
-        hid_t location_id;
-        hid_t RA_location_id;
-        hid_t metadata_location_id;
+        hid_t location_id = -1;
+        hid_t RA_location_id = -1;
+        hid_t metadata_location_id = -1;
 
         /// Pointer to the primary printer object
         // (if this is an auxilliary printer, else it is "this" //NULL)

From ff1955ae560612790b93aa3f07ad68dc9c17a319 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 16:32:56 +0000
Subject: [PATCH 11/24] [hdf5_v2] Close HDF5 type handle in
 HDF5DataSet::write_buffer (handle leak)

In HDF5DataSet<T>::write_buffer (the v2 hdf5 printer's per-flush write
path for synchronised datasets), the on-disk dtype is queried with

    hid_t dtype = H5Dget_type(get_dset_id());

and used only for an H5Tequal sanity check against the in-memory
expected_dtype. The handle was never closed before the function
returned, leaking one HDF5 type handle per call. write_buffer is
invoked once per flush per synchronised dataset, so on long scans with
many datasets and frequent flushes the leak accumulates -- eventually
HDF5's internal table or the OS file-handle limit gives, depending on
the build/runtime.

The companion write_RA_buffer (lines 438/467 in the same header)
already closes its corresponding H5Dget_type handle, so this is purely
a missing close in the synchronised path.

Fix: H5Tclose(dtype) immediately after the H5Tequal check, where
dtype stops being needed. Mirrors the close site in write_RA_buffer.
A short comment notes why it's there so the next reader doesn't
"clean it up" again.

Verification
------------
- Built cleanly.
- v2 + Diver, single rank: exit 0, no HDF5 errors, output file has
  34 datasets all sized 9002.
- v2 + Diver, 2 MPI ranks: exit 0, no HDF5 errors.

(The leak itself is slow-burn -- not visible in a short spartan run --
but the fix is mechanical and the smoke tests confirm it doesn't
disturb the write path.)

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
index 33a177b931..f5f40cd923 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
@@ -308,6 +308,11 @@ namespace Gambit
                  errmsg << "Error! Tried to write to dataset (name="<<myname()<<") with type id "<<dtype<<" but expected it to have type id "<<expected_dtype<<". This is a bug, please report it.";
                  printer_error().raise(LOCAL_INFO, errmsg.str());
              }
+             // dtype is no longer needed after the equality check; close it
+             // here to avoid leaking one HDF5 type handle per flush per
+             // dataset (write_RA_buffer below already does this at its
+             // matching H5Dget_type site).
+             H5Tclose(dtype);
 
              std::size_t required_size = target_pos+length;
              // Check that target position is allowed

From d25db2a43814d3fa99abaabce81f9361e674bd2a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 17:07:51 +0000
Subject: [PATCH 12/24] [hdf5_v1] Clarify why buffer / dataset destructors are
 intentionally empty

The destructors of DataSetInterfaceBase and VertexBufferNumeric1D_HDF5
each carried a TODO suggesting that one day they ought to close their
HDF5 dataset / flush their pending writes, with the actual cleanup
calls left commented out. The TODOs implied the empty body was a
hack waiting to be fixed, but in fact the empty body is required by
how these objects are stored:

- DataSetInterfaceBase (and DataSetInterfaceScalar) are populated via
  the assign-from-temporary pattern, e.g.
      _dsetdata = DataSetInterfaceScalar<T,CL>(loc, name, ...);
      local_buffers[key] = BuffType(loc, label, vertexID, ...);
  With the default copy/move semantics, the temporary and the
  surviving slot share the same hid_t dset_id; closing in the
  destructor would invalidate the handle that the surviving copy
  still relies on.

- The same shape applies to VertexBufferNumeric1D_HDF5 itself, whose
  destructor would similarly run on a temporary that shares all the
  buffer state (HDF5 handles, sync counters, write queues) with the
  container slot. Doing write_to_disk() / RA_write_to_disk() there
  would either double-write or write through stale state.

The actual close / flush is driven deterministically from
HDF5Printer::flush() and HDF5Printer::finalise(), iterating over the
surviving buffers in the printer's all_buffers map. If finalise() is
bypassed (exception during scan setup or shutdown), HDF5's own
H5Fclose() at process exit auto-closes any still-open dataset handles
and flushes their pending I/O, so the file on disk remains
well-formed.

This commit replaces the misleading TODOs with comments explaining
the actual ownership invariant, so the next reader doesn't try to
"fix" the empty body and reintroduce the original handle-aliasing
bug. Properly closing in the destructor would require giving these
classes ownership semantics they currently don't have (e.g. wrapping
the hid_t in a shared_ptr with an H5Dclose deleter, or making the
chain non-copyable / move-only) -- a larger change that is out of
scope here.

No behaviour change. Verified by rebuilding and running the spartan
example with both the v1 and v2 hdf5 printer (both exit 0, no HDF5
errors).

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .../hdf5printer/DataSetInterfaceBase.hpp      | 32 +++++++++++--------
 .../VertexBufferNumeric1D_HDF5.hpp            | 24 +++++++++-----
 2 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp b/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp
index 17d4b01521..e5b0a8233b 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp
@@ -184,23 +184,27 @@ namespace Gambit {
         for(std::size_t i=0; i<DSETRANK; i++) { record_dims[i] = rdims[i]; }
       }
 
-      /// Do cleanup (close dataset)
+      /// Destructor.
+      /// Deliberately does NOT close the underlying HDF5 dataset.
+      ///
+      /// DataSetInterfaceBase (and its derived classes) are stored by value
+      /// and assigned-from-temporary in several places, e.g.
+      ///   _dsetdata = DataSetInterfaceScalar<T,CHUNKLENGTH>(loc, name, ...);
+      ///   local_buffers[key] = BuffType(loc, label, vertexID, ...);
+      /// In each case a temporary is constructed, then copy-/move-assigned
+      /// into a member or container slot, then destroyed. With the default
+      /// copy semantics both objects hold the same hid_t dset_id, so
+      /// closing in the destructor would invalidate the handle held by the
+      /// surviving copy. The actual close therefore lives in the explicit
+      /// closeDataSet() method below, which is driven by
+      /// VertexBufferNumeric1D_HDF5::finalise() during the printer's
+      /// finalise() pass. If finalise() is bypassed (e.g. an exception
+      /// during scan setup or shutdown), HDF5's own H5Fclose() at process
+      /// exit will auto-close any still-open dataset handles and flush
+      /// their pending I/O, so the file on disk remains well-formed.
       template<class T, std::size_t RR, std::size_t CL>
       DataSetInterfaceBase<T,RR,CL>::~DataSetInterfaceBase()
       {
-         // TODO: Having problems with copied objects sharing dataset identifiers, and closing datasets prematurely on each other.
-         // To fix, will probably need to have a fancy copy constructor or something. Or wrap datasets in an
-         // object which itself has a fancy copy constructor. For now, just leave dataset resources lying around,
-         // probably won't cause any issues.
-         // Or could explicity tell interface to close datasets before the objects are destroyed.
-         //if(this->dset_id>=0)
-         //{
-         //  herr_t status = H5Dclose(this->dset_id);
-         //  if(status<0)
-         //  {
-         //     logger() << LogTags::printers << LogTags::err <<LogTags::repeat_to_cerr<< LOCAL_INFO << ": Error destructing DataSetInterfaceBase! Failed to close wrapped dataset! (H5Dclose failed). No exception thrown because this will behave badly when throw from a destructor. (dataset name: "<<myname<<")"<<EOM;
-         //  }
-         //}
       }
 
       /// Release resources associated with the underlying dataset
diff --git a/Printers/include/gambit/Printers/printers/hdf5printer/VertexBufferNumeric1D_HDF5.hpp b/Printers/include/gambit/Printers/printers/hdf5printer/VertexBufferNumeric1D_HDF5.hpp
index 0f8aa20f0f..924911f832 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer/VertexBufferNumeric1D_HDF5.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer/VertexBufferNumeric1D_HDF5.hpp
@@ -290,16 +290,24 @@ namespace Gambit {
         }
       }
  
-      /// Destructor
+      /// Destructor.
+      /// Deliberately does NOT flush the buffer to disk.
+      ///
+      /// Buffer objects of this type are stored by value (in the
+      /// std::map<VBIDpair, BuffType> inside H5P_LocalBufferManager) and
+      /// are populated via the assign-from-temporary pattern at
+      /// H5P_LocalBufferManager::get_buffer, so each buffer object's
+      /// destructor runs at least once on a temporary that shares state
+      /// (HDF5 dataset handles, sync counters) with the surviving
+      /// container slot. Triggering write_to_disk() / RA_write_to_disk()
+      /// here would therefore either double-write or write through stale
+      /// state, depending on which copy is being destroyed. End-of-scan
+      /// flushing is handled deterministically by HDF5Printer::flush()
+      /// and HDF5Printer::finalise() instead, which iterate over the
+      /// surviving buffers in the printer's all_buffers map.
       template<class T, std::size_t L>
-      VertexBufferNumeric1D_HDF5<T,L>::~VertexBufferNumeric1D_HDF5() 
+      VertexBufferNumeric1D_HDF5<T,L>::~VertexBufferNumeric1D_HDF5()
       {
-         //TODO: Do this in some more controlled way
-         //if(this->is_synchronised()) { write_to_disk(); }
-         //else 
-         //{ 
-         //   RA_write_to_disk(); 
-         //}
       }
 
       // Print out report on buffer sync status       

From 9aa6166dae68b8e5c44dc19ccb5b2d37be74a83a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 17:51:33 +0000
Subject: [PATCH 13/24] [hdf5_v1] Replace bare exit(1) in printer ctor with
 printer_error().raise()

In HDF5Printer::common_constructor (Printers/src/printers/hdf5printer/
hdf5printer.cpp), the rank-0 setup block at lines 414-472 handles the
case where the requested output file already exists. Inside the
"restart, not resume, not overwrite" branch (lines 448-469), the code
opens the file, checks whether the requested group is already present,
and either:
  - group exists -> raise a printer_error with a clear, multi-option
    message explaining how to fix the situation (line 465); or
  - group missing -> closeFile + bare exit(1) (line 468), with no
    message at all.

The exit(1) path was buggy in two ways:

 1. exit(1) bypasses MPI shutdown -- no MPI_Finalize, no MPI_Abort.
    Worker ranks (which never enter this rank-0-only block) sail on
    to the upcoming myComm.Barrier() at line 610 and hang forever.
    The user sees a job that never terminates, with no log message
    explaining why.

 2. The user has no idea what went wrong: the path triggers when
    they pointed the printer at a file that already exists from
    some unrelated previous run, and the only signal is a non-zero
    exit code from rank 0.

Fix: mirror the sibling group-exists case at line 465. Build a
clear printer_error message ("file exists, requested group not in it,
not in resume mode, delete_file_on_restart not set; here are the
three things you can do") and raise it. printer_error().raise()
throws an exception that goes through GAMBIT's normal error-reporting
machinery, which at least tells the user what happened on rank 0
before tearing down -- a strict improvement over silent exit(1).

Note that this commit does *not* address the broader rank-0-error-
deadlocks-workers problem: if any of the four printer_error sites in
this constructor block (lines 438, 445, 465, and now this one) fire
on rank 0, worker ranks still hang at the Barrier downstream. That
deadlock-class issue is the same one tracked under "v1
masterWaitForAll(FINAL_SYNC) deadlocks" and "Same risk on the
resume-mode myComm.Barrier() at line 610" in
hdf5_printer_issues.md, and needs a separate, larger fix using
BarrierWithTimeout. The point of the present commit is just to bring
this one site up to the same error-reporting standard as its three
siblings.

Verification
------------
- Built cleanly.
- Regular v1 spartan run: exit 0, no change to the happy path.
- Reproduced the failing scenario by seeding an output file with
  group "/data" via a normal run, then re-running with the same
  output_file but group "/data2", restart mode (-rf), no
  delete_file_on_restart. Before the fix this would silent-exit(1);
  after the fix the job terminates with a FATAL ERROR carrying the
  full guidance message and "Calling MPI_Finalize..." in the trailer.

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 Printers/src/printers/hdf5printer/hdf5printer.cpp | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/Printers/src/printers/hdf5printer/hdf5printer.cpp b/Printers/src/printers/hdf5printer/hdf5printer.cpp
index 0f861b3f7c..77a099393f 100644
--- a/Printers/src/printers/hdf5printer/hdf5printer.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5printer.cpp
@@ -465,7 +465,20 @@ namespace Gambit
                   printer_error().raise(LOCAL_INFO, errmsg.str());
                 }
                 HDF5::closeFile(file_id);
-                exit(1);
+                // Group does not exist in the pre-existing file. Refuse to
+                // proceed: we are not in resume mode and 'delete_file_on_restart'
+                // is not set, so adding a new group to a file from an unrelated
+                // previous run risks silently mixing unrelated data. Raise a
+                // printer_error with actionable advice rather than calling
+                // bare exit(1) (which bypassed MPI shutdown and left worker
+                // ranks hanging at the upcoming Barrier with no error
+                // message).
+                std::ostringstream errmsg2;
+                errmsg2 << "Error preparing pre-existing output file '"<<finalfile<<"' for writing via hdf5printer! The file exists, but does not contain the requested output group '"<<group<<"', and the run is not in resume mode (so we will not append to it) and 'delete_file_on_restart' is not set (so we will not overwrite it). Refusing to proceed to avoid silently mixing unrelated data. Please take one of the following actions:"<<std::endl;
+                errmsg2 << "  1. Re-run with the -r flag (restart) and set 'delete_file_on_restart: true' to overwrite the existing file;"<<std::endl;
+                errmsg2 << "  2. Choose a different output filename via the 'output_file' printer option;"<<std::endl;
+                errmsg2 << "  3. Manually move or delete the existing file '"<<finalfile<<"'."<<std::endl;
+                printer_error().raise(LOCAL_INFO, errmsg2.str());
               }
 
             }

From bde00bad9350d2702dea9f39f140f7aced2a9173 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 18:27:54 +0000
Subject: [PATCH 14/24] [hdf5_v2] Delete unused MPI_flush_to_rank /
 MPI_recv_all_buffers / MPI_request_buffer_data path

The v2 hdf5 printer carried two parallel sets of MPI machinery for
shipping buffer data from worker ranks to the rank-0 master:

 1. The currently-used "gather and Gatherv" path in finalise() and
    flush(), built around HDF5MasterBuffer::add_to_buffers and the
    HDF5bufferchunk message format. This is what actually runs in
    every MPI scan today.

 2. A second, unwired Send/Recv path consisting of:
      - HDF5MasterBuffer::MPI_flush_to_rank
      - HDF5MasterBuffer::MPI_request_buffer_data
      - HDF5MasterBuffer::MPI_recv_all_buffers
      - HDF5MasterBuffer::MPI_recv_buffer<T>
      - HDF5Buffer<T>::MPI_flush_to_rank
      - HDF5Buffer<T>::MPI_recv_from_rank
      - HDF5BufferBase::MPI_flush_to_rank (pure-virtual decl)
    plus eight dedicated MPI tag constants
    (h5v2_bufname / h5v2_bufdata_points / h5v2_bufdata_ranks /
    h5v2_bufdata_valid / h5v2_bufdata_type / h5v2_bufdata_values /
    h5v2_BLOCK / h5v2_BEGIN) and some commented-out
    recv_counter / send_counter debug scaffolding.

The path in (2) is internally consistent (the functions only call each
other) but has no external callers anywhere in the codebase, and the
MPI_request_buffer_data side sends an h5v2_BEGIN tag for which there
is no matching Recv -- a strong sign the path was abandoned mid-wiring
rather than being a finished alternative.

This commit deletes (2) outright. Two reasons for choosing delete over
finishing the wiring:

 - The currently-used gather_and_print path is correct and tested
   (passes our spartan smoke tests in single-rank and 2-rank MPI
   modes, including with auxiliary RA streams).
 - The send-side comment in MPI_recv_all_buffers explicitly warns of a
   deadlock if MPI_request_buffer_data is not called first; revisiting
   that ordering and the request/recv handshake to make it deadlock-free
   would be a real design effort, not a hygiene fix. The audit
   recommended either deleting or wiring up; given there is no concrete
   reason to believe the unfinished path would outperform the working
   one, deletion is the lower-risk choice.

Net change: -266 lines / +4 lines across hdf5printer_v2.hpp and
hdf5printer_v2.cpp. add_to_buffers, MPI_add_int_block_to_buffer,
MPI_add_float_block_to_buffer (which ARE used by the live path) are
untouched.

Verification
------------
- Built cleanly.
- v2 + Diver, single rank: exit 0, output has 34 datasets all sized
  9001 (uniform, as expected).
- v2 + Diver, 2 MPI ranks: exit 0, output has 34 datasets all sized
  9001 (uniform).

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .../Printers/printers/hdf5printer_v2.hpp      | 160 +-----------------
 .../hdf5printer_v2/hdf5printer_v2.cpp         | 110 ------------
 2 files changed, 4 insertions(+), 266 deletions(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
index f5f40cd923..a6f996d1cc 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
@@ -68,10 +68,6 @@ namespace Gambit
     typedef long long longlong;
     typedef unsigned long long ulonglong;
 
-    // DEBUG h5v2_BLOCK message counters
-    //static int recv_counter;
-    //static int send_counter;
-
     /// Length of chunks in chunked HDF5 dataset. Affects write/retrieval performance for blocks of data of various sizes.
     /// It is set to an "intermediate" sort of size since that seems to work well enough.
     static const std::size_t HDF5_CHUNKLENGTH = 100;
@@ -82,22 +78,10 @@ namespace Gambit
     /// Largest allowed size of buffers. Size can be dynamically set from 1 to this number.
     static const std::size_t MAX_BUFFER_SIZE = 100000;
 
-    /// MPI tags for HDF5 printer v2
-    const int h5v2_bufname(10);
-    const int h5v2_bufdata_points(11);
-    const int h5v2_bufdata_ranks(12);
-    const int h5v2_bufdata_valid(13);
-    const int h5v2_bufdata_type(14);
-    const int h5v2_bufdata_values(15);
-    // Message block end tag:
-    // 1 means "there is another block of messages to receive"
-    // 0 means "there are no more blocks of messages to receive"
-    const int h5v2_BLOCK(30);
-    // "Begin sending data" tag
-    const int h5v2_BEGIN(31);
-
-    // The 'h5v2_bufdata_type' messages send an integer encoding
-    // the datatype for the h5v2_bufdata_values messages
+    // The integer-encoded type ID for buffer-data messages
+    // (formerly used by h5v2_bufdata_type / h5v2_bufdata_values; the
+    // type encoding itself is still in use by the gather_and_print
+    // path via HDF5bufferchunk and add_to_buffers).
     // Need a unique integer for each type. We can encode these
     // with a template function:
 
@@ -643,12 +627,6 @@ namespace Gambit
         // int version
         virtual std::pair<std::vector<long>,std::vector<int>> flush_to_vector_int(const std::vector<PPIDpair>& order) = 0;
 
-#ifdef WITH_MPI
-        /// Send buffer contents to another process
-        virtual void MPI_flush_to_rank(const unsigned int r) = 0;
-
-#endif
-
         /// Make sure datasets exist on disk with the correct name and size
         virtual void ensure_dataset_exists(const hid_t loc_id, const std::size_t length) = 0;
 
@@ -910,102 +888,6 @@ namespace Gambit
             return buffer.size();
         }
 
-#ifdef WITH_MPI
-        // Send buffer contents to a different process
-        void MPI_flush_to_rank(const unsigned int r)
-        {
-            if(buffer.size()>0)
-            {
-                // Get name of the dataset this buffer is associated with
-                std::string namebuf = dset_name();
-                // Copy point data and values into MPI send buffer
-                std::vector<unsigned long> pointIDs;
-                std::vector<unsigned int> ranks; // Will assume all PPIDpairs are valid. I think this is fine to do...
-                std::vector<int> valid; // We have to send the invalid points too, to maintain buffer synchronicity
-                std::vector<T> values;
-                int type(h5v2_type<T>()); // Get integer identifying the type of the data values
-                int more_buffers = 1; // Flag indicating that there is a block of data to receive
-                for(auto it=buffer.begin(); it!=buffer.end(); ++it)
-                {
-                    pointIDs.push_back(it->first.pointID);
-                    ranks   .push_back(it->first.rank);
-                    valid   .push_back(buffer_valid.at(it->first));
-                    values  .push_back(it->second);
-                }
-
-                // Debug info
-        #ifdef HDF5PRINTER2_DEBUG
-                logger()<<LogTags::printers<<LogTags::debug<<"Sending points for buffer "<<dset_name()<<std::endl
-                                                           <<" (more_buffers: "<<more_buffers<<")"<<std::endl;
-                for(std::size_t i=0; i<buffer.size(); ++i)
-                {
-                    logger()<<"   Sending point ("<<ranks.at(i)<<", "<<pointIDs.at(i)<<")="<<values.at(i)<<" (valid="<<valid.at(i)<<")"<<std::endl;
-                }
-                logger()<<EOM;
-                #endif
-
-                // Send the buffers
-                std::size_t Npoints = values.size();
-                myComm.Send(&more_buffers, 1, r, h5v2_BLOCK);
-                //std::cerr<<myComm.Get_rank()<<": sent "<<more_buffers<<std::endl;
-                //send_counter+=1;
-                myComm.Send(&namebuf[0] , namebuf.size(), MPI_CHAR, r, h5v2_bufname);
-                myComm.Send(&type       , 1      , r, h5v2_bufdata_type);
-                myComm.Send(&values[0]  , Npoints, r, h5v2_bufdata_values);
-                myComm.Send(&pointIDs[0], Npoints, r, h5v2_bufdata_points);
-                myComm.Send(&ranks[0]   , Npoints, r, h5v2_bufdata_ranks);
-                myComm.Send(&valid[0]   , Npoints, r, h5v2_bufdata_valid);
-
-                // Clear buffer variables
-                buffer      .clear();
-                buffer_valid.clear();
-                buffer_set  .clear();
-            }
-        }
-
-        // Receive buffer contents from a different process
-        // (MasterBuffer should have received the name, type, and size of the
-        // buffer data, and used this to construct/retrieve this buffer.
-        // We then collect the buffer data messages)
-        void MPI_recv_from_rank(unsigned int r, std::size_t Npoints)
-        {
-            /// MPI buffers
-            std::vector<unsigned long> pointIDs(Npoints);
-            std::vector<unsigned int> ranks(Npoints);
-            std::vector<int> valid(Npoints);
-            std::vector<T> values(Npoints);
-
-            // Receive buffer data
-            myComm.Recv(&values[0]  , Npoints, r, h5v2_bufdata_values);
-            myComm.Recv(&pointIDs[0], Npoints, r, h5v2_bufdata_points);
-            myComm.Recv(&ranks[0]   , Npoints, r, h5v2_bufdata_ranks);
-            myComm.Recv(&valid[0]   , Npoints, r, h5v2_bufdata_valid);
-
-            // Pack it into this buffer
-        #ifdef HDF5PRINTER2_DEBUG
-            logger()<<LogTags::printers<<LogTags::debug<<"Adding points to buffer "<<dset_name()<<std::endl;
-            for(std::size_t i=0; i<Npoints; ++i)
-            {
-                // Extra Debug
-                logger()<<"   Adding received point ("<<ranks.at(i)<<", "<<pointIDs.at(i)<<")="<<values.at(i)<<" (valid="<<valid.at(i)<<")"<<std::endl;
-                PPIDpair ppid(pointIDs.at(i), ranks.at(i));
-                if(valid.at(i))
-                {
-                    append(values.at(i), ppid);
-                }
-                else
-                {
-                    update(ppid);
-                }
-            }
-            logger()<<EOM;
-            #endif
-
-            // Debug info:
-            //std::cout<<"(rank "<<myComm.Get_rank()<<") Final buffer size: "<<N_items_in_buffer()<<" (Npoints was: "<<Npoints<<"), dset="<<dset_name()<<std::endl;
-        }
-#endif
-
         void add_float_block(const HDF5bufferchunk& chunk, const std::size_t buf)
         {
             // Pack it into this buffer
@@ -1279,40 +1161,6 @@ namespace Gambit
         void print_metadata(std::map<std::string,std::string>, bool);
 
         #ifdef WITH_MPI
-        /// Gather all buffer data on a certain rank process
-        /// (only gathers data from buffers known to that process)
-        void MPI_flush_to_rank(const unsigned int rank);
-
-        /// Give process 'rank' permission to begin sending its buffer data
-        void MPI_request_buffer_data(const unsigned int rank);
-
-        /// Receive buffer data from a specified process until a STOP message is received
-        void MPI_recv_all_buffers(const unsigned int rank);
-
-        /// Receive buffer data of a known type for a known dataset
-        /// Requires status message resulting from a probe for the message to be received
-        template<class T>
-        int MPI_recv_buffer(const unsigned int r, const std::string& dset_name)
-        {
-            // Get number of points to be received
-            MPI_Status status;
-            myComm.Probe(r, h5v2_bufdata_values, &status);
-            int Npoints;
-            int err = MPI_Get_count(&status, GMPI::get_mpi_data_type<T>::type(), &Npoints);
-            if(err<0)
-            {
-                std::stringstream msg;
-                msg<<"Error from MPI_Get_count while attempting to receive buffer data from rank "<<r<<" for dataset "<<dset_name<<"!";
-                printer_error().raise(LOCAL_INFO,msg.str());
-            }
-            HDF5Buffer<T>& buffer = get_buffer<T>(dset_name, buffered_points);
-            //std::cout<<"(rank "<<myComm.Get_rank()<<") Npoints: "<<Npoints<<std::endl;
-            buffer.MPI_recv_from_rank(r, Npoints);
-            logger()<< LogTags::printers << LogTags::debug << "Received "<<Npoints<<" points from rank "<<r<<"'s buffers (for dataset: "<<dset_name<<")"<<EOM;
-            //std::cout<<"(rank "<<myComm.Get_rank()<<") Received "<<Npoints<<" from rank "<<r<<". New buffer size is "<<buffer.N_items_in_buffer()<<" (name="<<buffer.dset_name()<<")"<<std::endl;
-            return Npoints;
-        }
-
         /// Copy an MPI-transmitted block of buffer data into our buffer
         template<class T>
         void MPI_add_int_block_to_buffer(const HDF5bufferchunk& chunk, const std::string& dset_name, const std::size_t dset_index)
diff --git a/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp b/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
index 7f40fea80d..c882b260a2 100644
--- a/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
+++ b/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
@@ -660,108 +660,6 @@ namespace Gambit
     }
 
     #ifdef WITH_MPI
-    /// Send buffer data to a specified process
-    void HDF5MasterBuffer::MPI_flush_to_rank(const unsigned int r)
-    {
-        for(auto it=all_buffers.begin(); it!=all_buffers.end(); ++it)
-        {
-            it->second->MPI_flush_to_rank(r);
-        }
-        buffered_points.clear();
-        buffered_points_set.clear();
-    }
-
-    /// Give process r permission to begin sending its buffer data
-    // Don't do this for all processes at once, as MPI can run out of
-    // Recv request IDs behind the scenes if thousands of processes are
-    // trying to send it lots of stuff at once. But it is good to let
-    // many processes start sending data before we need it, so that the
-    // Recv goes quickly (and is effectively already done in the background)
-    // when we get to it.
-    void HDF5MasterBuffer::MPI_request_buffer_data(const unsigned int r)
-    {
-        logger()<< LogTags::printers << LogTags::info << "Asking process "<<r<<" to begin sending buffer data"<<EOM;
-        // Send a message to the process to trigger it to begin sending buffer data (if any exists)
-        int begin_sending = 1;
-        myComm.Send(&begin_sending, 1, r, h5v2_BEGIN);
-    }
-
-    /// Receive buffer data from a specified process until a STOP message is received
-    void HDF5MasterBuffer::MPI_recv_all_buffers(const unsigned int r)
-    {
-        // MAKE SURE MPI_request_buffer_data(r) HAS BEEN CALLED BEFORE THIS FUNCTION!
-        // Otherwise a deadlock will occur because we will wait forever for buffer data
-        // that will never be sent.
-
-        int more_buffers = 1;
-        int max_Npoints = 0; // Track largest buffer sent, for reporting general number of points recv'd
-        int Nbuffers = 0; // Number of buffers recv'd
-        while(more_buffers)
-        {
-            // Check "more buffers" message
-            myComm.Recv(&more_buffers, 1, r, h5v2_BLOCK);
-            //recv_counter+=1;
-            logger()<<LogTags::printers<<LogTags::debug<<"More buffers to receive from process "<<r<<"? "<<more_buffers<<EOM;
-            if(more_buffers)
-            {
-                Nbuffers+=1;
-                // Retrieve the name of the dataset for the buffer data
-                MPI_Status status;
-                myComm.Probe(r, h5v2_bufname, &status);
-                int name_size;
-                int err = MPI_Get_count(&status, MPI_CHAR, &name_size);
-                if(err<0)
-                {
-                    std::ostringstream errmsg;
-                    errmsg<<"MPI_Get_count failed while trying to get name of dataset to receive!";
-                    printer_error().raise(LOCAL_INFO, errmsg.str());
-                }
-                std::string dset_name(name_size, 'x'); // Initialise string to correct size, but filled with x's
-                myComm.Recv(&dset_name[0], name_size, MPI_CHAR, r, h5v2_bufname);
-
-                logger()<<LogTags::printers<<LogTags::debug<<"Preparing to receive buffer data from process "<<r<<" for buffer "<<dset_name<<EOM;
-
-                // Get datatype of buffer data, and call matching receive function for that type
-                int buftype;
-                myComm.Recv(&buftype, 1, r, h5v2_bufdata_type);
-                int Npoints = 0;
-                switch(buftype)
-                {
-                    case h5v2_type<int      >(): Npoints = MPI_recv_buffer<int      >(r, dset_name); break;
-                    case h5v2_type<uint     >(): Npoints = MPI_recv_buffer<uint     >(r, dset_name); break;
-                    case h5v2_type<long     >(): Npoints = MPI_recv_buffer<long     >(r, dset_name); break;
-                    case h5v2_type<ulong    >(): Npoints = MPI_recv_buffer<ulong    >(r, dset_name); break;
-                    //case h5v2_type<longlong >(): Npoints = MPI_recv_buffer<longlong >(r, dset_name); break;
-                    //case h5v2_type<ulonglong>(): Npoints = MPI_recv_buffer<ulonglong>(r, dset_name); break;
-                    case h5v2_type<float    >(): Npoints = MPI_recv_buffer<float    >(r, dset_name); break;
-                    case h5v2_type<double   >(): Npoints = MPI_recv_buffer<double   >(r, dset_name); break;
-                    default:
-                       std::ostringstream errmsg;
-                       errmsg<<"Unrecognised datatype integer (value = "<<buftype<<") received in buffer type message from rank "<<r<<" for dataset "<<dset_name<<"!";
-                       printer_error().raise(LOCAL_INFO, errmsg.str());
-                }
-                if(Npoints>max_Npoints) max_Npoints = Npoints;
-            }
-        }
-
-        logger()<<LogTags::printers<<LogTags::info;
-        if(max_Npoints==0)
-        {
-           logger()<<"No print buffer data recv'd from rank "<<r<<" process"<<std::endl;
-        }
-        else
-        {
-           logger()<<"Recv'd "<<Nbuffers<<" dataset buffers from rank "<<r<<"; the largest one contained "<<max_Npoints<<" points"<<std::endl;
-        }
-        // Debug
-        //for(auto it=all_buffers.begin(); it!=all_buffers.end(); ++it)
-        //{
-        //    std::cout<<"New buffer length is "<<it->second->N_items_in_buffer()<<" (name="<<it->second->dset_name()<<")"<<std::endl;
-        //}
-
-        logger()<<"Finished checking for buffer data messages from process "<<r<<EOM;
-    }
-
     // Add a vector of buffer chunk data to the buffers managed by this object
     void HDF5MasterBuffer::add_to_buffers(const std::vector<HDF5bufferchunk>& blocks, const std::vector<std::pair<std::string,int>>& buf_types)
     {
@@ -1882,10 +1780,6 @@ namespace Gambit
     // No distinction between final and early termination procedure for this printer.
     void HDF5Printer2::finalise(bool /*abnormal*/)
     {
-        // DEBUG h5v2_BLOCK message counter
-        //recv_counter = 0;
-        //send_counter = 0;
-
         // The primary printer will take care of finalising all output.
         if(not is_auxilliary_printer())
         {
@@ -2026,10 +1920,6 @@ namespace Gambit
             }
 
             logger()<< LogTags::printers << LogTags::info << "HDF5Printer2 output finalisation complete."<<EOM;
-
-            // DEBUG
-            //logger()<<LogTags::printers<<LogTags::debug<<"h5v2_BLOCK send count: "<<send_counter<<std::endl
-            //                                           <<"h5v2_BLOCK recv count: "<<recv_counter<<EOM;
         }
     }
 

From 91de1b830aa4ddca4b31591b3cec78ee5bf4c30b Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 20:31:00 +0000
Subject: [PATCH 15/24] [hdf5_v1] Close H5Dget_type ids leaked in combine_tools

hdf5_combine_tools.cpp called H5Dget_type at 8 sites to obtain the
HDF5 datatype of an existing dataset (in order to recreate the dataset
in the combined output via setup_hdf5_points), but never released the
returned datatype id. H5Dget_type returns a fresh id that the caller
owns and must close with H5Tclose; otherwise the id (and its underlying
in-memory state) lingers until the file/library is shut down.

The leaks were at three call sites within hdf5_stuff::Combine_HDF5:

 - Primary-dataset combine loop (~L949-956): type/type2 obtained per
   batch from either the previous combined dataset or a temp-file
   dataset, used by setup_hdf5_points only on batch 0, but allocated
   on every batch and never freed. Fixed by H5Tclose(type/type2) after
   the dataset_out/dataset2_out closes inside the same per-batch
   if(valid_dset>=0 or old_dataset>=0) block, so each iteration
   balances its allocations.

 - Auxiliary-dataset path (~L1144-1145): type/type2 obtained inside
   the "param not also a primary" branch, used immediately by
   setup_hdf5_points. Fixed by H5Tclose right after that call, guarded
   by >=0 since the variables are pre-initialised to -1 above.

 - Metadata-dataset combine loop (~L1273, L1278): single 'type' id,
   same pattern as the primary loop. Fixed by H5Tclose(type) after
   the dataset_out close inside the per-batch valid branch.

These were straightforward resource leaks: setup_hdf5_points only
reads 'type' to pass it to H5Dcreate2 and does not take ownership,
so adding the closes at the end of the using-scope is correct and
safe.

Verification
------------
- Built cleanly (only Printers translation unit recompiled).
- Ran spartan with the v1 (hdf5_v1) printer, 2 MPI ranks, with
  disable_combine_routines: false so the combine path actually
  exercises all three call sites. Run completed cleanly (exit 0):
   * 17 primary parameters combined, no errors
   * 51 metadata parameters combined, no errors
   * 0 auxiliary parameters in this scan (path not exercised, but
     that branch is structurally identical to the primary path)
   * combined output gambit_output.hdf5 contains uniform 9501-row
     1-D datasets across all primary parameters.

Out of scope (not fixed here)
-----------------------------
- hdf5_combine_tools.hpp Enter_HDF5() at line 377 calls
  H5Tget_native_type(dtype, ...) and stores the result in 'type', then
  closes only 'dtype' (line 448) before returning. The native-type id
  is also a fresh id that needs H5Tclose. This is a separate leak (a
  different HDF5 API) and the issue title is scoped specifically to
  H5Dget_type; flagging here for follow-up.

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp b/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp
index 2d0469b23c..16b88fd171 100644
--- a/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp
@@ -992,6 +992,8 @@ namespace Gambit
                             // Close resources
                             HDF5::closeDataset(dataset_out);
                             HDF5::closeDataset(dataset2_out);
+                            H5Tclose(type);
+                            H5Tclose(type2);
 
                             // Move offset so that next batch is written to correct place in output file
                             offset += batch_size_tot + pc_offset;
@@ -1152,6 +1154,8 @@ namespace Gambit
                              }
                              // Create new dataset
                              setup_hdf5_points(new_group, type, type2, size_tot, *it);
+                             if(type>=0)  H5Tclose(type);
+                             if(type2>=0) H5Tclose(type2);
                           }
                           // Reopen output datasets for copying
                           hid_t dataset_out  = HDF5::openDataset(new_group, *it);
@@ -1307,6 +1311,7 @@ namespace Gambit
 
                             // Close resources
                             HDF5::closeDataset(dataset_out);
+                            H5Tclose(type);
 
                             // Move offset so that next batch is written to correct place in output file
                             offset += batch_size_tot + pc_offset;

From 533d150060053ed1f7d85e4373124751460364dc Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 20:37:46 +0000
Subject: [PATCH 16/24] [hdf5_v1] Avoid undefined iterator decrement past
 begin() in combine size-trim loops

Two near-identical loops in hdf5_stuff (one in the primary-dataset
scanning pass, one in the auxiliary-dataset Enter_Aux_Parameters pass)
walked a std::vector<bool> backwards to count the number of trailing
invalid entries to trim from a per-file size:

    for (auto it = valids.end()-1; size > 0; --it)
    {
        if (*it) break;
        else --size;
    }

There were two problems with this idiom:

 1. If valids.size() == 0 (which happens cleanly when the temp file's
    sync group is empty), valids.end()-1 is undefined behavior. In
    practice the loop condition `size > 0` would skip the body, but
    the iterator is still constructed.

 2. More importantly: when ALL entries in valids are invalid (so the
    "break" branch is never taken), the final iteration decrements
    size from 1 to 0 and dereferences valids.front() correctly, but
    then the for-loop's post-step runs `--it` once more, producing an
    iterator one before begin(). That decrement is undefined behavior
    on std::vector::iterator -- even though it's typically a pointer
    under the hood and "happens to work", a sanitizer or a debug
    iterator implementation can flag/abort. The condition `size > 0`
    is then evaluated on the already-invalid iterator's surrounding
    state, but the UB has already occurred at the --it.

Replaced both loops with a straightforward index-based trim:

    while (size > 0 && !valids[size-1])
        --size;

Equivalent semantics (trim trailing invalids, keep first valid),
but no iterator arithmetic, no past-the-begin decrement, and no
empty-vector trap. Both call sites already guarantee
size <= valids.size() (size and valids both come from the
pointID_isvalid / RA_pointID_isvalid dataset, with the prior
size != size2 check raising on mismatch), so valids[size-1] is
safely in range whenever size > 0.

Verification
------------
- Built cleanly (Printers TU only).
- Spartan + hdf5_v1 + 2 MPI ranks + disable_combine_routines: false:
  exit 0, combined output has uniform 9000-row primary datasets and
  1-row metadata datasets. The primary-pass trim loop runs once per
  temp file (2x in this scan); the aux-pass trim loop is structurally
  identical and exercised by the same Enter_HDF5/read_hdf5 path.

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .../hdf5printer/hdf5_combine_tools.cpp        | 20 ++++++-------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp b/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp
index 16b88fd171..bf23eebd55 100644
--- a/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5_combine_tools.cpp
@@ -536,13 +536,9 @@ namespace Gambit
                                size_tot_l = size_tot + size; // Last?
                            //}
 
-                           for (auto it = valids.end()-1; size > 0; --it)
-                           {
-                               if (*it)
-                                   break;
-                               else
-                                   --size;
-                           }
+                           // Trim any trailing invalid points (size <= valids.size() ensured above)
+                           while (size > 0 && !valids[size-1])
+                               --size;
 
                            HDF5::closeSpace(dataspace);
                            HDF5::closeSpace(dataspace2);
@@ -804,13 +800,9 @@ namespace Gambit
                        ranks.push_back(valid_rank);
                        ptids.push_back(valid_ptid);
 
-                       for (auto it = valids.end()-1; size > 0; --it)
-                       {
-                           if (*it)
-                               break;
-                           else
-                               --size;
-                       }
+                       // Trim any trailing invalid points (size <= valids.size() ensured above)
+                       while (size > 0 && !valids[size-1])
+                           --size;
                        aux_sizes.push_back(size);
 
                        HDF5::closeSpace(dataspace);

From 732330636d5cbb27d594212fe732e38c965ae5f4 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 4 May 2026 21:14:05 +0000
Subject: [PATCH 17/24] [hdf5_v2] HDF5Printer2::flush() now also flushes
 registered aux_buffers

Before this change, HDF5Printer2::flush() only invoked
buffermaster.flush() on the calling printer instance. That meant a
public flush() on the primary printer left every auxilliary printer's
buffermaster -- metadata streams, RA streams, and any other aux
streams the scan has registered -- with their points still in RAM
until either the per-stream sync auto-flush triggered (when a sync
buffer fills) or finalise() ran. The two known external callers of
the public API (twalk's out_stream->flush() at the end of a temp-file
replay batch and the Python scanner binding in py_module_scanbit.hpp)
both expect "persist what I've written so far", so the partial flush
was a behavioural mismatch with caller intent and a divergence from
the v1 printer (HDF5Printer::flush() iterates all buffers).

The fix: after flushing this->buffermaster, if this is the primary
printer, iterate the aux_buffers list and flush each. Aux printers
register their own buffermaster into the primary's aux_buffers in
their constructor (~hdf5printer_v2.cpp:1342 add_aux_buffer) and
have an empty aux_buffers themselves, so guarding the loop on
not is_auxilliary_printer() preserves aux-printer semantics
(unchanged: still just flushes its own buffermaster).

Two passes -- sync streams first, RA streams second -- mirror the
ordering already used in finalise() (~:1824-1833 then :1895-1909).
Reason: RA writes anchor to positions established by sync writes,
so flushing RA before its sync targets are on disk can leave RA
points stuck in the postpone queue.

What is intentionally NOT done here:

 * No MPI gather. The architecture defers cross-rank gathering to
   finalise() as a performance optimisation for HPC networked
   filesystems (see comment block at hdf5printer_v2.cpp:1786-1793).
   Mid-scan, every rank just writes its own buffered points to the
   shared file under the existing cross-rank Utils::FileLock taken
   inside HDF5MasterBuffer::flush() (lock_and_open_file ->
   hdf5out.get_lock() at :741). Adding an MPI collective inside the
   public flush() would deadlock for non-collective callers (twalk's
   per-rank flush, Python scanners) and is unnecessary for
   "persist my data" semantics.

 * No explicit H5Fflush. v1 keeps the file open between buffer
   flushes and ends flush() with H5Fflush(file_id, H5F_SCOPE_GLOBAL).
   v2's HDF5MasterBuffer::flush() always closes the file at the end
   via close_and_unlock_file -> H5Fclose, which already drains the
   HDF5 internal cache to disk, making H5Fflush redundant.

 * Auto-flush path unchanged. add_buffered_point's "buffer full" auto
   flush at hdf5printer_v2.hpp:1128 / 1137 calls
   HDF5MasterBuffer::flush() (same class), not HDF5Printer2::flush().
   That path is untouched.

 * finalise() unchanged. It still does its own MPI gather and
   per-buffer flush logic without going through the public flush().

Cost: the new loop incurs N additional lock_and_open_file ->
close_and_unlock_file cycles per public flush() call (N = number of
registered aux buffers), but HDF5MasterBuffer::flush() short-circuits
on get_Npoints() == 0 (:479) without taking the lock, so empty aux
buffers cost essentially zero. flush() is rare on the hot path; this
is fine.

Verification
------------
- Built cleanly (Printers TU only).
- Spartan + v2 + single rank: exit 0, 34 primary datasets all sized
  9502 (uniform).
- Spartan + v2 + 2 MPI ranks: exit 0, 34 primary datasets all sized
  9003 (uniform).

Both runs produce well-formed output; the change is additive and
preserves existing on-disk layout because finalise() already flushes
everything at end-of-scan -- this commit just makes the same
flushing happen on demand when flush() is called.

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 .../hdf5printer_v2/hdf5printer_v2.cpp         | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp b/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
index c882b260a2..b6cbf39f50 100644
--- a/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
+++ b/Printers/src/printers/hdf5printer_v2/hdf5printer_v2.cpp
@@ -1773,7 +1773,26 @@ namespace Gambit
     // Flush data in buffers to disk
     void HDF5Printer2::flush()
     {
+        // Each rank writes its own buffered points under the cross-rank
+        // Utils::FileLock taken inside HDF5MasterBuffer::flush(); no MPI
+        // gather here. The collective gather happens only in finalise().
+        // Auxilliary printers register their buffermaster into the primary's
+        // aux_buffers (see constructor) and have an empty aux_buffers
+        // themselves, so only the primary needs to iterate.
+        // Sync streams before RA streams: matches finalise()'s ordering,
+        // since RA writes target positions established by sync writes.
         buffermaster.flush();
+        if(not is_auxilliary_printer())
+        {
+            for(auto it=aux_buffers.begin(); it!=aux_buffers.end(); ++it)
+            {
+                if((*it)->is_synchronised()) (*it)->flush();
+            }
+            for(auto it=aux_buffers.begin(); it!=aux_buffers.end(); ++it)
+            {
+                if(not (*it)->is_synchronised()) (*it)->flush();
+            }
+        }
     }
 
     // Make sure printer output is fully on disk and safe

From a4847e69d040f28eded31b17d46ddb57f050d9c9 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 5 May 2026 05:21:12 +0000
Subject: [PATCH 18/24] [hdf5_v1] Guard sync_pos==0 underflow in
 synchronise_buffers and get_buffer fast_forward

HDF5Printer's sync_pos member is unsigned long, initialised to 0
(hdf5printer.hpp:416), and only ever incremented (in
increment_sync_pos at hdf5printer.cpp:1650). Two call sites compute
get_sync_pos()-1 without first checking that sync_pos>0, which
would underflow to ULONG_MAX when sync_pos==0:

 1. HDF5Printer::synchronise_buffers (hdf5printer.cpp:1320):
        const unsigned long sync_pos = get_sync_pos()-1;
        ...
        it->second->synchronise_output_to_position(sync_pos);
    The underflowed ULONG_MAX would be stored into each buffer's
    target_sync_pos. For RA buffers this later feeds extend_dset()
    in RA_write_to_disk, which would attempt to extend the dataset
    to ULONG_MAX rows. For sync buffers, the movediff calculation
    in synchronise_output_to_position relies on sync_pos+1
    (ulong), which then wraps to 0 -- harmless only when
    dset_head_pos() also happens to be 0.

 2. H5P_LocalBufferManager::get_buffer (hdf5printer.hpp:558):
        if(synchronised) it->second.fast_forward(
            printer->get_sync_pos()-1);
    fast_forward takes a signed long target_pos. With sync_pos==0,
    get_sync_pos()-1 == ULONG_MAX (ulong), implicitly converted to
    long it becomes -1 on two's-complement platforms. fast_forward
    then computes needed_steps = -1 - 0 = -1 (negative) and raises
    the "would need to move backwards" error
    (VertexBufferBase.hpp:218).

In normal scans both bugs are dormant because of call ordering:

 - synchronise_buffers' first invocation (from the first
   check_for_new_point at :1632) happens before any buffer has been
   added to all_buffers, so the for-loop iterates over nothing and
   the underflowed value is never delivered.

 - fast_forward via get_buffer is reached only after
   check_for_new_point has run increment_sync_pos at :1650, taking
   sync_pos from 0 to 1, so by the time fast_forward executes
   sync_pos>=1 and target_pos>=0.

The underflow is therefore real but masked by today's lifecycle.
Adding a buffer earlier than the first check_for_new_point, or
calling synchronise_buffers from a new code path before any
pointID arrives, would surface either bug.

Fix: guard each call site with sync_pos>0. Semantically, sync_pos==0
means "no point has yet been registered as the current sync
position", so there is nothing to synchronise to or catch up to;
returning early / skipping the call is the correct no-op behaviour
and is consistent with buffers being at dset_head_pos==0 from
construction.

This is a surgical fix that preserves all existing behaviour: the
two guarded paths were never executed with sync_pos==0 in practice,
so guarding them doesn't change any observable behaviour today --
it only removes the latent underflow.

Verification
------------
- Built cleanly.
- Spartan + hdf5_v1 + 1 rank: exit 0, 34 primary datasets all
  sized 9000 (uniform).
- Spartan + hdf5_v1 + 2 MPI ranks (with combine routines enabled):
  exit 0, 34 primary datasets all sized 9501 (uniform).

https://claude.ai/code/session_01QZuy7gyVUMZa3FCXShjj4X
---
 Printers/include/gambit/Printers/printers/hdf5printer.hpp | 5 ++++-
 Printers/src/printers/hdf5printer/hdf5printer.cpp         | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer.hpp b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
index 3008684bd1..c6a8acb1bc 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
@@ -555,7 +555,10 @@ namespace Gambit
         // We subtract one because another increment will happen after
         // the print statement (that triggered the creation of the new
         // buffer) completes.
-        if(synchronised) it->second.fast_forward(printer->get_sync_pos()-1);
+        // If sync_pos is still 0 there is no previous position to catch up
+        // to (and the buffer is already at dset_head_pos == 0 from
+        // construction), so skip the call to avoid an unsigned underflow.
+        if(synchronised and printer->get_sync_pos()>0) it->second.fast_forward(printer->get_sync_pos()-1);
       }
 
       if( it == local_buffers.end() )
diff --git a/Printers/src/printers/hdf5printer/hdf5printer.cpp b/Printers/src/printers/hdf5printer/hdf5printer.cpp
index 77a099393f..13f530818f 100644
--- a/Printers/src/printers/hdf5printer/hdf5printer.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5printer.cpp
@@ -1316,6 +1316,12 @@ namespace Gambit
         printer_error().raise(LOCAL_INFO, errmsg.str());
       }
 
+      // If sync_pos is still 0 then no point has yet been registered as the
+      // current sync position, so there is no "previous slot" for buffers to
+      // catch up to. Return early to avoid the unsigned underflow that would
+      // otherwise occur in get_sync_pos()-1 below.
+      if(get_sync_pos()==0) return;
+
       // Determine the desired sync position
       const unsigned long sync_pos = get_sync_pos()-1;
 

From 6952d63f52f24e4f25fd8562bff4924ccb42c250 Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Tue, 5 May 2026 08:57:00 +0200
Subject: [PATCH 19/24] Deleted temp files used for working on the hdf5
 printers

---
 CLAUDE.md                         |  17 --
 MINIMAL_BUILD.md                  | 176 ------------
 hdf5_printer_issues.md            | 195 -------------
 yaml_files/spartan_issue_495.yaml | 456 ------------------------------
 4 files changed, 844 deletions(-)
 delete mode 100644 CLAUDE.md
 delete mode 100644 MINIMAL_BUILD.md
 delete mode 100644 hdf5_printer_issues.md
 delete mode 100644 yaml_files/spartan_issue_495.yaml

diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index 3b25d89c2f..0000000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Build
-
-If you need to build/rebuild GAMBIT to perform some test, you can check 
-**[MINIMAL_BUILD.md](MINIMAL_BUILD.md)** for an end-to-end build recipe.
-It covers system packages, Python setup, cmake flags, backend ordering, 
-smoke test, and timing.
-
-
-# Run example
-
-```
-OMP_NUM_THREADS=1 ./gambit -rf yaml_files/spartan.yaml
-```
-
-(For multi-process MPI runs, re-configure cmake with `-DWITH_MPI=True`
-and use `mpiexec -np <N> ./gambit ...`.)
-
diff --git a/MINIMAL_BUILD.md b/MINIMAL_BUILD.md
deleted file mode 100644
index 2b2c58c7c7..0000000000
--- a/MINIMAL_BUILD.md
+++ /dev/null
@@ -1,176 +0,0 @@
-# Minimal GAMBIT build
-
-End-to-end recipe for a minimal GAMBIT build: ditching all the physics "Bits", not building any backends and only building a couple of scanners. 
-
-The resulting `./gambit` binary can still be large and take several minutes to build
-from scratch on 16 cores (the bottleneck is the single huge `Core/src/gambit.cpp`
-translation unit).
-
-## 1. System packages
-
-```
-sudo apt-get update
-sudo apt-get install -y \
-    gfortran g++-12 gcc-12 \
-    libeigen3-dev libgsl-dev libboost-all-dev libhdf5-dev \
-    libblas-dev liblapack-dev pkg-config
-```
-
-Why each:
-- `gfortran` — required (Fortran backends, BLAS interfaces).
-- `g++-12 / gcc-12` — `cmake/backends.cmake` hard-codes
-  `--castxml-cc=g++-12` for the Prob3++ BOSS step (workaround for a
-  g++-13/clang-castxml conflict). Without g++-12 the prob3pp build
-  aborts at the BOSS/CastXML stage.
-- `libeigen3-dev` — provides `/usr/include/eigen3`, used in the cmake
-  invocation below.
-- `libgsl-dev`, `libboost-all-dev`, `libhdf5-dev` — runtime / build deps.
-- `libblas-dev liblapack-dev` — without LAPACK, cmake's optional.cmake
-  errors out with "LAPACK shared library not found".
-
-### Optional: MPI
-
-To build GAMBIT with MPI support (required to run multi-process scans
-with `mpiexec -np N ./gambit ...`, and to exercise any printer / scanner
-code paths gated on `#ifdef WITH_MPI`), additionally install OpenMPI:
-
-```
-sudo apt-get install -y libopenmpi-dev openmpi-bin
-```
-
-This provides `mpiexec`, `mpicc`, `mpic++`, and the headers/libs that
-cmake's MPI detection picks up. Then add `-DWITH_MPI=True` to the cmake
-invocation in step 3. Verify the install with:
-
-```
-mpiexec --version    # should report Open MPI 4.x
-which mpic++         # /usr/bin/mpic++
-```
-
-## 2. Python packages
-
-By default cmake picks a different interpreter (e.g. `/usr/local/bin/python3`,
-3.11 in this sandbox) and a different runtime libpython (the system one,
-e.g. `libpython3.10`). That mismatch forces you to install runtime Python
-packages (numpy, numba, pandas, scipy, mpmath) **twice** — once for each
-interpreter — or `./gambit -h` aborts with `ModuleNotFoundError: numpy`.
-
-Avoid this by pointing cmake at a single Python via `PYTHON_EXECUTABLE`,
-`PYTHON_INCLUDE_DIR` and `PYTHON_LIBRARY` (see the cmake step below). Then
-install the packages once for **that** interpreter:
-
-```
-pip install --user numpy numba pandas scipy mpmath
-```
-
-(In the Claude Code sandbox the chosen interpreter is
-`/usr/local/bin/python3` = 3.11.15, so this `pip --user` install is the
-one needed.)
-
-## 3. Configure (cmake)
-
-```
-mkdir -p build && cd build
-cmake -DPYTHON_EXECUTABLE=/usr/local/bin/python3 \
-      -DPYTHON_INCLUDE_DIR=/usr/include/python3.11 \
-      -DPYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.11.so \
-      -DEIGEN3_INCLUDE_DIR=/usr/include/eigen3 \
-      -DWITH_MPI=False \
-      -DWITH_ROOT=False \
-      -Ditch="ColliderBit;CosmoBit;DarkBit;NeutrinoBit;ObjectivesBit;FlavBit;DecayBit;SpecBit;PrecisionBit;Mathematica" \
-      ..
-```
-
-Notes:
-- `PYTHON_EXECUTABLE`/`PYTHON_INCLUDE_DIR`/`PYTHON_LIBRARY` make cmake use
-  the same Python (3.11 here) for both build-time scripts and the runtime
-  libpython — eliminating the dual pip install. Confirm by looking for
-  matching versions in the cmake output:
-  ```
-  Using Python interpreter version 3.11.15 for build.
-  Using Python libraries version 3.11.15 for Python backend support.
-  ```
-  If you see a `NOTE: You are using different Python versions for the
-  interpreter and the libraries!` message, the flags weren't picked up
-  (likely because `CMakeCache.txt` from a previous run is sticky — wipe
-  the build dir and rerun).
-- `itch` lists modules to **exclude** from the build.
-
-- `WITH_MPI=False` and `WITH_ROOT=False` keep the build minimal. Set
-  `-DWITH_MPI=True` instead if you installed OpenMPI per the optional
-  step in section 1; cmake will then pick up `mpic++` automatically and
-  compile the `#ifdef WITH_MPI` branches in the printer / scanner code.
-
-- This first cmake call takes ~75 seconds (it clones pybind11, runs many
-  feature checks).
-
-## 4. Build scanners, then re-cmake, then build gambit
-
-```
-make diver
-make multinest
-cmake ..                  # re-detect newly built scanner libraries
-make gambit -j$(nproc)
-```
-
-Expected timings on 16 cores:
-- `diver`: ~1 min (mostly download)
-- `multinest`: ~1 min (mostly download)
-- `cmake ..`: ~75 s
-- `make gambit -j16`: Can take ~25 min (gambit.cpp.o alone can take ~20 min)
-
-The whole chain runs cleanly start-to-finish if build is done in the order above.
-
-### Long-running build tip (Claude Code)
-
-The `Bash` tool defaults to a 10-minute timeout, but exceeding it does
-**not** kill the command — the tool returns "running in background" and
-the build continues. You can poll with subsequent `ps`/`tail` calls. Or
-raise the cap explicitly via the `BASH_MAX_TIMEOUT_MS` env var in
-`~/.claude/settings.json` (e.g. set to `3600000` for 60 min):
-
-```json
-{
-  "env": { "BASH_MAX_TIMEOUT_MS": "3600000" }
-}
-```
-
-To watch a verbose log instead of the silent default, prepend `VERBOSE=1`:
-
-```
-make gambit -j$(nproc) VERBOSE=1
-```
-
-## 5. Smoke test
-
-```
-./gambit --version
-./gambit -h
-./gambit -d -f yaml_files/spartan.yaml   # dry-run, prints functor tree
-```
-
-A successful dry-run prints the resolved dependency tree.
-
-## 6. Run example
-
-Without MPI:
-
-```
-OMP_NUM_THREADS=1 ./gambit -rf yaml_files/spartan.yaml
-```
-
-With MPI (build configured with `-DWITH_MPI=True`):
-
-```
-OMP_NUM_THREADS=1 mpiexec -np 2 ./gambit -rf yaml_files/spartan.yaml
-```
-
-If `mpiexec` complains about running as root in a container, add
-`--allow-run-as-root` (and, if you hit "not enough slots", also
-`--oversubscribe`):
-
-```
-OMP_NUM_THREADS=1 mpiexec --allow-run-as-root --oversubscribe -np 2 \
-    ./gambit -rf yaml_files/spartan.yaml
-```
-
diff --git a/hdf5_printer_issues.md b/hdf5_printer_issues.md
deleted file mode 100644
index 77a2da70f0..0000000000
--- a/hdf5_printer_issues.md
+++ /dev/null
@@ -1,195 +0,0 @@
-# HDF5 printer audit — synthesized findings
-
-Audit of the v1 (`Printers/src/printers/hdf5printer/`) and v2
-(`Printers/src/printers/hdf5printer_v2/`) HDF5 printer implementations,
-performed after the file_id fix for issue #495. Findings sorted by
-severity and grouped where v1 and v2 share a class of bug.
-
-## Critical (likely to bite real runs)
-
-### [v2] `final_size` is uninitialised on workers in `HDF5Printer2::finalise()`
-`hdf5printer_v2.cpp:1958-2010`. `final_size` is set only inside
-`if(myRank==0)` at lines 1959-1967, but the loop at 1996-2010 runs on
-**every** rank and calls `(*it)->extend_all_datasets_to(final_size)` at
-line 2008. Workers pass garbage. There's no rank-0 guard around the
-loop. This will misbehave any time a worker has a non-synchronised aux
-buffer registered (which any scanner using RA aux streams over MPI
-produces). The whole 1996-2010 block belongs inside `if(myRank==0)`,
-alongside the matching block at 1930-1952.
-
-### [v1] `errorsOff()` / `errorsOn()` nesting permanently silences HDF5 errors
-`hdf5tools.cpp:369-386`. `errorsOff()` saves the old handler into
-module-globals `old_func` / `old_client_data`. The function at line 377
-calls `errorsOff()` again *while errors are already off* — so it saves
-NULL into those globals. The matching `errorsOn()` at line 386 then
-restores NULL, leaving HDF5 error reporting permanently disabled for the
-rest of the process. Same pattern at lines 395/408 (less reachable). Any
-subsequent HDF5 problem will fail silently.
-
-### [v1] Aux printers' uninitialised `hid_t` members beyond `file_id`
-`hdf5printer.hpp:336-345`. None of `file_id`, `group_id`, `RA_group_id`,
-`metadata_id` (and arguably the `*_location_id` set) have default
-initialisers. The aux constructor only assigns `location_id` /
-`RA_location_id` / `metadata_location_id`. The `H5Fflush` issue we just
-fixed (#495) is one example — others are latent. Suggests a wider fix:
-either default-init all to `-1` (HDF5's invalid handle), or always go
-through `primary_printer->X` in code that runs on aux instances.
-
-### [v1] `openGroup` returns after creating only the first non-existent path component
-`hdf5tools.cpp:540-574`. On the first iteration where `H5Gopen2` fails
-and `H5Gcreate2` succeeds, the inner `else` branch at line 564 returns
-immediately, exiting the loop before processing the remaining
-components. Latent because v1 only ever opens single-level paths
-(`/data`, `/data/RA`, etc. as separate calls), but the function would
-silently mis-create any user-supplied deeper `group:` option.
-
-### [v1] Buffer destructors do not close their HDF5 datasets
-`VertexBufferNumeric1D_HDF5.hpp:294-303`,
-`DataSetInterfaceBase.hpp:189-204`. Destructors are deliberately empty
-(close calls commented out, with TODO about copy semantics). Cleanup
-relies entirely on `finalise()` running. Any abnormal exit before
-finalise — a constructor exception elsewhere, an error during
-synchronisation, an uncaught signal — leaks dataset handles and likely
-loses unflushed data when `H5Fclose` auto-closes them later.
-
-### [v2] `H5Tclose` missing in `HDF5DataSet<T>::write_buffer` — leaks one type handle per flush
-`hdf5printer_v2.hpp:304-311`. `hid_t dtype = H5Dget_type(get_dset_id())`
-is opened but never closed. The companion `write_RA_buffer` at lines
-438/467 *does* close it. Long scans with many datasets and many flushes
-accumulate handles until HDF5's table or the OS file-handle limit gives.
-
-### [v2] Stack-local `RAbuffer` added to member `aux_buffers`, then destroyed
-`hdf5printer_v2.cpp:1973, 1994`. `HDF5MasterBuffer RAbuffer(...)` is a
-local; `add_aux_buffer(RAbuffer)` pushes its address into the
-`aux_buffers` vector. When `finalise` returns, the pointer dangles.
-Currently dormant (no second `finalise` call, empty destructor at 1844)
-but a bug-magnet for any future change.
-
-## Major
-
-### [v1] `masterWaitForAll(FINAL_SYNC)` deadlocks if any worker dies before reaching it
-`hdf5printer.cpp:987` plus `Utils/src/mpiwrapper.cpp:163-188`. The
-barrier is plain blocking `Recv`s on rank 0, no Iprobe-with-timeout
-(unlike `allWaitForMasterWithFunc`). The TODO comment at line 988
-acknowledges this. Same risk on the resume-mode `myComm.Barrier()` at
-line 610 if `prepare_and_combine_tmp_files` raises on rank 0.
-
-### [v1] `exit(1)` from inside the constructor after `closeFile`
-`hdf5printer.cpp:467-469`. On any restart-without-`-r` against an
-existing output file with the requested group missing — a normal user
-mistake — rank 0 calls `exit(1)`, bypassing MPI shutdown. Workers
-blocked on the upcoming `myComm.Barrier()` at line 610 hang forever.
-
-### [v2] `MPI_flush_to_rank` / `MPI_recv_all_buffers` / `MPI_request_buffer_data` are dead code
-`hdf5printer_v2.cpp:664-810`, `hdf5printer_v2.hpp:910-1001`, including
-`recv_counter` / `send_counter` debug scaffolding. No callers.
-`finalise()` uses the `gather_and_print` / `Gatherv` path instead.
-Either half-finished refactor leftover or a missed wiring that should be
-using this faster path. Recommend either deleting or wiring up — the
-deadlock-warning comment is currently misleading.
-
-### [v2] `HDF5Printer2::flush()` does not gather over MPI; only the calling rank's buffermaster is touched
-`hdf5printer_v2.cpp:1876-1879`. So between scanner-driven `flush()`
-calls, RA aux data on workers stays in worker memory until `finalise()`.
-Different from v1's effective semantics. Any scanner that calls
-`aux_stream->flush()` for durability (MultiNest's dumper at
-`multinest_3.12/multinest.cpp:362-363` is exactly this) will *not* get
-RA data from non-rank-0 ranks to disk until the scan ends.
-
-### [v1] `synchronise_buffers` underflows if `sync_pos == 0`
-`hdf5printer.cpp:1307`. `const unsigned long sync_pos = get_sync_pos()-1;`
-becomes `ULONG_MAX`. Currently never reached because `check_for_new_point`
-only triggers it after a real point has been seen, but the invariant is
-not enforced anywhere.
-
-### [v1] Unbounded growth of `postpone_write_queue_and_locs`
-`VertexBufferNumeric1D_HDF5.hpp:63, 541-544`. RA writes that can't yet
-be matched to a sync point get pushed without a cap. The MPI-based
-queue-sending that was supposed to bound this has been disabled
-(commented out) and the comments weren't updated.
-
-### [v1] `_print_metadata` always passes `resume=false`
-`hdf5printer.cpp:1681-1691`. Re-creates datasets on every call.
-Currently only called once, at `finalise()`, so dormant — but any new
-caller will trip the "Dataset with same name may already exist" path
-immediately.
-
-### [v1] Aux constructor doesn't validate `dynamic_cast` result
-`hdf5printer.cpp:683`. If the cast fails (HDF5 aux attached to non-HDF5
-primary), `primary_printer` becomes nullptr and the very next
-`set_resume(primary_printer->get_resume())` segfaults.
-
-### [v2] `print_metadata` underflow
-`hdf5printer_v2.cpp:609-616`. `std::size_t size = get_next_metadata_position(); if(sameset) size --;`
-— if the GAMBIT dataset doesn't exist yet, `get_next_metadata_position()`
-returns 0, `--` underflows to `SIZE_MAX`. Currently protected by the
-call-site `if (use_metadata)` at 1948, but the function is unsafe.
-
-### [v1] Iterators decremented past `begin()` in combine size-trim loops
-`hdf5_combine_tools.cpp:539-545`, `807-813`. Technically UB on
-`std::vector::iterator` when all entries are false. Works on most STLs.
-
-### [v1] `H5Dget_type` not closed in combine tools
-`hdf5_combine_tools.cpp:949-950, 955-956, 1144-1145, 1273, 1278`.
-Type-handle leak per file/parameter combined; contributes to "too many
-open objects" warnings on big merges.
-
-## Minor / Notes
-
-A handful more across both files:
-
-- v1 dead/stale state members (`current_dset_position`, `previous_points`,
-  `global=false` field never assigned).
-- v1 `errorsOff` / `errorsOn` use static globals → fundamentally not
-  thread-safe.
-- v1 hardcoded `BUFFERLENGTH=100` and `MAX_PPIDPAIRS=1000`.
-- v1 `combine_output_py` is dead code (uses a Python helper that may not
-  exist).
-- v2 constructor's resource-leak risk if `printer_error()` raises
-  mid-setup (no RAII).
-- v2 generous `std::cout` / `std::cerr` use where `logger()` would be
-  appropriate (e.g. lines 1514, 1539, 1543, 1705, 1713, 1716, 1745,
-  1762, 1965, 2018).
-- v2 narrowing conversions between `std::size_t` and `int` in MPI size
-  handling.
-- Both v1 and v2 shell out to `popen("rm ...")` and `popen("cp ...")`
-  rather than calling `std::remove` / filesystem APIs. Brittle on paths
-  with shell metacharacters.
-- v2 `MAX_BUFFER_SIZE = 100000` `T buffer[MAX_BUFFER_SIZE]` is a stack
-  array (~800 KB for `T=double`). Fine on Linux defaults, fragile
-  elsewhere.
-- v2 `H5Tequal(in-memory, on-disk)` check can fire spuriously on resume
-  because on-disk types aren't normalised through `H5Tget_native_type`
-  like v1 does.
-- v2 `_isvalid` companion existence not separately tracked from value
-  dataset (TODO at `hdf5printer_v2.hpp:888`).
-
-## What to prioritize fixing
-
-1. **v2 `final_size` / `extend_all_datasets_to` on workers** — same shape
-   as the bug we just fixed: a finalise-time member-not-set-on-aux-path
-   issue that hides because some scenarios mask the UB. Will hit any MPI
-   scan with v2 + RA aux streams.
-2. **v1 `errorsOff` / `errorsOn` nesting** — silently disables your
-   safety net for HDF5 errors. Trivial to fix (save state in a local).
-3. **v1 uninit `hid_t` cluster** — same root cause as #495. A small
-   refactor to default-init or to collapse aux/primary handles into a
-   single struct would prevent another year of "another field wasn't set
-   on aux printers" bugs.
-4. **v2 `H5Tclose` leak in `write_buffer`** — slow-burn but unbounded.
-5. **v1 buffer-destructor cleanup** — would matter on any abnormal
-   shutdown, including signal-driven ones from a scheduler killing a
-   job.
-
-Everything below "Major" is good to file as low-priority issues but
-probably not urgent.
-
-## Theme
-
-A lot of the fragility across both printers comes from sharing state
-between primary and auxiliary printer instances via raw pointers and
-ad-hoc "use this one, not that one" rules. A refactor that makes the
-auxiliary's role purely a *view* over a shared `HDF5File` object (so
-there are no per-printer file/group handles to forget to initialise)
-would eliminate this whole class of bug. Big change, but probably worth
-scoping if you're touching this code anyway.
diff --git a/yaml_files/spartan_issue_495.yaml b/yaml_files/spartan_issue_495.yaml
deleted file mode 100644
index b87d435cb7..0000000000
--- a/yaml_files/spartan_issue_495.yaml
+++ /dev/null
@@ -1,456 +0,0 @@
-##########################################################################
-## GAMBIT configuration for an ultra-minimal scan of a toy model.
-##
-## Only needs ExampleBit_A and a scanner (either one of the built-ins like
-## the random sampler or TWalk, or an external like Diver, MultiNest,
-## GreAT or Polychord).
-##########################################################################
-
-
-Parameters:
-  # In this example we will simply be fitting the mean and standard deviation of a normal distribution.
-  NormalDist:
-    # Example of using simple priors:
-    mu:
-      prior_type: flat
-      range: [15, 30]
-    sigma:
-      prior_type: flat
-      range: [0, 5]
-
-    # If using a more complicated prior specified in the 'Priors' section below,
-    # use the 'in_priors' keyword in this section:
-    # mu: in_priors
-    # sigma: in_priors
-
-
-Priors:
-
-  # None needed: simple priors are already specified in the 'Parameters' section above.
-
-  # If defining a more complicated prior, make sure to tag the relevant parameters with the
-  # keyword 'in_priors' in the 'Parameters' section above. Below is an example of a
-  # 2D gaussian prior for the two parameters 'mu' and 'sigma':
-  #
-  # my_prior:
-  #   parameters: ["NormalDist::mu", "NormalDist::sigma"]
-  #   prior_type: gaussian
-  #   mu: [18, 5]
-  #   cov_matrix: [[9.0, 0.0], [0.0, 1.0]]
-  #   # sigma: [3.0, 1]
-
-
-Printer:
-
-  # printer: sqlite
-  # options:
-  #   output_file: "results.sql"
-  #   table_name: "data"
-  #   buffer_length: 1000
-  #   delete_file_on_restart: true
-
-  # printer: hdf5
-  # options:
-  #   output_file: "results.hdf5"
-  #   group: "/data"
-  #   delete_file_on_restart: true
-  #   buffer_length: 1000
-  #   # disable_autorepair: true
-
-  printer: hdf5_v1
-  options:
-    output_file: "gambit_output.hdf5"
-    group: "/data"
-    delete_file_on_restart: true
-    disable_combine_routines: true
-
-  # printer: ascii
-  # options:
-  #   output_file: "results.dat"
-  #   buffer_length: 10
-  #   delete_file_on_restart: true
-  #   print_debug_data: true
-
-  # printer: none
-
-
-Scanner:
-
-  use_scanner: multinest
-
-  scanners:
-
-    random:
-      plugin: random
-      like: LogLike
-      point_number: 1000
-
-    grid:
-      # plugin: pygrid
-      plugin: grid
-      #version: ">=1.0"
-      like: LogLike
-      grid_pts: [5, 3]
-
-    de:
-      plugin: diver
-      like: LogLike
-      NP: 500
-      initial_guesses:
-        parameters: [NormalDist::sigma, NormalDist::mu]
-        values:
-        - [2.5, 25]
-        - [2, 24]
-        - [1.5, 22]
-
-    multinest:
-      plugin: multinest
-      like:  LogLike
-      nlive: 500
-      #tol: 0.0001
-      tol: 0.1
-
-    mcmc:
-      plugin: great
-      like: LogLike
-      nTrialLists: 5
-      nTrials: 10000
-
-    twalk:
-      plugin: twalk
-      like: LogLike
-      sqrtR: 1.001
-
-    polychord:
-      plugin: polychord
-      like:  LogLike
-      print_parameters_in_native_output: true
-      tol: 0.1
-
-    pso:
-      plugin: jswarm
-      like:  LogLike
-      NP: 400
-      adaptive_phi: true
-      adaptive_omega: true
-      verbosity: 2
-
-    minuit2:
-      plugin: minuit2
-      like: LogLike
-      tolerance: 0.0001
-      precision: 0.0001
-      max_loglike_calls: 100000
-      max_iterations: 100000
-      algorithm: combined # simplex, combined, scan, fumili, bfgs, migrad
-      print_level: 1
-      strategy: 2
-      start:
-        NormalDist::mu: 25.
-        NormalDist::sigma: 2.5
-      step:
-        NormalDist::mu: 5.
-        NormalDist::sigma: 1.
-
-    scipy_basin_hopping:
-      like: LogLike
-      plugin: scipy_basin_hopping
-      # The run arguments below have been tested with scipy v1.9.
-      # Other versions might expect different arguments.
-      run:
-        # n_runs: 4
-        x0:
-          NormalDist::mu: 25.
-          NormalDist::sigma: 2.5
-        niter: 100
-        T: 1.0
-        stepsize: 0.5
-        minimizer_kwargs:
-          method: "L-BFGS-B"
-        interval: 50
-        disp: true
-        target_accept_rate: 0.5
-        stepwise_factor: 0.9
-
-    scipy_differential_evolution:
-      like: LogLike
-      plugin: scipy_differential_evolution
-      # The run arguments below have been tested with scipy v1.9.
-      # Other versions might expect different arguments.
-      run:
-        # n_runs: 4
-        strategy: 'best1bin'
-        maxiter: 1000
-        popsize: 15
-        tol: 0.01
-        mutation: [0.5, 1]
-        recombination: 0.7
-        disp: false
-        polish: true
-        init: 'latinhypercube'
-        atol: 0
-        updating: 'immediate'
-        x0:
-          NormalDist::mu: 25.
-          NormalDist::sigma: 2.5
-
-    scipy_direct:
-      like: LogLike
-      plugin: scipy_direct
-      # The run arguments below have been tested with scipy v1.9.
-      # Other versions might expect different arguments.
-      run:
-        # n_runs: 4
-        eps: 0.0001
-        # maxfun: 2000
-        maxiter: 1000
-        locally_biased: true
-        vol_tol: 1e-16
-        len_tol: 1e-6
-
-    scipy_dual_annealing:
-      like: LogLike
-      plugin: scipy_dual_annealing
-      # The run arguments below have been tested with scipy v1.9.
-      # Other versions might expect different arguments.
-      run:
-        # n_runs: 4
-        maxiter: 1000
-        initial_temp: 5230.0
-        restart_temp_ratio: 2.0e-5
-        visit: 2.62
-        accept: -5.0
-        maxfun: 10000000.0
-        no_local_search: false
-        x0:
-          NormalDist::mu: 25.
-          NormalDist::sigma: 2.5
-
-    scipy_shgo:
-      like: LogLike
-      plugin: scipy_shgo
-      # The run arguments below have been tested with scipy v1.9.
-      # Other versions might expect different arguments.
-      run:
-        split_param_space:
-          NormalDist::mu: 2
-          NormalDist::sigma: 2
-        n: 100
-        iters: 1
-        sampling_method: "sobol"  # "simplicial", "halton", "sobol"
-        minimizer_kwargs:
-          method: "SLSQP" # "SLSQP" "L-BFGS-B"
-          options:
-            ftol: 1e-12
-        options:
-          # maxfev:
-          # f_min:
-          # f_tol:
-          # maxiter:
-          # maxev:
-          # maxtime:
-          # minhgrd:
-          # jac: false  # Buggy in some scipy versions: https://github.com/scipy/scipy/issues/14533
-          minimize_every_iter: true
-          local_iter: false
-          infty_constraints: true
-          disp: false
-
-    scipy_minimize:
-      like: LogLike
-      plugin: scipy_minimize
-      # The run arguments below have been tested with scipy v1.9.
-      # Other versions might expect different arguments.
-      run:
-        # n_runs: 5
-        x0:
-          NormalDist::mu: 25.
-          NormalDist::sigma: 2.5
-        method: "L-BFGS-B"  # "Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP", "trust-constr",
-        # jac: "2-point"      # "2-point", "3-point", "cs"
-        # hess: "2-point"     # "2-point", "3-point", "cs"
-        tol: 1e-6
-        options:
-          maxiter: 15000
-          disp: false
-
-    static_dynesty:
-      like: LogLike
-      pkg: gambit_dynesty
-      plugin: static_dynesty
-      pkl_name: "static_dynesty.pkl"
-      init:
-        nlive: 1000
-      run:
-        dlogz: 0.5
-        checkpoint_every: 60
-
-    dynamic_dynesty:
-      like: LogLike
-      plugin: dynamic_dynesty
-      pkg: gambit_dynesty
-      pkl_name: "dynamic_dynesty.pkl"
-      init:
-        nlive: 1000
-      run:
-        dlogz_init: 0.5
-        n_effective: 10000
-        checkpoint_every: 60
-
-    nessai:
-      like: LogLike
-      plugin: nessai_flow_sampler
-      pkg: gambit_nessai
-      #init:
-        #output: "nessai_log_dir"
-        #logger: true
-
-    nautilus:
-      like: LogLike
-      plugin: nautilus
-      #pkg: gambit_nautilus
-      run:
-        verbose: true
-
-    zeus:
-      like: LogLike
-      pkg: gambit_zeus
-      plugin: zeus
-      init:
-        nwalkers: 8
-      #run:
-        #nsteps: 20000
-        #filename: "zeus.h5"
-      SaveProgressCallback:
-        filename: zeus.h5
-        ncheck: 100
-
-    emcee:
-      like: LogLike
-      plugin: emcee
-      pkg: gambit_emcee
-      init:
-        nwalkers: 8
-      run:
-        nsteps: 20000
-        #filename: "emcee.h5"
-
-    pocomc:
-      like: LogLike
-      plugin: pocomc
-      #pkg: gambit_pocomc
-      #init:
-      #    nparticles: 1000
-
-    ultranest:
-      like: LogLike
-      pkg: gambit_ultranest
-      plugin: reactive_ultranest
-      #init:
-      run:
-        min_num_live_points: 1000
-        dlogz: 0.5
-
-    binminpy:
-      like: LogLike
-      plugin: binminpy
-      run:
-        n_bins:
-          # For any parameters not listed under 'n_bins' binminpy will assume a single bin
-          NormalDist::mu: 100
-          NormalDist::sigma: 100
-        # sampled_parameters: ["NormalDist::mu", "NormalDist::sigma"]  # By default parameters are optimized within each bin
-        sampler: "latinhypercube"
-        optimizer: "minimize"
-        optimizer_kwargs:
-          method: "L-BFGS-B"
-          tol: 1e-4
-        n_initial_points: 10
-        n_sampler_points_per_bin: 10
-        # accept_loglike_above: -30.0
-        accept_delta_loglike: 6.5
-        neighborhood_distance: 2
-        inherit_best_init_point_within_bin: False
-        n_optim_restarts_per_bin: 1
-        n_tasks_per_batch: 1
-        print_progress_every_n_batch: 100
-
-
-ObsLikes:
-
-  - purpose:      LogLike
-    capability:   normaldist_loglike
-    module:       ExampleBit_A
-    type:         double
-
-  #- purpose:      Observable
-  #  capability:   normaldist_loglike
-  #  module:       ExampleBit_A
-  #  type:         double
-  #  critical:     true
-
-Rules:
-
-  - if:
-      capability: normaldist_loglike
-    then:
-      options:
-        probability_of_validity: 1.0
-
-Logger:
-
-  redirection:
-    [Default]      : "default.log"
-    [ExampleBit_A] : "ExampleBit_A.log"
-    [Scanner]      : "Scanner.log"
-  debug: true
-
-KeyValues:
-
-  debug: false #true
-
-  default_output_path: "${PWD}/runs/spartan"
-
-  # An additional entry in the dataset and metadata, useful for identifying which
-  # points correspond to a given scan in combined datasets.
-  # The default is for print_scanID: true,
-  # and for it to print the date and time as an int of the form
-  # scanID: HourMinuteSecondMillisecond. This can be overwritten to any integer.
-  print_scanID: true
-  scanID: 1
-
-  rng:
-    generator: ranlux48
-    seed: -1
-
-  print_timing_data: true
-
-  print_unitcube: true
-
-  likelihood:
-    model_invalid_for_lnlike_below: -1e6
-
-    # A 'likelihood modifier function' recieves as input the total
-    # log-likelihood value and outputs a modified log-likelihood which
-    # is then passed to the scanner. This can be used to make an adaptive
-    # scanner explore specific ranges of the total log-likelihood, e.g.
-    # log-likelihood values corresponding to a given 1D/2D confidence region.
-    # The default is to use the 'identity' modifier, which does nothing.
-    use_lnlike_modifier: identity
-    lnlike_modifiers:
-      # Assuming that the best-fit log-likelihood value is 0.0,
-      # the 'gaussian' or 'gaussian_plateau' settings below
-      # will encourage the scanner to explore parameter regions
-      # at the border of the 2-sigma confidence region in 2D
-      # (Delta lnlike = -3.09).
-      gaussian:
-        mu: -3.1
-        sigma: 0.5
-        # use_limit: lower
-        use_delta_lnlike: false
-      gaussian_plateau:
-        mu_dn: -3.2
-        sigma_dn: 0.5
-        mu_up: -3.0
-        sigma_up: 3.0
-        use_delta_lnlike: false

From 07212109d08f1130ebba92b2a77e0b90c554a58f Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Tue, 5 May 2026 09:46:39 +0200
Subject: [PATCH 20/24] Clarified a code snippet and comment in hdf5printer.hpp

---
 .../gambit/Printers/printers/hdf5printer.hpp      | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer.hpp b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
index c6a8acb1bc..d573469aa0 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
@@ -552,13 +552,14 @@ namespace Gambit
 
         // Force increment the buffer to "catch it up" to the current sync
         // position, in case it has been created "late".
-        // We subtract one because another increment will happen after
-        // the print statement (that triggered the creation of the new
-        // buffer) completes.
-        // If sync_pos is still 0 there is no previous position to catch up
-        // to (and the buffer is already at dset_head_pos == 0 from
-        // construction), so skip the call to avoid an unsigned underflow.
-        if(synchronised and printer->get_sync_pos()>0) it->second.fast_forward(printer->get_sync_pos()-1);
+        if (printer->get_sync_pos() > 0)  // if 0, nothing to catch up to
+        {
+          // We subtract one because another increment will happen after
+          // the print statement (that triggered the creation of the new
+          // buffer) completes. (Subtraction is safe since get_sync_pos() > 0.
+          // Otherwise this would have lead to an underflow.
+          if(synchronised) it->second.fast_forward(printer->get_sync_pos()-1);
+        }        
       }
 
       if( it == local_buffers.end() )

From f13198798706be81763e8edbd1400dee9fd1e003 Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Tue, 5 May 2026 09:48:40 +0200
Subject: [PATCH 21/24] Fixed typos in hdf5printer.hpp

---
 Printers/include/gambit/Printers/printers/hdf5printer.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer.hpp b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
index d573469aa0..a37c56d72e 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
@@ -557,7 +557,7 @@ namespace Gambit
           // We subtract one because another increment will happen after
           // the print statement (that triggered the creation of the new
           // buffer) completes. (Subtraction is safe since get_sync_pos() > 0.
-          // Otherwise this would have lead to an underflow.
+          // Otherwise this would have led to an underflow.)
           if(synchronised) it->second.fast_forward(printer->get_sync_pos()-1);
         }        
       }

From 0ba474e600c78aff8cc895de24bfa7d497cf202e Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Tue, 5 May 2026 09:51:15 +0200
Subject: [PATCH 22/24] Tweaked comment in hdf5printer_v2.hpp

---
 Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
index a6f996d1cc..07f04edaaa 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
@@ -294,8 +294,7 @@ namespace Gambit
              }
              // dtype is no longer needed after the equality check; close it
              // here to avoid leaking one HDF5 type handle per flush per
-             // dataset (write_RA_buffer below already does this at its
-             // matching H5Dget_type site).
+             // dataset.
              H5Tclose(dtype);
 
              std::size_t required_size = target_pos+length;

From d1edc9c3b52cb3239071654135d5d79805cec3e7 Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Tue, 5 May 2026 09:54:56 +0200
Subject: [PATCH 23/24] Tweaks to comment in hdf5tools.cpp

---
 Printers/src/printers/hdf5printer/hdf5tools.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Printers/src/printers/hdf5printer/hdf5tools.cpp b/Printers/src/printers/hdf5printer/hdf5tools.cpp
index f47200a3ae..be934013c8 100644
--- a/Printers/src/printers/hdf5printer/hdf5tools.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5tools.cpp
@@ -668,14 +668,14 @@ namespace Gambit {
       /// Close hdf5 group
       SIMPLE_CALL(hid_t, closeGroup,  hid_t, H5Gclose, "close", "group", "group")
 
-      /// State for errorsOff() / errorsOn(). File-local: only the two
-      /// functions below should ever touch these. The bool guard ensures
+      /// State for errorsOff() / errorsOn(). The bool guard ensures
       /// that a redundant errorsOff() called while errors are already off
       /// does NOT overwrite the saved handler with the NULL handler we
       /// installed ourselves -- otherwise the matching errorsOn() would
       /// "restore" NULL and silently disable HDF5 error reporting for
       /// the rest of the process.
-      namespace {
+      namespace
+      {
          bool errors_are_off = false;
          H5E_auto2_t saved_func = nullptr;
          void *saved_client_data = nullptr;

From 02d482af6b8408cb71156449a4893660b7d91f05 Mon Sep 17 00:00:00 2001
From: Anders Kvellestad <anders.kvellestad@fys.uio.no>
Date: Thu, 7 May 2026 19:01:09 +0200
Subject: [PATCH 24/24] Revised a bunch of overly verbose code comments.

---
 .../gambit/Printers/printers/hdf5printer.hpp  | 15 ++++-----------
 .../hdf5printer/DataSetInterfaceBase.hpp      | 19 ++++++-------------
 .../Printers/printers/hdf5printer_v2.hpp      | 11 +++--------
 .../src/printers/hdf5printer/hdf5printer.cpp  | 15 +++++----------
 .../src/printers/hdf5printer/hdf5tools.cpp    |  9 +++------
 5 files changed, 21 insertions(+), 48 deletions(-)

diff --git a/Printers/include/gambit/Printers/printers/hdf5printer.hpp b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
index a37c56d72e..613b2761ea 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer.hpp
@@ -333,15 +333,9 @@ namespace Gambit
         std::string metadata_group; // HDF5 group location to store metadata
 
         // Handles for HDF5 files and groups containing the datasets.
-        // Default-initialised to -1 (the convention this class already uses
-        // as the "unset HDF5 handle" sentinel; see e.g. the get_location() /
-        // get_RA_location() / get_metadata_location() checks). Only the
-        // primary printer's constructor assigns the file/group handles
-        // (file_id, group_id, RA_group_id, metadata_id); aux instances
-        // inherit the *_location_id values from the primary. The default
-        // initialisers ensure that any code path that mistakenly reads one
-        // of these on an aux instance gets a deterministic invalid handle
-        // rather than indeterminate memory.
+        // Only the primary printer's constructor assigns the file/group 
+        // handles (file_id, group_id, RA_group_id, metadata_id). Aux instances
+        // inherit the *_location_id values from the primary.
         hid_t file_id = -1;
         hid_t group_id = -1;
         hid_t RA_group_id = -1;
@@ -556,8 +550,7 @@ namespace Gambit
         {
           // We subtract one because another increment will happen after
           // the print statement (that triggered the creation of the new
-          // buffer) completes. (Subtraction is safe since get_sync_pos() > 0.
-          // Otherwise this would have led to an underflow.)
+          // buffer) completes.
           if(synchronised) it->second.fast_forward(printer->get_sync_pos()-1);
         }        
       }
diff --git a/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp b/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp
index e5b0a8233b..8e55d47338 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer/DataSetInterfaceBase.hpp
@@ -188,20 +188,13 @@ namespace Gambit {
       /// Deliberately does NOT close the underlying HDF5 dataset.
       ///
       /// DataSetInterfaceBase (and its derived classes) are stored by value
-      /// and assigned-from-temporary in several places, e.g.
-      ///   _dsetdata = DataSetInterfaceScalar<T,CHUNKLENGTH>(loc, name, ...);
-      ///   local_buffers[key] = BuffType(loc, label, vertexID, ...);
-      /// In each case a temporary is constructed, then copy-/move-assigned
-      /// into a member or container slot, then destroyed. With the default
-      /// copy semantics both objects hold the same hid_t dset_id, so
-      /// closing in the destructor would invalidate the handle held by the
-      /// surviving copy. The actual close therefore lives in the explicit
-      /// closeDataSet() method below, which is driven by
+      /// and assigned-from-temporary in several places. In such cases a 
+      /// temporary is constructed, then copy-/move-assigned into a member or 
+      /// container slot, then destroyed. Closing here in the destructor would 
+      /// invalidate the handle held by the surviving copy. The actual close 
+      /// therefore lives in the explicit closeDataSet() method below, used by
       /// VertexBufferNumeric1D_HDF5::finalise() during the printer's
-      /// finalise() pass. If finalise() is bypassed (e.g. an exception
-      /// during scan setup or shutdown), HDF5's own H5Fclose() at process
-      /// exit will auto-close any still-open dataset handles and flush
-      /// their pending I/O, so the file on disk remains well-formed.
+      /// finalise() pass.
       template<class T, std::size_t RR, std::size_t CL>
       DataSetInterfaceBase<T,RR,CL>::~DataSetInterfaceBase()
       {
diff --git a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
index 07f04edaaa..14d2be3d61 100644
--- a/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
+++ b/Printers/include/gambit/Printers/printers/hdf5printer_v2.hpp
@@ -78,13 +78,9 @@ namespace Gambit
     /// Largest allowed size of buffers. Size can be dynamically set from 1 to this number.
     static const std::size_t MAX_BUFFER_SIZE = 100000;
 
-    // The integer-encoded type ID for buffer-data messages
-    // (formerly used by h5v2_bufdata_type / h5v2_bufdata_values; the
-    // type encoding itself is still in use by the gather_and_print
-    // path via HDF5bufferchunk and add_to_buffers).
+    // The integer-encoded type ID for buffer-data messages.
     // Need a unique integer for each type. We can encode these
     // with a template function:
-
     template<class T>
     std::set<T> set_diff(const std::set<T>& set1, const std::set<T>& set2)
     {
@@ -292,9 +288,8 @@ namespace Gambit
                  errmsg << "Error! Tried to write to dataset (name="<<myname()<<") with type id "<<dtype<<" but expected it to have type id "<<expected_dtype<<". This is a bug, please report it.";
                  printer_error().raise(LOCAL_INFO, errmsg.str());
              }
-             // dtype is no longer needed after the equality check; close it
-             // here to avoid leaking one HDF5 type handle per flush per
-             // dataset.
+             // dtype is no longer needed, close it here to avoid leaking one 
+             // HDF5 type handle per flush per dataset.
              H5Tclose(dtype);
 
              std::size_t required_size = target_pos+length;
diff --git a/Printers/src/printers/hdf5printer/hdf5printer.cpp b/Printers/src/printers/hdf5printer/hdf5printer.cpp
index 13f530818f..32f9a3e9ef 100644
--- a/Printers/src/printers/hdf5printer/hdf5printer.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5printer.cpp
@@ -468,11 +468,7 @@ namespace Gambit
                 // Group does not exist in the pre-existing file. Refuse to
                 // proceed: we are not in resume mode and 'delete_file_on_restart'
                 // is not set, so adding a new group to a file from an unrelated
-                // previous run risks silently mixing unrelated data. Raise a
-                // printer_error with actionable advice rather than calling
-                // bare exit(1) (which bypassed MPI shutdown and left worker
-                // ranks hanging at the upcoming Barrier with no error
-                // message).
+                // previous run risks silently mixing unrelated data.
                 std::ostringstream errmsg2;
                 errmsg2 << "Error preparing pre-existing output file '"<<finalfile<<"' for writing via hdf5printer! The file exists, but does not contain the requested output group '"<<group<<"', and the run is not in resume mode (so we will not append to it) and 'delete_file_on_restart' is not set (so we will not overwrite it). Refusing to proceed to avoid silently mixing unrelated data. Please take one of the following actions:"<<std::endl;
                 errmsg2 << "  1. Re-run with the -r flag (restart) and set 'delete_file_on_restart: true' to overwrite the existing file;"<<std::endl;
@@ -1318,8 +1314,8 @@ namespace Gambit
 
       // If sync_pos is still 0 then no point has yet been registered as the
       // current sync position, so there is no "previous slot" for buffers to
-      // catch up to. Return early to avoid the unsigned underflow that would
-      // otherwise occur in get_sync_pos()-1 below.
+      // catch up to. Return early. (The get_sync_pos()-1 below would be an 
+      // unsigned underflow when get_sync_pos() = 0.)
       if(get_sync_pos()==0) return;
 
       // Determine the desired sync position
@@ -1397,9 +1393,8 @@ namespace Gambit
       }
 
       // Tell the HDF5 library to flush everything to disk.
-      // Auxiliary printers don't own a file handle (only the primary opens
-      // the HDF5 file in its constructor), so always flush via the primary's
-      // file_id. For the primary itself, primary_printer == this.
+      // Auxiliary printers don't own a file handle, so always 
+      // flush via the primary's file_id.
       herr_t err = H5Fflush(primary_printer->file_id, H5F_SCOPE_GLOBAL);
       if(err<0)
       {
diff --git a/Printers/src/printers/hdf5printer/hdf5tools.cpp b/Printers/src/printers/hdf5printer/hdf5tools.cpp
index be934013c8..6f73fd96dd 100644
--- a/Printers/src/printers/hdf5printer/hdf5tools.cpp
+++ b/Printers/src/printers/hdf5printer/hdf5tools.cpp
@@ -668,12 +668,9 @@ namespace Gambit {
       /// Close hdf5 group
       SIMPLE_CALL(hid_t, closeGroup,  hid_t, H5Gclose, "close", "group", "group")
 
-      /// State for errorsOff() / errorsOn(). The bool guard ensures
-      /// that a redundant errorsOff() called while errors are already off
-      /// does NOT overwrite the saved handler with the NULL handler we
-      /// installed ourselves -- otherwise the matching errorsOn() would
-      /// "restore" NULL and silently disable HDF5 error reporting for
-      /// the rest of the process.
+      /// State for errorsOff() / errorsOn(). The bool guard errors_are_off 
+      /// ensures that a redundant errorsOff() called while errors are already 
+      /// off does NOT overwrite the saved handler with the NULL handler.
       namespace
       {
          bool errors_are_off = false;