From 807356d1b65f2b1f08d926f3f24303cfb3e78ee2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Houpert?= <10154151+lhoupert@users.noreply.github.com> Date: Tue, 23 Jun 2026 15:16:37 +0100 Subject: [PATCH] fix(s1-rtc): consolidate every orbit group, not just the last-ingested one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit S1 RTC cubes ended up with only one orbit consolidated on disk (staging: 30TWM asc✓/desc✗, 32TNN/30UVU asc✗/desc✓). Root cause: the pipeline ingests acquisitions one orbit at a time in separate pods, each of which strips all consolidated metadata (so `time` can resize), ingests its orbit, then calls `consolidate_s1_store(store, orbit_direction)` — which only re-consolidated the single orbit passed. So whichever orbit was ingested last is the only one left with on-disk consolidated metadata. Consolidate every orbit group present (iterate `root.groups()`), then the root. The minimal append-fetch already pulls all `zarr.json`, so both orbits' metadata is local — same reason the root consolidation already works. Signature unchanged (`orbit_direction` kept for logging/callers). Impact is cosmetic: the root consolidation is complete for both orbits and readers opening at the root get a synthesized child view (titiler renders the unconsolidated orbit fine). The fix matters for clients opening a single orbit group standalone (the STAC `.zarr/` hrefs) with `consolidated=True`, which otherwise fall back to a listing. Test asserts each orbit group is consolidated *standalone* — checking via `root[orbit]` is a false-green because a consolidated root synthesizes the child's view. Existing cubes self-heal on their next re-ingest. Co-Authored-By: Claude Opus 4.8 --- src/eopf_geozarr/conversion/s1_ingest.py | 13 +++++++++++-- tests/test_s1_rtc_ingest.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/eopf_geozarr/conversion/s1_ingest.py b/src/eopf_geozarr/conversion/s1_ingest.py index 5feed9e2..2b541cae 100644 --- a/src/eopf_geozarr/conversion/s1_ingest.py +++ b/src/eopf_geozarr/conversion/s1_ingest.py @@ -743,12 +743,21 @@ def ingest_s1tiling_acquisition( def consolidate_s1_store(store_path: str | Path, orbit_direction: str) -> None: - """Consolidate metadata at orbit direction and root levels. + """Consolidate metadata for every orbit-direction group and the root. Must be called AFTER all ingestions complete — consolidated metadata caches array shapes and will become stale if called mid-ingestion. + + Consolidates *every* orbit group present, not just ``orbit_direction``: the + pipeline ingests acquisitions one orbit at a time after stripping all + consolidated metadata (so ``time`` can resize), so consolidating only the + passed orbit would leave the other orbit's group unconsolidated on disk + (readers opening that orbit standalone then fall back to a listing). + ``orbit_direction`` is retained for logging / caller compatibility. """ - zarr.consolidate_metadata(str(store_path), path=orbit_direction, zarr_format=3) + root = zarr.open_group(str(store_path), mode="r", zarr_format=3) + for orbit_name, _ in root.groups(): + zarr.consolidate_metadata(str(store_path), path=orbit_name, zarr_format=3) zarr.consolidate_metadata(str(store_path), zarr_format=3) log.info( "Metadata consolidated", diff --git a/tests/test_s1_rtc_ingest.py b/tests/test_s1_rtc_ingest.py index 85cc0899..ef660459 100644 --- a/tests/test_s1_rtc_ingest.py +++ b/tests/test_s1_rtc_ingest.py @@ -652,6 +652,26 @@ def test_consolidate_after_all_ingestions( r10m = root["ascending"]["r10m"] assert r10m["vv"].shape[0] == 2 + def test_consolidate_all_orbits_present( + self, s1_geotiff_dir: Path, s1_store_path: Path + ) -> None: + """``consolidate_s1_store`` must leave EVERY orbit group consolidated on disk, not just the + one passed. The pipeline ingests acquisitions one orbit at a time after stripping all + consolidated metadata (so ``time`` can resize), so consolidating only the passed orbit left + staging cubes asc✓/desc✗. Each orbit group is checked **standalone**: a consolidated root + synthesises the child's view, so ``root[orbit].metadata.consolidated_metadata`` is a + false-green (non-None even when ``/zarr.json`` lacks it). + """ + vv1, vh1, mask1 = self._get_acq_paths(s1_geotiff_dir, "20230115t061234") + vv2, vh2, mask2 = self._get_acq_paths(s1_geotiff_dir, "20230127t061235") + ingest_s1tiling_acquisition(vv1, vh1, mask1, s1_store_path, "ascending") + ingest_s1tiling_acquisition(vv2, vh2, mask2, s1_store_path, "descending") + consolidate_s1_store(s1_store_path, "descending") # only one orbit passed + + for orbit in ("ascending", "descending"): + grp = zarr.open_group(str(s1_store_path / orbit), mode="r", zarr_format=3) + assert grp.metadata.consolidated_metadata is not None, f"{orbit} orbit not consolidated" + def _get_acq_paths(self, geotiff_dir: Path, stamp: str) -> tuple[Path, Path, Path]: vv = geotiff_dir / f"s1a_32TQM_vv_ASC_037_{stamp}_GammaNaughtRTC.tif" vh = geotiff_dir / f"s1a_32TQM_vh_ASC_037_{stamp}_GammaNaughtRTC.tif"