From cb1ac023341abe8552cea89217bd878e5f683e39 Mon Sep 17 00:00:00 2001
From: ceej640 <42260127+Ceej640@users.noreply.github.com>
Date: Sat, 30 May 2026 20:40:47 -0400
Subject: [PATCH 1/4] Fix test suite collection and stale expectations

---
 gently/harness/conversation.py      | 41 +++++++++++++++++++++++++++++
 gently/mesh/mesh_service.py         | 20 +++++++++++---
 tests/test_campaign_coordination.py |  8 ++++++
 tests/test_dispim_device_safety.py  | 33 +++++++++++++----------
 4 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/gently/harness/conversation.py b/gently/harness/conversation.py
index e5de53d9..bfd09684 100644
--- a/gently/harness/conversation.py
+++ b/gently/harness/conversation.py
@@ -17,6 +17,47 @@
 logger = logging.getLogger(__name__)
 
 
+_TEXT_TOOL_CALL_RE = re.compile(r"<tool_call>\s*(.*?)\s*</tool_call>", re.DOTALL | re.IGNORECASE)
+
+
+def _extract_text_tool_calls(text: str) -> tuple[str, List[Dict[str, Any]]]:
+    """Extract JSON tool calls embedded in text fallback tags.
+
+    Some model/test harness paths may emit a tool request as text instead of
+    structured ``tool_use`` blocks.  Keep parsing permissive, but only return
+    well-formed objects that name a tool.
+    """
+    if not text:
+        return text, []
+
+    calls: List[Dict[str, Any]] = []
+
+    def _remove_or_collect(match: re.Match) -> str:
+        try:
+            payload = json.loads(match.group(1).strip())
+        except (TypeError, json.JSONDecodeError):
+            return ""
+        if not isinstance(payload, dict):
+            return ""
+        name = payload.get("name")
+        if not name:
+            return ""
+        tool_input = payload.get("input")
+        if tool_input is None:
+            tool_input = payload.get("arguments", {})
+        if tool_input is None:
+            tool_input = {}
+        calls.append({
+            "name": name,
+            "input": tool_input,
+            "id": payload.get("id"),
+        })
+        return ""
+
+    cleaned = _TEXT_TOOL_CALL_RE.sub(_remove_or_collect, text)
+    return cleaned, calls
+
+
 def _extend_tool_calls(out: List[Dict[str, Any]], content_blocks) -> None:
     """Append every tool_use block in content_blocks to out.
 
diff --git a/gently/mesh/mesh_service.py b/gently/mesh/mesh_service.py
index edac242c..c5cbe5b1 100644
--- a/gently/mesh/mesh_service.py
+++ b/gently/mesh/mesh_service.py
@@ -212,7 +212,7 @@ def _on_peer_discovered(self, data: dict, sender_ip: str, verified: bool = False
 
         # Only fetch status from trusted peers
         if trusted:
-            asyncio.ensure_future(self._fetch_and_update_peer(peer))
+            self._schedule_status_fetch(peer)
 
     def _on_peer_heartbeat(self, instance_id: str, sender_ip: str, verified: bool = False):
         """Called on subsequent heartbeats from a known peer."""
@@ -228,7 +228,7 @@ def _on_nudge_received(self, peer_id: str, sender_ip: str):
         if peer:
             peer.last_seen = time.time()
             peer.ip_address = sender_ip
-            asyncio.ensure_future(self._fetch_and_update_peer(peer))
+            self._schedule_status_fetch(peer)
             logger.debug(f"Mesh: nudge from {peer.hostname} ({peer_id[:8]}), refetching")
 
     def _on_local_status_changed(self, event):
@@ -311,6 +311,20 @@ async def _fetch_and_update_peer(self, peer: PeerInfo):
             "hostname": peer.hostname,
         })
 
+    def _schedule_status_fetch(self, peer: PeerInfo) -> None:
+        """Schedule a best-effort peer status fetch when the service is running."""
+        if not self._peer_client:
+            return
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            logger.debug(
+                "Mesh: skipping status fetch for %s because no event loop is running",
+                peer.instance_id[:8],
+            )
+            return
+        loop.create_task(self._fetch_and_update_peer(peer))
+
     # ------------------------------------------------------------------
     # Pairing integration
     # ------------------------------------------------------------------
@@ -336,7 +350,7 @@ def mark_peer_trusted(self, instance_id: str):
                 if cert_fp:
                     peer.tls_enabled = True
             # Kick off an immediate status fetch now that we trust them
-            asyncio.ensure_future(self._fetch_and_update_peer(peer))
+            self._schedule_status_fetch(peer)
             logger.info(f"Mesh: peer {peer.hostname} ({instance_id[:8]}) now trusted")
 
     # ------------------------------------------------------------------
diff --git a/tests/test_campaign_coordination.py b/tests/test_campaign_coordination.py
index 18ee49e2..49737fc4 100644
--- a/tests/test_campaign_coordination.py
+++ b/tests/test_campaign_coordination.py
@@ -15,6 +15,14 @@
 import urllib.request
 import urllib.error
 
+if "pytest" in sys.modules and os.environ.get("GENTLY_RUN_LIVE_CAMPAIGN_TESTS") != "1":
+    import pytest
+
+    pytest.skip(
+        "live campaign coordination script requires a running Gently server",
+        allow_module_level=True,
+    )
+
 BASE = os.environ.get("GENTLY_URL", "http://localhost:8080")
 FAKE_PEER = "test-peer-001"
 FAKE_HOST = "test-machine"
diff --git a/tests/test_dispim_device_safety.py b/tests/test_dispim_device_safety.py
index 40fab85a..9ca67e86 100644
--- a/tests/test_dispim_device_safety.py
+++ b/tests/test_dispim_device_safety.py
@@ -27,7 +27,7 @@ def __init__(self):
         self._configs = {}  # group -> current_config
         self._available_configs = {}  # group -> [configs]
         self._exposure = 10.0
-        self._camera_device = None
+        self._camera_device = ""
         self._focus_device = None
         self._circular_buffer = []
         self._sequence_running = False
@@ -83,6 +83,9 @@ def waitForConfig(self, group, config):
     def setCameraDevice(self, name):
         self._camera_device = name
 
+    def getCameraDevice(self):
+        return self._camera_device
+
     def setFocusDevice(self, name):
         self._focus_device = name
 
@@ -226,9 +229,9 @@ def make_z_stage(core=None, limits=(50.0, 250.0)):
     return DiSPIMZstage("ZStage", core or make_core(), limits=limits)
 
 
-def make_xy_stage(core=None, x_limits=(2000.0, 4000.0), y_limits=(-1000.0, 1000.0)):
+def make_xy_stage(core=None):
     from gently.hardware.dispim.devices.stage import DiSPIMXYStage
-    return DiSPIMXYStage("XYStage", core or make_core(), x_limits=x_limits, y_limits=y_limits)
+    return DiSPIMXYStage("XYStage", core or make_core())
 
 
 def make_piezo(core=None, limits=(-200.0, 200.0)):
@@ -318,38 +321,40 @@ class TestXYStageBounds:
 
     def test_valid_xy_position(self):
         stage = make_xy_stage()
-        status = stage.set([3000.0, 0.0])
+        x = (stage.x_limits[0] + stage.x_limits[1]) / 2.0
+        y = (stage.y_limits[0] + stage.y_limits[1]) / 2.0
+        status = stage.set([x, y])
         status.wait(timeout=2)
-        assert stage.core._xy_position == (3000.0, 0.0)
+        assert stage.core._xy_position == (x, y)
 
     def test_x_below_lower_limit(self):
         stage = make_xy_stage()
-        status = stage.set([1999.0, 0.0])
-        with pytest.raises(ValueError, match="outside limits"):
+        status = stage.set([stage.x_limits[0] - 1.0, 0.0])
+        with pytest.raises(ValueError, match="outside hardware limits"):
             status.wait(timeout=2)
 
     def test_x_above_upper_limit(self):
         stage = make_xy_stage()
-        status = stage.set([4001.0, 0.0])
-        with pytest.raises(ValueError, match="outside limits"):
+        status = stage.set([stage.x_limits[1] + 1.0, 0.0])
+        with pytest.raises(ValueError, match="outside hardware limits"):
             status.wait(timeout=2)
 
     def test_y_below_lower_limit(self):
         stage = make_xy_stage()
-        status = stage.set([3000.0, -1001.0])
-        with pytest.raises(ValueError, match="outside limits"):
+        status = stage.set([0.0, stage.y_limits[0] - 1.0])
+        with pytest.raises(ValueError, match="outside hardware limits"):
             status.wait(timeout=2)
 
     def test_y_above_upper_limit(self):
         stage = make_xy_stage()
-        status = stage.set([3000.0, 1001.0])
-        with pytest.raises(ValueError, match="outside limits"):
+        status = stage.set([0.0, stage.y_limits[1] + 1.0])
+        with pytest.raises(ValueError, match="outside hardware limits"):
             status.wait(timeout=2)
 
     def test_core_not_called_on_invalid_x(self):
         core = make_core()
         stage = make_xy_stage(core=core)
-        stage.set([0.0, 0.0])  # x=0 is below x_limits[0]=2000
+        stage.set([stage.x_limits[0] - 1.0, 0.0])
         time.sleep(0.1)
         assert not any(c[0] == 'setXYPosition' for c in core.call_log)
 

From c10c71b9f7a308342515a96ee7c19fddf4c23ec8 Mon Sep 17 00:00:00 2001
From: ceej640 <42260127+Ceej640@users.noreply.github.com>
Date: Sat, 30 May 2026 20:55:05 -0400
Subject: [PATCH 2/4] Harden FileStore path and YAML loading

---
 gently/app/agent.py             | 12 ++---
 gently/core/file_store.py       | 78 +++++++++++++++++++++++----------
 tests/test_file_store_safety.py | 45 +++++++++++++++++++
 3 files changed, 106 insertions(+), 29 deletions(-)
 create mode 100644 tests/test_file_store_safety.py

diff --git a/gently/app/agent.py b/gently/app/agent.py
index 9b144f92..5ea06ae9 100644
--- a/gently/app/agent.py
+++ b/gently/app/agent.py
@@ -1303,13 +1303,15 @@ def _compute_imported_dose(self, source_session_id: str, embryo_id: str) -> Dict
         if not self.store:
             return result
 
-        # FileStore exposes _session_dir(session_id) → resolved Path.
-        session_dir_fn = getattr(self.store, '_session_dir', None)
-        sd = session_dir_fn(source_session_id) if callable(session_dir_fn) else None
-        if sd is None:
+        # FileStore exposes _embryo_dir(session_id, embryo_id) with validation.
+        embryo_dir_fn = getattr(self.store, '_embryo_dir', None)
+        if not callable(embryo_dir_fn):
             return result
 
-        vols_dir = Path(sd) / 'embryos' / embryo_id / 'volumes'
+        try:
+            vols_dir = Path(embryo_dir_fn(source_session_id, embryo_id)) / 'volumes'
+        except (FileNotFoundError, ValueError):
+            return result
         if not vols_dir.is_dir():
             return result
 
diff --git a/gently/core/file_store.py b/gently/core/file_store.py
index e4f14009..e1f914c7 100644
--- a/gently/core/file_store.py
+++ b/gently/core/file_store.py
@@ -81,6 +81,36 @@
 # Helpers
 # ---------------------------------------------------------------------------
 
+_INVALID_PATH_COMPONENT_RE = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
+
+
+def _validate_path_component(value: str, label: str) -> str:
+    """Validate a caller-controlled value before using it as one path part."""
+    if not isinstance(value, str):
+        raise ValueError(f"{label} must be a string")
+    if not value:
+        raise ValueError(f"{label} must not be empty")
+    if value.strip() != value:
+        raise ValueError(f"{label} must not start or end with whitespace: {value!r}")
+    if value in {".", ".."} or _INVALID_PATH_COMPONENT_RE.search(value):
+        raise ValueError(f"{label} contains unsafe path characters: {value!r}")
+    if value.rstrip(" .") != value:
+        raise ValueError(f"{label} must not end with a space or dot: {value!r}")
+    return value
+
+
+def _safe_child_path(parent: Path, component: str, label: str) -> Path:
+    """Return parent/component after validating it stays below parent."""
+    component = _validate_path_component(component, label)
+    base = parent.resolve()
+    child = (parent / component).resolve()
+    try:
+        child.relative_to(base)
+    except ValueError:
+        raise ValueError(f"{label} escapes storage root: {component!r}")
+    return child
+
+
 def _slugify(text: str, max_len: int = 30) -> str:
     """Lowercase, replace non-alphanum with hyphens, truncate."""
     if not text:
@@ -173,15 +203,9 @@ def _write_yaml(path: Path, data: Any) -> None:
 def _read_yaml(path: Path) -> Any:
     """Read a YAML file.  Returns None if missing or empty.
 
-    Tolerant of legacy session files written before _sanitize_for_yaml
-    existed — those embed numpy scalars as
-    ``!!python/object/apply:numpy.core.multiarray.scalar`` tags that
-    safe_load refuses to construct. When we hit such a file we fall
-    back to unsafe_load (the only writer of these files is our own
-    code on local disk, same trust boundary as the code itself) and
-    immediately sanitize the result so the caller always receives
-    native Python types. New writes go through _write_yaml + safe_dump
-    so legacy form does not propagate.
+    Never constructs Python objects from YAML. Legacy files containing
+    ``!!python/object`` or numpy constructor tags must be migrated by a
+    trusted offline tool before FileStore will read them.
     """
     if not path.exists():
         return None
@@ -193,8 +217,10 @@ def _read_yaml(path: Path) -> Any:
         marker = str(err)
         if 'python/object' not in marker and 'numpy' not in marker:
             raise
-        data = yaml.unsafe_load(text)
-        return _sanitize_for_yaml(data)
+        raise ValueError(
+            f"Refusing to load unsafe YAML tags from {path}. "
+            "Migrate this file to safe YAML before opening it in Gently."
+        ) from err
 
 
 def _append_jsonl(path: Path, record: dict) -> None:
@@ -283,7 +309,10 @@ def _require_session_dir(self, session_id: str) -> Path:
 
     def _embryo_dir(self, session_id: str, embryo_id: str) -> Path:
         sd = self._require_session_dir(session_id)
-        return sd / "embryos" / embryo_id
+        return self._embryo_dir_for_session(sd, embryo_id)
+
+    def _embryo_dir_for_session(self, session_dir: Path, embryo_id: str) -> Path:
+        return _safe_child_path(session_dir / "embryos", embryo_id, "embryo_id")
 
     def _volume_dir(self, session_id: str, embryo_id: str) -> Path:
         d = self._embryo_dir(session_id, embryo_id) / "volumes"
@@ -555,7 +584,7 @@ def get_embryo(self, session_id: str, embryo_id: str) -> Optional[EmbryoInfo]:
         sd = self._session_dir(session_id)
         if sd is None:
             return None
-        yaml_path = sd / "embryos" / embryo_id / "embryo.yaml"
+        yaml_path = self._embryo_dir_for_session(sd, embryo_id) / "embryo.yaml"
         data = _read_yaml(yaml_path)
         return _normalize_embryo_record(data)
 
@@ -733,7 +762,8 @@ def get_volume_path(
         if sd is None:
             return None
         vol_path = (
-            sd / "embryos" / embryo_id / "volumes" / self._volume_filename(timepoint)
+            self._embryo_dir_for_session(sd, embryo_id)
+            / "volumes" / self._volume_filename(timepoint)
         )
         if vol_path.exists():
             return vol_path
@@ -753,7 +783,7 @@ def list_volumes(
 
         # Determine which embryo dirs to scan
         if embryo_id:
-            dirs = [embryos_dir / embryo_id]
+            dirs = [self._embryo_dir_for_session(sd, embryo_id)]
         else:
             dirs = sorted(
                 d for d in embryos_dir.iterdir() if d.is_dir()
@@ -816,8 +846,8 @@ def get_projection_path(
         if sd is None:
             return None
         proj_path = (
-            sd / "embryos" / embryo_id / "projections"
-            / self._projection_filename(timepoint)
+            self._embryo_dir_for_session(sd, embryo_id)
+            / "projections" / self._projection_filename(timepoint)
         )
         if proj_path.exists():
             return proj_path
@@ -834,7 +864,7 @@ def list_projection_timepoints(
         sd = self._session_dir(session_id)
         if sd is None:
             return []
-        proj_dir = sd / "embryos" / embryo_id / "projections"
+        proj_dir = self._embryo_dir_for_session(sd, embryo_id) / "projections"
         if not proj_dir.exists():
             return []
         tps: List[int] = []
@@ -861,7 +891,7 @@ def list_projections(
         sd = self._session_dir(session_id)
         if sd is None:
             return []
-        proj_dir = sd / "embryos" / embryo_id / "projections"
+        proj_dir = self._embryo_dir_for_session(sd, embryo_id) / "projections"
         if not proj_dir.exists():
             return []
 
@@ -886,8 +916,8 @@ def list_projections(
             # Use the volume sidecar's acquired_at as the projection created_at
             # if available; otherwise use the file mtime.
             meta_path = (
-                sd / "embryos" / embryo_id / "volumes"
-                / self._volume_meta_filename(tp)
+                self._embryo_dir_for_session(sd, embryo_id)
+                / "volumes" / self._volume_meta_filename(tp)
             )
             vol_meta = _read_yaml(meta_path)
             created = (
@@ -1133,7 +1163,7 @@ def store_prediction(
         # an offset based on embryo ordering.  A simpler and safer approach:
         # use a session-level counter stored in perception_runs.yaml.
         sd = self._require_session_dir(session_id)
-        pred_path = sd / "embryos" / embryo_id / "predictions.jsonl"
+        pred_path = self._embryo_dir_for_session(sd, embryo_id) / "predictions.jsonl"
         existing = _read_jsonl(pred_path)
         prediction_id = len(existing) + 1
 
@@ -1175,7 +1205,7 @@ def get_predictions(
 
         # Determine which embryo dirs to read
         if embryo_id:
-            dirs = [embryos_dir / embryo_id]
+            dirs = [self._embryo_dir_for_session(sd, embryo_id)]
         else:
             dirs = sorted(d for d in embryos_dir.iterdir() if d.is_dir())
 
@@ -1249,7 +1279,7 @@ def get_ground_truth(
         sd = self._session_dir(session_id)
         if sd is None:
             return []
-        gt_path = sd / "embryos" / embryo_id / "ground_truth.yaml"
+        gt_path = self._embryo_dir_for_session(sd, embryo_id) / "ground_truth.yaml"
         entries: list = _read_yaml(gt_path) or []
         entries.sort(key=lambda e: e.get("start_timepoint", 0))
         return entries
diff --git a/tests/test_file_store_safety.py b/tests/test_file_store_safety.py
new file mode 100644
index 00000000..a1303b23
--- /dev/null
+++ b/tests/test_file_store_safety.py
@@ -0,0 +1,45 @@
+import pytest
+
+from gently.core.file_store import FileStore, _read_yaml
+
+
+def _new_store(tmp_path):
+    store = FileStore(tmp_path / "store")
+    store.create_session("s1", name="safety")
+    return store
+
+
+@pytest.mark.parametrize("embryo_id", ["../outside", "..\\outside", "bad:name", "trailing."])
+def test_embryo_id_rejects_unsafe_path_components(tmp_path, embryo_id):
+    store = _new_store(tmp_path)
+
+    with pytest.raises(ValueError, match="embryo_id"):
+        store.register_embryo("s1", embryo_id)
+
+    assert not (store.root / "outside").exists()
+
+
+def test_prediction_path_uses_validated_embryo_id(tmp_path):
+    store = _new_store(tmp_path)
+
+    with pytest.raises(ValueError, match="embryo_id"):
+        store.store_prediction(
+            run_id=1,
+            session_id="s1",
+            embryo_id="../outside",
+            timepoint=0,
+            predicted_stage="early",
+        )
+
+    assert not (store.root / "outside").exists()
+
+
+def test_read_yaml_refuses_python_object_tags(tmp_path):
+    path = tmp_path / "unsafe.yaml"
+    path.write_text(
+        "value: !!python/object/apply:builtins.eval ['1 + 1']\n",
+        encoding="utf-8",
+    )
+
+    with pytest.raises(ValueError, match="Refusing to load unsafe YAML"):
+        _read_yaml(path)

From 9e932255dacea25515a182986cb7848a01df5e98 Mon Sep 17 00:00:00 2001
From: Johnson <johnsonc5@hhmi.org>
Date: Mon, 1 Jun 2026 00:48:47 -0400
Subject: [PATCH 3/4] Add datastore audit plan

---
 README.md               |  1 +
 docs/datastore-audit.md | 75 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 docs/datastore-audit.md

diff --git a/README.md b/README.md
index 28af4c12..5847e26f 100644
--- a/README.md
+++ b/README.md
@@ -178,6 +178,7 @@ That's the loop: **talk → plan → inspect.** With hardware connected (drop `-
 | [What Gently Can Do](docs/guides/capabilities.md) | Everyone | Perception, detection, plan mode, memory, mesh, safety |
 | [Build a Plugin](docs/guides/build-a-plugin.md) | Developers | Create organism and hardware plugins for other modalities |
 | [Hardware Setup](docs/guides/hardware-setup.md) | Labs | Connect a diSPIM, start the device layer, first acquisition |
+| [Datastore Audit](docs/datastore-audit.md) | Developers | Decide whether Gently3 can evolve or needs a Gently4 store API |
 
 ## Architecture
 
diff --git a/docs/datastore-audit.md b/docs/datastore-audit.md
new file mode 100644
index 00000000..96e147d5
--- /dev/null
+++ b/docs/datastore-audit.md
@@ -0,0 +1,75 @@
+# Datastore Audit
+
+The FileStore safety work is a prerequisite for a larger question: whether the
+current Gently3 datastore is sound enough to evolve, or whether a Gently4 store
+API is needed. Do the audit before choosing a migration.
+
+## Audit Questions
+
+For every data product Gently creates or consumes, answer:
+
+- Is this durable source data, derived/recomputable data, runtime state, or UI
+  cache?
+- Where is it stored on disk?
+- Is the path schema documented and safe against untrusted identifiers?
+- Is the file format stable, versioned, and readable without importing runtime
+  hardware dependencies?
+- Can a biologist browse it by session, sample, timepoint, modality, and
+  provenance?
+- Can downstream analysis find the raw data and the metadata needed to interpret
+  it?
+- Is there data Gently uses but does not persist?
+- Is there data Gently stores but never reads, displays, exports, or validates?
+
+## Inventory Template
+
+| Data product | Current path/table | Class | Producer | Consumer | Browse need | Gap |
+| --- | --- | --- | --- | --- | --- | --- |
+| session metadata | `sessions/*/session.yaml` | durable | launcher/session manager | UI, resume, audit | list sessions | check schema version |
+| timeline/events | `timeline.jsonl`, `events.jsonl` | durable | event capture | replay, debug export | filter by time/type | standardize event names |
+| interaction log | `interaction_log.jsonl` | durable | agent runtime | debug export | inspect chat/tool flow | include profile links |
+| embryo/sample state | `embryos/*/embryo.yaml` | durable | marking/calibration/acquisition | tools, UI, resume | browse sample state | generalize beyond embryos |
+| volumes/snapshots | `volumes/*.tif`, `snapshots/*.tif` | durable source | acquisition | perception, analysis | preview, export | verify metadata sidecars |
+| projections | `projections/*.jpg` | derived | store/perception | UI | preview | mark recomputable |
+| perception traces | `traces/*.json`, `predictions.jsonl` | durable derived | perception | UI/debug/eval | inspect reasoning | link to source volume |
+| plans/campaigns | `agent/campaigns/*` | durable | plan mode | UI, execution | browse by campaign | align with session data |
+| debug bundles | `debug_exports/*` | derived | debug exporter | coding agent | download/share | retention policy |
+
+## Biologist-Facing Browser
+
+A useful data browser should organize by:
+
+- session and experimental intent
+- sample or embryo
+- timepoint
+- modality: overview, lightsheet volume, projection, perception, plan, event
+- provenance: acquisition settings, calibration, exposure, software version,
+  operator action, and agent decision
+
+The browser should distinguish raw source data from derived previews and should
+always expose the raw file path/export path for analysis outside Gently.
+
+## Gently4 Decision Criteria
+
+Stay on Gently3 and migrate incrementally if:
+
+- path schemas can be versioned in place
+- all durable data can be discovered from `sessions/`
+- missing metadata can be added as sidecars without breaking existing sessions
+- the UI can browse the store without special-case crawlers
+
+Define a Gently4 API if:
+
+- durable data are split across incompatible roots
+- old sessions cannot be safely migrated or indexed
+- common queries require scanning many large files
+- sample abstractions cannot generalize without changing the store contract
+- provenance links between raw data, perception, plans, and operator actions are
+  not representable in the current layout
+
+## Safety Tie-In
+
+The path and YAML hardening in this PR should remain part of any future
+datastore design. A biologist-facing browser or migration API cannot be trusted
+unless user-controlled identifiers stay inside the store root and legacy files
+fail closed when they contain unsafe constructors.

From aa042ab5c5b907bccb1880c28f643cc321121d01 Mon Sep 17 00:00:00 2001
From: Johnson <johnsonc5@hhmi.org>
Date: Mon, 1 Jun 2026 01:34:12 -0400
Subject: [PATCH 4/4] Add datastore audit command

---
 docs/datastore-audit.md        |  21 ++++
 gently/core/datastore_audit.py | 184 +++++++++++++++++++++++++++++++++
 tests/test_datastore_audit.py  |  49 +++++++++
 3 files changed, 254 insertions(+)
 create mode 100644 gently/core/datastore_audit.py
 create mode 100644 tests/test_datastore_audit.py

diff --git a/docs/datastore-audit.md b/docs/datastore-audit.md
index 96e147d5..00307d2a 100644
--- a/docs/datastore-audit.md
+++ b/docs/datastore-audit.md
@@ -21,6 +21,27 @@ For every data product Gently creates or consumes, answer:
 - Is there data Gently uses but does not persist?
 - Is there data Gently stores but never reads, displays, exports, or validates?
 
+## Audit Command
+
+Run a first-pass inventory against a Gently3/FileStore root:
+
+```shell
+python -m gently.core.datastore_audit D:/Gently3
+```
+
+Use JSON output for scripts:
+
+```shell
+python -m gently.core.datastore_audit D:/Gently3 --json --output audit.json
+```
+
+The command counts session metadata, timelines/events, interaction logs,
+snapshots, volumes, sidecars, sample records, projections, perception traces,
+debug exports, profile spans, campaign plans, incoming files, and logs. It also
+flags obvious browseability/provenance gaps, including missing `session.yaml`,
+unreadable YAML, volume TIFFs without `.meta.yaml` sidecars, snapshot TIFFs
+without sidecars, and sample directories without `embryo.yaml`.
+
 ## Inventory Template
 
 | Data product | Current path/table | Class | Producer | Consumer | Browse need | Gap |
diff --git a/gently/core/datastore_audit.py b/gently/core/datastore_audit.py
new file mode 100644
index 00000000..1835873f
--- /dev/null
+++ b/gently/core/datastore_audit.py
@@ -0,0 +1,184 @@
+"""Audit a Gently3 FileStore root for data inventory and obvious gaps."""
+
+from __future__ import annotations
+
+import argparse
+import json
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import yaml
+
+
+@dataclass
+class SessionAudit:
+    """Audit summary for one session directory."""
+
+    session_dir: str
+    session_id: Optional[str]
+    artifact_counts: Dict[str, int] = field(default_factory=dict)
+    gaps: List[str] = field(default_factory=list)
+
+
+@dataclass
+class DatastoreAudit:
+    """Top-level datastore audit report."""
+
+    root: str
+    session_count: int
+    artifact_counts: Dict[str, int]
+    gaps: List[str]
+    sessions: List[SessionAudit]
+
+    def to_dict(self) -> Dict:
+        return asdict(self)
+
+
+_COUNT_PATTERNS = {
+    "session_metadata": ["session.yaml"],
+    "timeline_logs": ["timeline.jsonl", "events.jsonl"],
+    "interaction_logs": ["interaction_log.jsonl"],
+    "snapshots": ["snapshots/*.tif"],
+    "snapshot_metadata": ["snapshots/*.meta.yaml"],
+    "sample_records": ["embryos/*/embryo.yaml"],
+    "volumes": ["embryos/*/volumes/*.tif"],
+    "volume_metadata": ["embryos/*/volumes/*.meta.yaml"],
+    "projections": ["embryos/*/projections/*"],
+    "perception_predictions": ["embryos/*/predictions.jsonl"],
+    "perception_traces": ["embryos/*/traces/*.json"],
+    "debug_exports": ["debug_exports/**/debug_context.md"],
+    "profile_spans": ["profile.jsonl", "profile_spans.jsonl"],
+}
+
+
+def audit_datastore(root: Path) -> DatastoreAudit:
+    """Scan a FileStore root and return a structured audit report."""
+    root = Path(root)
+    sessions_root = root / "sessions"
+    sessions: List[SessionAudit] = []
+    gaps: List[str] = []
+    totals: Dict[str, int] = {key: 0 for key in _COUNT_PATTERNS}
+    totals.update({"campaign_plans": 0, "plan_history": 0, "incoming_files": 0, "logs": 0})
+
+    if not sessions_root.exists():
+        gaps.append(f"missing sessions directory: {sessions_root}")
+    else:
+        for session_dir in sorted(p for p in sessions_root.iterdir() if p.is_dir()):
+            session = _audit_session(session_dir)
+            sessions.append(session)
+            for key, count in session.artifact_counts.items():
+                totals[key] = totals.get(key, 0) + count
+            gaps.extend(f"{Path(session.session_dir).name}: {gap}" for gap in session.gaps)
+
+    totals["campaign_plans"] = _count(root / "agent" / "campaigns", "**/plan/current.yaml")
+    totals["plan_history"] = _count(root / "agent" / "campaigns", "**/plan/history/*.yaml")
+    totals["incoming_files"] = _count(root / "incoming", "*")
+    totals["logs"] = _count(root / "logs", "*")
+
+    return DatastoreAudit(
+        root=str(root),
+        session_count=len(sessions),
+        artifact_counts=totals,
+        gaps=gaps,
+        sessions=sessions,
+    )
+
+
+def format_audit_markdown(report: DatastoreAudit) -> str:
+    """Render an audit report as concise Markdown."""
+    lines = [
+        f"# Datastore Audit: `{report.root}`",
+        "",
+        f"Sessions: {report.session_count}",
+        "",
+        "## Artifact Counts",
+        "",
+    ]
+    for key, count in sorted(report.artifact_counts.items()):
+        lines.append(f"- {key}: {count}")
+
+    lines.extend(["", "## Gaps", ""])
+    lines.extend(f"- {gap}" for gap in report.gaps) if report.gaps else lines.append("- none")
+
+    lines.extend(["", "## Sessions", ""])
+    for session in report.sessions:
+        label = session.session_id or Path(session.session_dir).name
+        lines.append(f"- `{label}`: {sum(session.artifact_counts.values())} counted artifacts")
+    return "\n".join(lines) + "\n"
+
+
+def _audit_session(session_dir: Path) -> SessionAudit:
+    counts = {
+        key: sum(_count(session_dir, pattern) for pattern in patterns)
+        for key, patterns in _COUNT_PATTERNS.items()
+    }
+    gaps: List[str] = []
+    session_data = _read_yaml(session_dir / "session.yaml", gaps)
+    session_id = str(session_data.get("session_id")) if session_data else None
+
+    if not session_data:
+        gaps.append("missing or unreadable session.yaml")
+
+    for volume in session_dir.glob("embryos/*/volumes/*.tif"):
+        meta = volume.with_suffix(".meta.yaml")
+        if not meta.exists():
+            gaps.append(f"volume missing metadata sidecar: {volume.relative_to(session_dir)}")
+
+    for snapshot in session_dir.glob("snapshots/*.tif"):
+        meta = snapshot.with_suffix(".meta.yaml")
+        if not meta.exists():
+            gaps.append(f"snapshot missing metadata sidecar: {snapshot.relative_to(session_dir)}")
+
+    for embryo in session_dir.glob("embryos/*"):
+        if embryo.is_dir() and not (embryo / "embryo.yaml").exists():
+            gaps.append(f"sample missing embryo.yaml: {embryo.relative_to(session_dir)}")
+
+    return SessionAudit(
+        session_dir=str(session_dir),
+        session_id=session_id,
+        artifact_counts=counts,
+        gaps=gaps,
+    )
+
+
+def _count(root: Path, pattern: str) -> int:
+    if not root.exists():
+        return 0
+    return sum(1 for p in root.glob(pattern) if p.is_file())
+
+
+def _read_yaml(path: Path, gaps: List[str]) -> Dict:
+    if not path.exists():
+        return {}
+    try:
+        data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+    except Exception as exc:
+        gaps.append(f"unreadable YAML {path.name}: {exc}")
+        return {}
+    return data if isinstance(data, dict) else {}
+
+
+def main(argv: Optional[List[str]] = None) -> int:
+    parser = argparse.ArgumentParser(description="Audit a Gently3 datastore root")
+    parser.add_argument("root", type=Path, help="Gently3/FileStore root")
+    parser.add_argument("--json", action="store_true", help="Write JSON instead of Markdown")
+    parser.add_argument("--output", type=Path, help="Optional report output path")
+    args = parser.parse_args(argv)
+
+    report = audit_datastore(args.root)
+    text = (
+        json.dumps(report.to_dict(), indent=2)
+        if args.json
+        else format_audit_markdown(report)
+    )
+    if args.output:
+        args.output.parent.mkdir(parents=True, exist_ok=True)
+        args.output.write_text(text, encoding="utf-8")
+    else:
+        print(text, end="" if text.endswith("\n") else "\n")
+    return 1 if report.gaps else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_datastore_audit.py b/tests/test_datastore_audit.py
new file mode 100644
index 00000000..4b45173c
--- /dev/null
+++ b/tests/test_datastore_audit.py
@@ -0,0 +1,49 @@
+import json
+
+from gently.core.datastore_audit import audit_datastore, format_audit_markdown
+
+
+def test_datastore_audit_counts_artifacts_and_flags_missing_sidecars(tmp_path):
+    session_dir = tmp_path / "sessions" / "20260601_test_abc12345"
+    volume_dir = session_dir / "embryos" / "embryo_1" / "volumes"
+    snapshot_dir = session_dir / "snapshots"
+    trace_dir = session_dir / "embryos" / "embryo_1" / "traces"
+    volume_dir.mkdir(parents=True)
+    snapshot_dir.mkdir(parents=True)
+    trace_dir.mkdir(parents=True)
+
+    (session_dir / "session.yaml").write_text("session_id: abc12345\n", encoding="utf-8")
+    (session_dir / "timeline.jsonl").write_text("{}\n", encoding="utf-8")
+    (session_dir / "interaction_log.jsonl").write_text("{}\n", encoding="utf-8")
+    (session_dir / "embryos" / "embryo_1" / "embryo.yaml").write_text(
+        "embryo_id: embryo_1\n",
+        encoding="utf-8",
+    )
+    (volume_dir / "t0001.tif").write_bytes(b"fake-tif")
+    (snapshot_dir / "bottom_test.tif").write_bytes(b"fake-tif")
+    (snapshot_dir / "bottom_test.meta.yaml").write_text("source: bottom\n", encoding="utf-8")
+    (trace_dir / "t0001.json").write_text(json.dumps({"ok": True}), encoding="utf-8")
+    (tmp_path / "agent" / "campaigns" / "c1" / "plan").mkdir(parents=True)
+    (tmp_path / "agent" / "campaigns" / "c1" / "plan" / "current.yaml").write_text(
+        "[]\n",
+        encoding="utf-8",
+    )
+
+    report = audit_datastore(tmp_path)
+    markdown = format_audit_markdown(report)
+
+    assert report.session_count == 1
+    assert report.artifact_counts["volumes"] == 1
+    assert report.artifact_counts["volume_metadata"] == 0
+    assert report.artifact_counts["snapshots"] == 1
+    assert report.artifact_counts["snapshot_metadata"] == 1
+    assert report.artifact_counts["campaign_plans"] == 1
+    assert any("volume missing metadata sidecar" in gap for gap in report.gaps)
+    assert "Artifact Counts" in markdown
+
+
+def test_datastore_audit_flags_missing_sessions_directory(tmp_path):
+    report = audit_datastore(tmp_path)
+
+    assert report.session_count == 0
+    assert any("missing sessions directory" in gap for gap in report.gaps)