From cb1ac023341abe8552cea89217bd878e5f683e39 Mon Sep 17 00:00:00 2001
From: ceej640 <42260127+Ceej640@users.noreply.github.com>
Date: Sat, 30 May 2026 20:40:47 -0400
Subject: [PATCH 1/4] Fix test suite collection and stale expectations
---
gently/harness/conversation.py | 41 +++++++++++++++++++++++++++++
gently/mesh/mesh_service.py | 20 +++++++++++---
tests/test_campaign_coordination.py | 8 ++++++
tests/test_dispim_device_safety.py | 33 +++++++++++++----------
4 files changed, 85 insertions(+), 17 deletions(-)
diff --git a/gently/harness/conversation.py b/gently/harness/conversation.py
index e5de53d9..bfd09684 100644
--- a/gently/harness/conversation.py
+++ b/gently/harness/conversation.py
@@ -17,6 +17,47 @@
logger = logging.getLogger(__name__)
+_TEXT_TOOL_CALL_RE = re.compile(r"\s*(.*?)\s*", re.DOTALL | re.IGNORECASE)
+
+
+def _extract_text_tool_calls(text: str) -> tuple[str, List[Dict[str, Any]]]:
+ """Extract JSON tool calls embedded in text fallback tags.
+
+ Some model/test harness paths may emit a tool request as text instead of
+ structured ``tool_use`` blocks. Keep parsing permissive, but only return
+ well-formed objects that name a tool.
+ """
+ if not text:
+ return text, []
+
+ calls: List[Dict[str, Any]] = []
+
+ def _remove_or_collect(match: re.Match) -> str:
+ try:
+ payload = json.loads(match.group(1).strip())
+ except (TypeError, json.JSONDecodeError):
+ return ""
+ if not isinstance(payload, dict):
+ return ""
+ name = payload.get("name")
+ if not name:
+ return ""
+ tool_input = payload.get("input")
+ if tool_input is None:
+ tool_input = payload.get("arguments", {})
+ if tool_input is None:
+ tool_input = {}
+ calls.append({
+ "name": name,
+ "input": tool_input,
+ "id": payload.get("id"),
+ })
+ return ""
+
+ cleaned = _TEXT_TOOL_CALL_RE.sub(_remove_or_collect, text)
+ return cleaned, calls
+
+
def _extend_tool_calls(out: List[Dict[str, Any]], content_blocks) -> None:
"""Append every tool_use block in content_blocks to out.
diff --git a/gently/mesh/mesh_service.py b/gently/mesh/mesh_service.py
index edac242c..c5cbe5b1 100644
--- a/gently/mesh/mesh_service.py
+++ b/gently/mesh/mesh_service.py
@@ -212,7 +212,7 @@ def _on_peer_discovered(self, data: dict, sender_ip: str, verified: bool = False
# Only fetch status from trusted peers
if trusted:
- asyncio.ensure_future(self._fetch_and_update_peer(peer))
+ self._schedule_status_fetch(peer)
def _on_peer_heartbeat(self, instance_id: str, sender_ip: str, verified: bool = False):
"""Called on subsequent heartbeats from a known peer."""
@@ -228,7 +228,7 @@ def _on_nudge_received(self, peer_id: str, sender_ip: str):
if peer:
peer.last_seen = time.time()
peer.ip_address = sender_ip
- asyncio.ensure_future(self._fetch_and_update_peer(peer))
+ self._schedule_status_fetch(peer)
logger.debug(f"Mesh: nudge from {peer.hostname} ({peer_id[:8]}), refetching")
def _on_local_status_changed(self, event):
@@ -311,6 +311,20 @@ async def _fetch_and_update_peer(self, peer: PeerInfo):
"hostname": peer.hostname,
})
+ def _schedule_status_fetch(self, peer: PeerInfo) -> None:
+ """Schedule a best-effort peer status fetch when the service is running."""
+ if not self._peer_client:
+ return
+ try:
+ loop = asyncio.get_running_loop()
+ except RuntimeError:
+ logger.debug(
+ "Mesh: skipping status fetch for %s because no event loop is running",
+ peer.instance_id[:8],
+ )
+ return
+ loop.create_task(self._fetch_and_update_peer(peer))
+
# ------------------------------------------------------------------
# Pairing integration
# ------------------------------------------------------------------
@@ -336,7 +350,7 @@ def mark_peer_trusted(self, instance_id: str):
if cert_fp:
peer.tls_enabled = True
# Kick off an immediate status fetch now that we trust them
- asyncio.ensure_future(self._fetch_and_update_peer(peer))
+ self._schedule_status_fetch(peer)
logger.info(f"Mesh: peer {peer.hostname} ({instance_id[:8]}) now trusted")
# ------------------------------------------------------------------
diff --git a/tests/test_campaign_coordination.py b/tests/test_campaign_coordination.py
index 18ee49e2..49737fc4 100644
--- a/tests/test_campaign_coordination.py
+++ b/tests/test_campaign_coordination.py
@@ -15,6 +15,14 @@
import urllib.request
import urllib.error
+if "pytest" in sys.modules and os.environ.get("GENTLY_RUN_LIVE_CAMPAIGN_TESTS") != "1":
+ import pytest
+
+ pytest.skip(
+ "live campaign coordination script requires a running Gently server",
+ allow_module_level=True,
+ )
+
BASE = os.environ.get("GENTLY_URL", "http://localhost:8080")
FAKE_PEER = "test-peer-001"
FAKE_HOST = "test-machine"
diff --git a/tests/test_dispim_device_safety.py b/tests/test_dispim_device_safety.py
index 40fab85a..9ca67e86 100644
--- a/tests/test_dispim_device_safety.py
+++ b/tests/test_dispim_device_safety.py
@@ -27,7 +27,7 @@ def __init__(self):
self._configs = {} # group -> current_config
self._available_configs = {} # group -> [configs]
self._exposure = 10.0
- self._camera_device = None
+ self._camera_device = ""
self._focus_device = None
self._circular_buffer = []
self._sequence_running = False
@@ -83,6 +83,9 @@ def waitForConfig(self, group, config):
def setCameraDevice(self, name):
self._camera_device = name
+ def getCameraDevice(self):
+ return self._camera_device
+
def setFocusDevice(self, name):
self._focus_device = name
@@ -226,9 +229,9 @@ def make_z_stage(core=None, limits=(50.0, 250.0)):
return DiSPIMZstage("ZStage", core or make_core(), limits=limits)
-def make_xy_stage(core=None, x_limits=(2000.0, 4000.0), y_limits=(-1000.0, 1000.0)):
+def make_xy_stage(core=None):
from gently.hardware.dispim.devices.stage import DiSPIMXYStage
- return DiSPIMXYStage("XYStage", core or make_core(), x_limits=x_limits, y_limits=y_limits)
+ return DiSPIMXYStage("XYStage", core or make_core())
def make_piezo(core=None, limits=(-200.0, 200.0)):
@@ -318,38 +321,40 @@ class TestXYStageBounds:
def test_valid_xy_position(self):
stage = make_xy_stage()
- status = stage.set([3000.0, 0.0])
+ x = (stage.x_limits[0] + stage.x_limits[1]) / 2.0
+ y = (stage.y_limits[0] + stage.y_limits[1]) / 2.0
+ status = stage.set([x, y])
status.wait(timeout=2)
- assert stage.core._xy_position == (3000.0, 0.0)
+ assert stage.core._xy_position == (x, y)
def test_x_below_lower_limit(self):
stage = make_xy_stage()
- status = stage.set([1999.0, 0.0])
- with pytest.raises(ValueError, match="outside limits"):
+ status = stage.set([stage.x_limits[0] - 1.0, 0.0])
+ with pytest.raises(ValueError, match="outside hardware limits"):
status.wait(timeout=2)
def test_x_above_upper_limit(self):
stage = make_xy_stage()
- status = stage.set([4001.0, 0.0])
- with pytest.raises(ValueError, match="outside limits"):
+ status = stage.set([stage.x_limits[1] + 1.0, 0.0])
+ with pytest.raises(ValueError, match="outside hardware limits"):
status.wait(timeout=2)
def test_y_below_lower_limit(self):
stage = make_xy_stage()
- status = stage.set([3000.0, -1001.0])
- with pytest.raises(ValueError, match="outside limits"):
+ status = stage.set([0.0, stage.y_limits[0] - 1.0])
+ with pytest.raises(ValueError, match="outside hardware limits"):
status.wait(timeout=2)
def test_y_above_upper_limit(self):
stage = make_xy_stage()
- status = stage.set([3000.0, 1001.0])
- with pytest.raises(ValueError, match="outside limits"):
+ status = stage.set([0.0, stage.y_limits[1] + 1.0])
+ with pytest.raises(ValueError, match="outside hardware limits"):
status.wait(timeout=2)
def test_core_not_called_on_invalid_x(self):
core = make_core()
stage = make_xy_stage(core=core)
- stage.set([0.0, 0.0]) # x=0 is below x_limits[0]=2000
+ stage.set([stage.x_limits[0] - 1.0, 0.0])
time.sleep(0.1)
assert not any(c[0] == 'setXYPosition' for c in core.call_log)
From c10c71b9f7a308342515a96ee7c19fddf4c23ec8 Mon Sep 17 00:00:00 2001
From: ceej640 <42260127+Ceej640@users.noreply.github.com>
Date: Sat, 30 May 2026 20:55:05 -0400
Subject: [PATCH 2/4] Harden FileStore path and YAML loading
---
gently/app/agent.py | 12 ++---
gently/core/file_store.py | 78 +++++++++++++++++++++++----------
tests/test_file_store_safety.py | 45 +++++++++++++++++++
3 files changed, 106 insertions(+), 29 deletions(-)
create mode 100644 tests/test_file_store_safety.py
diff --git a/gently/app/agent.py b/gently/app/agent.py
index 9b144f92..5ea06ae9 100644
--- a/gently/app/agent.py
+++ b/gently/app/agent.py
@@ -1303,13 +1303,15 @@ def _compute_imported_dose(self, source_session_id: str, embryo_id: str) -> Dict
if not self.store:
return result
- # FileStore exposes _session_dir(session_id) → resolved Path.
- session_dir_fn = getattr(self.store, '_session_dir', None)
- sd = session_dir_fn(source_session_id) if callable(session_dir_fn) else None
- if sd is None:
+ # FileStore exposes _embryo_dir(session_id, embryo_id) with validation.
+ embryo_dir_fn = getattr(self.store, '_embryo_dir', None)
+ if not callable(embryo_dir_fn):
return result
- vols_dir = Path(sd) / 'embryos' / embryo_id / 'volumes'
+ try:
+ vols_dir = Path(embryo_dir_fn(source_session_id, embryo_id)) / 'volumes'
+ except (FileNotFoundError, ValueError):
+ return result
if not vols_dir.is_dir():
return result
diff --git a/gently/core/file_store.py b/gently/core/file_store.py
index e4f14009..e1f914c7 100644
--- a/gently/core/file_store.py
+++ b/gently/core/file_store.py
@@ -81,6 +81,36 @@
# Helpers
# ---------------------------------------------------------------------------
+_INVALID_PATH_COMPONENT_RE = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
+
+
+def _validate_path_component(value: str, label: str) -> str:
+ """Validate a caller-controlled value before using it as one path part."""
+ if not isinstance(value, str):
+ raise ValueError(f"{label} must be a string")
+ if not value:
+ raise ValueError(f"{label} must not be empty")
+ if value.strip() != value:
+ raise ValueError(f"{label} must not start or end with whitespace: {value!r}")
+ if value in {".", ".."} or _INVALID_PATH_COMPONENT_RE.search(value):
+ raise ValueError(f"{label} contains unsafe path characters: {value!r}")
+ if value.rstrip(" .") != value:
+ raise ValueError(f"{label} must not end with a space or dot: {value!r}")
+ return value
+
+
+def _safe_child_path(parent: Path, component: str, label: str) -> Path:
+ """Return parent/component after validating it stays below parent."""
+ component = _validate_path_component(component, label)
+ base = parent.resolve()
+ child = (parent / component).resolve()
+ try:
+ child.relative_to(base)
+ except ValueError:
+ raise ValueError(f"{label} escapes storage root: {component!r}")
+ return child
+
+
def _slugify(text: str, max_len: int = 30) -> str:
"""Lowercase, replace non-alphanum with hyphens, truncate."""
if not text:
@@ -173,15 +203,9 @@ def _write_yaml(path: Path, data: Any) -> None:
def _read_yaml(path: Path) -> Any:
"""Read a YAML file. Returns None if missing or empty.
- Tolerant of legacy session files written before _sanitize_for_yaml
- existed — those embed numpy scalars as
- ``!!python/object/apply:numpy.core.multiarray.scalar`` tags that
- safe_load refuses to construct. When we hit such a file we fall
- back to unsafe_load (the only writer of these files is our own
- code on local disk, same trust boundary as the code itself) and
- immediately sanitize the result so the caller always receives
- native Python types. New writes go through _write_yaml + safe_dump
- so legacy form does not propagate.
+ Never constructs Python objects from YAML. Legacy files containing
+ ``!!python/object`` or numpy constructor tags must be migrated by a
+ trusted offline tool before FileStore will read them.
"""
if not path.exists():
return None
@@ -193,8 +217,10 @@ def _read_yaml(path: Path) -> Any:
marker = str(err)
if 'python/object' not in marker and 'numpy' not in marker:
raise
- data = yaml.unsafe_load(text)
- return _sanitize_for_yaml(data)
+ raise ValueError(
+ f"Refusing to load unsafe YAML tags from {path}. "
+ "Migrate this file to safe YAML before opening it in Gently."
+ ) from err
def _append_jsonl(path: Path, record: dict) -> None:
@@ -283,7 +309,10 @@ def _require_session_dir(self, session_id: str) -> Path:
def _embryo_dir(self, session_id: str, embryo_id: str) -> Path:
sd = self._require_session_dir(session_id)
- return sd / "embryos" / embryo_id
+ return self._embryo_dir_for_session(sd, embryo_id)
+
+ def _embryo_dir_for_session(self, session_dir: Path, embryo_id: str) -> Path:
+ return _safe_child_path(session_dir / "embryos", embryo_id, "embryo_id")
def _volume_dir(self, session_id: str, embryo_id: str) -> Path:
d = self._embryo_dir(session_id, embryo_id) / "volumes"
@@ -555,7 +584,7 @@ def get_embryo(self, session_id: str, embryo_id: str) -> Optional[EmbryoInfo]:
sd = self._session_dir(session_id)
if sd is None:
return None
- yaml_path = sd / "embryos" / embryo_id / "embryo.yaml"
+ yaml_path = self._embryo_dir_for_session(sd, embryo_id) / "embryo.yaml"
data = _read_yaml(yaml_path)
return _normalize_embryo_record(data)
@@ -733,7 +762,8 @@ def get_volume_path(
if sd is None:
return None
vol_path = (
- sd / "embryos" / embryo_id / "volumes" / self._volume_filename(timepoint)
+ self._embryo_dir_for_session(sd, embryo_id)
+ / "volumes" / self._volume_filename(timepoint)
)
if vol_path.exists():
return vol_path
@@ -753,7 +783,7 @@ def list_volumes(
# Determine which embryo dirs to scan
if embryo_id:
- dirs = [embryos_dir / embryo_id]
+ dirs = [self._embryo_dir_for_session(sd, embryo_id)]
else:
dirs = sorted(
d for d in embryos_dir.iterdir() if d.is_dir()
@@ -816,8 +846,8 @@ def get_projection_path(
if sd is None:
return None
proj_path = (
- sd / "embryos" / embryo_id / "projections"
- / self._projection_filename(timepoint)
+ self._embryo_dir_for_session(sd, embryo_id)
+ / "projections" / self._projection_filename(timepoint)
)
if proj_path.exists():
return proj_path
@@ -834,7 +864,7 @@ def list_projection_timepoints(
sd = self._session_dir(session_id)
if sd is None:
return []
- proj_dir = sd / "embryos" / embryo_id / "projections"
+ proj_dir = self._embryo_dir_for_session(sd, embryo_id) / "projections"
if not proj_dir.exists():
return []
tps: List[int] = []
@@ -861,7 +891,7 @@ def list_projections(
sd = self._session_dir(session_id)
if sd is None:
return []
- proj_dir = sd / "embryos" / embryo_id / "projections"
+ proj_dir = self._embryo_dir_for_session(sd, embryo_id) / "projections"
if not proj_dir.exists():
return []
@@ -886,8 +916,8 @@ def list_projections(
# Use the volume sidecar's acquired_at as the projection created_at
# if available; otherwise use the file mtime.
meta_path = (
- sd / "embryos" / embryo_id / "volumes"
- / self._volume_meta_filename(tp)
+ self._embryo_dir_for_session(sd, embryo_id)
+ / "volumes" / self._volume_meta_filename(tp)
)
vol_meta = _read_yaml(meta_path)
created = (
@@ -1133,7 +1163,7 @@ def store_prediction(
# an offset based on embryo ordering. A simpler and safer approach:
# use a session-level counter stored in perception_runs.yaml.
sd = self._require_session_dir(session_id)
- pred_path = sd / "embryos" / embryo_id / "predictions.jsonl"
+ pred_path = self._embryo_dir_for_session(sd, embryo_id) / "predictions.jsonl"
existing = _read_jsonl(pred_path)
prediction_id = len(existing) + 1
@@ -1175,7 +1205,7 @@ def get_predictions(
# Determine which embryo dirs to read
if embryo_id:
- dirs = [embryos_dir / embryo_id]
+ dirs = [self._embryo_dir_for_session(sd, embryo_id)]
else:
dirs = sorted(d for d in embryos_dir.iterdir() if d.is_dir())
@@ -1249,7 +1279,7 @@ def get_ground_truth(
sd = self._session_dir(session_id)
if sd is None:
return []
- gt_path = sd / "embryos" / embryo_id / "ground_truth.yaml"
+ gt_path = self._embryo_dir_for_session(sd, embryo_id) / "ground_truth.yaml"
entries: list = _read_yaml(gt_path) or []
entries.sort(key=lambda e: e.get("start_timepoint", 0))
return entries
diff --git a/tests/test_file_store_safety.py b/tests/test_file_store_safety.py
new file mode 100644
index 00000000..a1303b23
--- /dev/null
+++ b/tests/test_file_store_safety.py
@@ -0,0 +1,45 @@
+import pytest
+
+from gently.core.file_store import FileStore, _read_yaml
+
+
+def _new_store(tmp_path):
+ store = FileStore(tmp_path / "store")
+ store.create_session("s1", name="safety")
+ return store
+
+
+@pytest.mark.parametrize("embryo_id", ["../outside", "..\\outside", "bad:name", "trailing."])
+def test_embryo_id_rejects_unsafe_path_components(tmp_path, embryo_id):
+ store = _new_store(tmp_path)
+
+ with pytest.raises(ValueError, match="embryo_id"):
+ store.register_embryo("s1", embryo_id)
+
+ assert not (store.root / "outside").exists()
+
+
+def test_prediction_path_uses_validated_embryo_id(tmp_path):
+ store = _new_store(tmp_path)
+
+ with pytest.raises(ValueError, match="embryo_id"):
+ store.store_prediction(
+ run_id=1,
+ session_id="s1",
+ embryo_id="../outside",
+ timepoint=0,
+ predicted_stage="early",
+ )
+
+ assert not (store.root / "outside").exists()
+
+
+def test_read_yaml_refuses_python_object_tags(tmp_path):
+ path = tmp_path / "unsafe.yaml"
+ path.write_text(
+ "value: !!python/object/apply:builtins.eval ['1 + 1']\n",
+ encoding="utf-8",
+ )
+
+ with pytest.raises(ValueError, match="Refusing to load unsafe YAML"):
+ _read_yaml(path)
From 9e932255dacea25515a182986cb7848a01df5e98 Mon Sep 17 00:00:00 2001
From: Johnson
Date: Mon, 1 Jun 2026 00:48:47 -0400
Subject: [PATCH 3/4] Add datastore audit plan
---
README.md | 1 +
docs/datastore-audit.md | 75 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 76 insertions(+)
create mode 100644 docs/datastore-audit.md
diff --git a/README.md b/README.md
index 28af4c12..5847e26f 100644
--- a/README.md
+++ b/README.md
@@ -178,6 +178,7 @@ That's the loop: **talk → plan → inspect.** With hardware connected (drop `-
| [What Gently Can Do](docs/guides/capabilities.md) | Everyone | Perception, detection, plan mode, memory, mesh, safety |
| [Build a Plugin](docs/guides/build-a-plugin.md) | Developers | Create organism and hardware plugins for other modalities |
| [Hardware Setup](docs/guides/hardware-setup.md) | Labs | Connect a diSPIM, start the device layer, first acquisition |
+| [Datastore Audit](docs/datastore-audit.md) | Developers | Decide whether Gently3 can evolve or needs a Gently4 store API |
## Architecture
diff --git a/docs/datastore-audit.md b/docs/datastore-audit.md
new file mode 100644
index 00000000..96e147d5
--- /dev/null
+++ b/docs/datastore-audit.md
@@ -0,0 +1,75 @@
+# Datastore Audit
+
+The FileStore safety work is a prerequisite for a larger question: whether the
+current Gently3 datastore is sound enough to evolve, or whether a Gently4 store
+API is needed. Do the audit before choosing a migration.
+
+## Audit Questions
+
+For every data product Gently creates or consumes, answer:
+
+- Is this durable source data, derived/recomputable data, runtime state, or UI
+ cache?
+- Where is it stored on disk?
+- Is the path schema documented and safe against untrusted identifiers?
+- Is the file format stable, versioned, and readable without importing runtime
+ hardware dependencies?
+- Can a biologist browse it by session, sample, timepoint, modality, and
+ provenance?
+- Can downstream analysis find the raw data and the metadata needed to interpret
+ it?
+- Is there data Gently uses but does not persist?
+- Is there data Gently stores but never reads, displays, exports, or validates?
+
+## Inventory Template
+
+| Data product | Current path/table | Class | Producer | Consumer | Browse need | Gap |
+| --- | --- | --- | --- | --- | --- | --- |
+| session metadata | `sessions/*/session.yaml` | durable | launcher/session manager | UI, resume, audit | list sessions | check schema version |
+| timeline/events | `timeline.jsonl`, `events.jsonl` | durable | event capture | replay, debug export | filter by time/type | standardize event names |
+| interaction log | `interaction_log.jsonl` | durable | agent runtime | debug export | inspect chat/tool flow | include profile links |
+| embryo/sample state | `embryos/*/embryo.yaml` | durable | marking/calibration/acquisition | tools, UI, resume | browse sample state | generalize beyond embryos |
+| volumes/snapshots | `volumes/*.tif`, `snapshots/*.tif` | durable source | acquisition | perception, analysis | preview, export | verify metadata sidecars |
+| projections | `projections/*.jpg` | derived | store/perception | UI | preview | mark recomputable |
+| perception traces | `traces/*.json`, `predictions.jsonl` | durable derived | perception | UI/debug/eval | inspect reasoning | link to source volume |
+| plans/campaigns | `agent/campaigns/*` | durable | plan mode | UI, execution | browse by campaign | align with session data |
+| debug bundles | `debug_exports/*` | derived | debug exporter | coding agent | download/share | retention policy |
+
+## Biologist-Facing Browser
+
+A useful data browser should organize by:
+
+- session and experimental intent
+- sample or embryo
+- timepoint
+- modality: overview, lightsheet volume, projection, perception, plan, event
+- provenance: acquisition settings, calibration, exposure, software version,
+ operator action, and agent decision
+
+The browser should distinguish raw source data from derived previews and should
+always expose the raw file path/export path for analysis outside Gently.
+
+## Gently4 Decision Criteria
+
+Stay on Gently3 and migrate incrementally if:
+
+- path schemas can be versioned in place
+- all durable data can be discovered from `sessions/`
+- missing metadata can be added as sidecars without breaking existing sessions
+- the UI can browse the store without special-case crawlers
+
+Define a Gently4 API if:
+
+- durable data are split across incompatible roots
+- old sessions cannot be safely migrated or indexed
+- common queries require scanning many large files
+- sample abstractions cannot generalize without changing the store contract
+- provenance links between raw data, perception, plans, and operator actions are
+ not representable in the current layout
+
+## Safety Tie-In
+
+The path and YAML hardening in this PR should remain part of any future
+datastore design. A biologist-facing browser or migration API cannot be trusted
+unless user-controlled identifiers stay inside the store root and legacy files
+fail closed when they contain unsafe constructors.
From aa042ab5c5b907bccb1880c28f643cc321121d01 Mon Sep 17 00:00:00 2001
From: Johnson
Date: Mon, 1 Jun 2026 01:34:12 -0400
Subject: [PATCH 4/4] Add datastore audit command
---
docs/datastore-audit.md | 21 ++++
gently/core/datastore_audit.py | 184 +++++++++++++++++++++++++++++++++
tests/test_datastore_audit.py | 49 +++++++++
3 files changed, 254 insertions(+)
create mode 100644 gently/core/datastore_audit.py
create mode 100644 tests/test_datastore_audit.py
diff --git a/docs/datastore-audit.md b/docs/datastore-audit.md
index 96e147d5..00307d2a 100644
--- a/docs/datastore-audit.md
+++ b/docs/datastore-audit.md
@@ -21,6 +21,27 @@ For every data product Gently creates or consumes, answer:
- Is there data Gently uses but does not persist?
- Is there data Gently stores but never reads, displays, exports, or validates?
+## Audit Command
+
+Run a first-pass inventory against a Gently3/FileStore root:
+
+```shell
+python -m gently.core.datastore_audit D:/Gently3
+```
+
+Use JSON output for scripts:
+
+```shell
+python -m gently.core.datastore_audit D:/Gently3 --json --output audit.json
+```
+
+The command counts session metadata, timelines/events, interaction logs,
+snapshots, volumes, sidecars, sample records, projections, perception traces,
+debug exports, profile spans, campaign plans, incoming files, and logs. It also
+flags obvious browseability/provenance gaps, including missing `session.yaml`,
+unreadable YAML, volume TIFFs without `.meta.yaml` sidecars, snapshot TIFFs
+without sidecars, and sample directories without `embryo.yaml`.
+
## Inventory Template
| Data product | Current path/table | Class | Producer | Consumer | Browse need | Gap |
diff --git a/gently/core/datastore_audit.py b/gently/core/datastore_audit.py
new file mode 100644
index 00000000..1835873f
--- /dev/null
+++ b/gently/core/datastore_audit.py
@@ -0,0 +1,184 @@
+"""Audit a Gently3 FileStore root for data inventory and obvious gaps."""
+
+from __future__ import annotations
+
+import argparse
+import json
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import yaml
+
+
+@dataclass
+class SessionAudit:
+ """Audit summary for one session directory."""
+
+ session_dir: str
+ session_id: Optional[str]
+ artifact_counts: Dict[str, int] = field(default_factory=dict)
+ gaps: List[str] = field(default_factory=list)
+
+
+@dataclass
+class DatastoreAudit:
+ """Top-level datastore audit report."""
+
+ root: str
+ session_count: int
+ artifact_counts: Dict[str, int]
+ gaps: List[str]
+ sessions: List[SessionAudit]
+
+ def to_dict(self) -> Dict:
+ return asdict(self)
+
+
+_COUNT_PATTERNS = {
+ "session_metadata": ["session.yaml"],
+ "timeline_logs": ["timeline.jsonl", "events.jsonl"],
+ "interaction_logs": ["interaction_log.jsonl"],
+ "snapshots": ["snapshots/*.tif"],
+ "snapshot_metadata": ["snapshots/*.meta.yaml"],
+ "sample_records": ["embryos/*/embryo.yaml"],
+ "volumes": ["embryos/*/volumes/*.tif"],
+ "volume_metadata": ["embryos/*/volumes/*.meta.yaml"],
+ "projections": ["embryos/*/projections/*"],
+ "perception_predictions": ["embryos/*/predictions.jsonl"],
+ "perception_traces": ["embryos/*/traces/*.json"],
+ "debug_exports": ["debug_exports/**/debug_context.md"],
+ "profile_spans": ["profile.jsonl", "profile_spans.jsonl"],
+}
+
+
+def audit_datastore(root: Path) -> DatastoreAudit:
+ """Scan a FileStore root and return a structured audit report."""
+ root = Path(root)
+ sessions_root = root / "sessions"
+ sessions: List[SessionAudit] = []
+ gaps: List[str] = []
+ totals: Dict[str, int] = {key: 0 for key in _COUNT_PATTERNS}
+ totals.update({"campaign_plans": 0, "plan_history": 0, "incoming_files": 0, "logs": 0})
+
+ if not sessions_root.exists():
+ gaps.append(f"missing sessions directory: {sessions_root}")
+ else:
+ for session_dir in sorted(p for p in sessions_root.iterdir() if p.is_dir()):
+ session = _audit_session(session_dir)
+ sessions.append(session)
+ for key, count in session.artifact_counts.items():
+ totals[key] = totals.get(key, 0) + count
+ gaps.extend(f"{Path(session.session_dir).name}: {gap}" for gap in session.gaps)
+
+ totals["campaign_plans"] = _count(root / "agent" / "campaigns", "**/plan/current.yaml")
+ totals["plan_history"] = _count(root / "agent" / "campaigns", "**/plan/history/*.yaml")
+ totals["incoming_files"] = _count(root / "incoming", "*")
+ totals["logs"] = _count(root / "logs", "*")
+
+ return DatastoreAudit(
+ root=str(root),
+ session_count=len(sessions),
+ artifact_counts=totals,
+ gaps=gaps,
+ sessions=sessions,
+ )
+
+
+def format_audit_markdown(report: DatastoreAudit) -> str:
+ """Render an audit report as concise Markdown."""
+ lines = [
+ f"# Datastore Audit: `{report.root}`",
+ "",
+ f"Sessions: {report.session_count}",
+ "",
+ "## Artifact Counts",
+ "",
+ ]
+ for key, count in sorted(report.artifact_counts.items()):
+ lines.append(f"- {key}: {count}")
+
+ lines.extend(["", "## Gaps", ""])
+ lines.extend(f"- {gap}" for gap in report.gaps) if report.gaps else lines.append("- none")
+
+ lines.extend(["", "## Sessions", ""])
+ for session in report.sessions:
+ label = session.session_id or Path(session.session_dir).name
+ lines.append(f"- `{label}`: {sum(session.artifact_counts.values())} counted artifacts")
+ return "\n".join(lines) + "\n"
+
+
+def _audit_session(session_dir: Path) -> SessionAudit:
+ counts = {
+ key: sum(_count(session_dir, pattern) for pattern in patterns)
+ for key, patterns in _COUNT_PATTERNS.items()
+ }
+ gaps: List[str] = []
+ session_data = _read_yaml(session_dir / "session.yaml", gaps)
+ session_id = str(session_data.get("session_id")) if session_data else None
+
+ if not session_data:
+ gaps.append("missing or unreadable session.yaml")
+
+ for volume in session_dir.glob("embryos/*/volumes/*.tif"):
+ meta = volume.with_suffix(".meta.yaml")
+ if not meta.exists():
+ gaps.append(f"volume missing metadata sidecar: {volume.relative_to(session_dir)}")
+
+ for snapshot in session_dir.glob("snapshots/*.tif"):
+ meta = snapshot.with_suffix(".meta.yaml")
+ if not meta.exists():
+ gaps.append(f"snapshot missing metadata sidecar: {snapshot.relative_to(session_dir)}")
+
+ for embryo in session_dir.glob("embryos/*"):
+ if embryo.is_dir() and not (embryo / "embryo.yaml").exists():
+ gaps.append(f"sample missing embryo.yaml: {embryo.relative_to(session_dir)}")
+
+ return SessionAudit(
+ session_dir=str(session_dir),
+ session_id=session_id,
+ artifact_counts=counts,
+ gaps=gaps,
+ )
+
+
+def _count(root: Path, pattern: str) -> int:
+ if not root.exists():
+ return 0
+ return sum(1 for p in root.glob(pattern) if p.is_file())
+
+
+def _read_yaml(path: Path, gaps: List[str]) -> Dict:
+ if not path.exists():
+ return {}
+ try:
+ data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+ except Exception as exc:
+ gaps.append(f"unreadable YAML {path.name}: {exc}")
+ return {}
+ return data if isinstance(data, dict) else {}
+
+
+def main(argv: Optional[List[str]] = None) -> int:
+ parser = argparse.ArgumentParser(description="Audit a Gently3 datastore root")
+ parser.add_argument("root", type=Path, help="Gently3/FileStore root")
+ parser.add_argument("--json", action="store_true", help="Write JSON instead of Markdown")
+ parser.add_argument("--output", type=Path, help="Optional report output path")
+ args = parser.parse_args(argv)
+
+ report = audit_datastore(args.root)
+ text = (
+ json.dumps(report.to_dict(), indent=2)
+ if args.json
+ else format_audit_markdown(report)
+ )
+ if args.output:
+ args.output.parent.mkdir(parents=True, exist_ok=True)
+ args.output.write_text(text, encoding="utf-8")
+ else:
+ print(text, end="" if text.endswith("\n") else "\n")
+ return 1 if report.gaps else 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/tests/test_datastore_audit.py b/tests/test_datastore_audit.py
new file mode 100644
index 00000000..4b45173c
--- /dev/null
+++ b/tests/test_datastore_audit.py
@@ -0,0 +1,49 @@
+import json
+
+from gently.core.datastore_audit import audit_datastore, format_audit_markdown
+
+
+def test_datastore_audit_counts_artifacts_and_flags_missing_sidecars(tmp_path):
+ session_dir = tmp_path / "sessions" / "20260601_test_abc12345"
+ volume_dir = session_dir / "embryos" / "embryo_1" / "volumes"
+ snapshot_dir = session_dir / "snapshots"
+ trace_dir = session_dir / "embryos" / "embryo_1" / "traces"
+ volume_dir.mkdir(parents=True)
+ snapshot_dir.mkdir(parents=True)
+ trace_dir.mkdir(parents=True)
+
+ (session_dir / "session.yaml").write_text("session_id: abc12345\n", encoding="utf-8")
+ (session_dir / "timeline.jsonl").write_text("{}\n", encoding="utf-8")
+ (session_dir / "interaction_log.jsonl").write_text("{}\n", encoding="utf-8")
+ (session_dir / "embryos" / "embryo_1" / "embryo.yaml").write_text(
+ "embryo_id: embryo_1\n",
+ encoding="utf-8",
+ )
+ (volume_dir / "t0001.tif").write_bytes(b"fake-tif")
+ (snapshot_dir / "bottom_test.tif").write_bytes(b"fake-tif")
+ (snapshot_dir / "bottom_test.meta.yaml").write_text("source: bottom\n", encoding="utf-8")
+ (trace_dir / "t0001.json").write_text(json.dumps({"ok": True}), encoding="utf-8")
+ (tmp_path / "agent" / "campaigns" / "c1" / "plan").mkdir(parents=True)
+ (tmp_path / "agent" / "campaigns" / "c1" / "plan" / "current.yaml").write_text(
+ "[]\n",
+ encoding="utf-8",
+ )
+
+ report = audit_datastore(tmp_path)
+ markdown = format_audit_markdown(report)
+
+ assert report.session_count == 1
+ assert report.artifact_counts["volumes"] == 1
+ assert report.artifact_counts["volume_metadata"] == 0
+ assert report.artifact_counts["snapshots"] == 1
+ assert report.artifact_counts["snapshot_metadata"] == 1
+ assert report.artifact_counts["campaign_plans"] == 1
+ assert any("volume missing metadata sidecar" in gap for gap in report.gaps)
+ assert "Artifact Counts" in markdown
+
+
+def test_datastore_audit_flags_missing_sessions_directory(tmp_path):
+ report = audit_datastore(tmp_path)
+
+ assert report.session_count == 0
+ assert any("missing sessions directory" in gap for gap in report.gaps)