From 0ba4ada19486722619ec919f22faaf23129a5b17 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 11:50:36 -0400 Subject: [PATCH 01/71] embryos: split XY position into coarse (bottom-cam) and fine (SPIM) stages EmbryoState now carries position_coarse (bottom-camera detection or manual map placement) and position_fine (future SPIM-objective alignment) as separate fields; stage_position becomes a derived property (fine ?? coarse) so every existing call site keeps working. FileStore round-trips both stages; legacy position_x/position_y on disk backfill into coarse on read. Phase 1 of the Map-as-embryo-home arc -- schema only, no UI changes yet. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 3e4105812544ecc579a7c8ccd1a32809defe417f) --- gently/app/agent.py | 28 ++++++++---- gently/core/file_store.py | 76 +++++++++++++++++++++++++++---- gently/core/store_types.py | 10 +++- gently/harness/session/manager.py | 8 +++- gently/harness/state.py | 48 +++++++++++++++++-- 5 files changed, 146 insertions(+), 24 deletions(-) diff --git a/gently/app/agent.py b/gently/app/agent.py index 1bd8c64a..15eed57d 100644 --- a/gently/app/agent.py +++ b/gently/app/agent.py @@ -847,11 +847,16 @@ def import_embryos_from_session(self, session_id: str, clear_existing: bool = Fa eid, ) src_role = "unassigned" + coarse = row.get("position_coarse") or {} + fine = row.get("position_fine") or {} embryo_states[eid] = { - "stage_position": { - "x": row.get("position_x"), - "y": row.get("position_y"), - }, + # stage_position remains for legacy consumers of this snapshot. + # It carries the resolved (fine ?? coarse) view; add_embryo() + # downstream treats it as coarse, but the explicit + # position_fine field below will override that on restore. 
+ "stage_position": dict(fine) if fine else dict(coarse), + "position_coarse": dict(coarse), + "position_fine": dict(fine), "calibration": row.get("calibration") or {}, "uid": row.get("embryo_uid"), "user_label": row.get("nickname"), @@ -885,13 +890,20 @@ def import_embryos_from_session(self, session_id: str, clear_existing: bool = Fa continue try: - position = embryo_data.get('stage_position', {}) + # Prefer explicit coarse/fine when the snapshot has them + # (FileStore path); fall back to flat stage_position for the + # legacy JSON-snapshot path which only carries the resolved view. + position_coarse = embryo_data.get('position_coarse') + position_fine = embryo_data.get('position_fine') + if position_coarse is None and position_fine is None: + position_coarse = embryo_data.get('stage_position', {}) calibration = embryo_data.get('calibration', {}) source_uid = embryo_data.get('uid') or f"{session_id}_{embryo_id}" self.experiment.add_embryo( embryo_id=embryo_id, - position=position, + position=position_coarse or {}, + position_fine=position_fine or {}, calibration=calibration, user_label=embryo_data.get('user_label'), uid=source_uid, @@ -1037,8 +1049,8 @@ async def on_volume_acquired(self, embryo_id: str, timepoint: int, try: self.store.register_embryo( self.session_id, embryo_id, - position_x=embryo.stage_position.get('x') if embryo.stage_position else None, - position_y=embryo.stage_position.get('y') if embryo.stage_position else None, + position_coarse=embryo.position_coarse or None, + position_fine=embryo.position_fine or None, calibration=embryo.calibration, role=embryo.role, ) diff --git a/gently/core/file_store.py b/gently/core/file_store.py index 8ca383ea..1abbc971 100644 --- a/gently/core/file_store.py +++ b/gently/core/file_store.py @@ -108,6 +108,43 @@ def _sanitize_for_yaml(obj): return obj +def _coarse_from_legacy(record: dict) -> Optional[dict]: + """Extract coarse XY from an embryo.yaml record, accepting either the new + `position_coarse` dict or the 
legacy flat `position_x` / `position_y` keys. + Returns None if neither shape carries usable values. + """ + coarse = record.get("position_coarse") + if isinstance(coarse, dict) and coarse: + return coarse + px, py = record.get("position_x"), record.get("position_y") + if px is None and py is None: + return None + out = {} + if px is not None: + out["x"] = px + if py is not None: + out["y"] = py + return out or None + + +def _normalize_embryo_record(record: Optional[dict]) -> Optional[dict]: + """Backfill an embryo.yaml dict so callers always see the new schema. + + Adds `position_coarse` derived from legacy `position_x` / `position_y` if + only the legacy fields are present, and ensures `position_fine` exists + (as None) for forward-compat. The original record is not mutated. + """ + if record is None: + return None + out = dict(record) + if out.get("position_coarse") is None: + backfill = _coarse_from_legacy(out) + if backfill is not None: + out["position_coarse"] = backfill + out.setdefault("position_fine", None) + return out + + def _write_yaml(path: Path, data: Any) -> None: """Write YAML atomically: write to a temp file, then rename.""" path.parent.mkdir(parents=True, exist_ok=True) @@ -432,6 +469,8 @@ def register_embryo( nickname: str = None, position_x: float = None, position_y: float = None, + position_coarse: dict = None, + position_fine: dict = None, calibration: dict = None, role: str = None, ) -> None: @@ -440,23 +479,38 @@ def register_embryo( ``role`` is the experimental role key from gently.harness.roles.REGISTRY (e.g. ``"test"``, ``"calibration"``, ``"unassigned"``). Persisted in embryo.yaml. None preserves the existing value on update. + + Position has two stages: coarse (bottom-camera / manual map placement) + and fine (future SPIM-objective alignment). New callers should pass + position_coarse / position_fine as dicts of shape {"x": float, "y": + float}. Legacy callers passing position_x / position_y get folded into + coarse automatically. 
""" ed = self._embryo_dir(session_id, embryo_id) ed.mkdir(parents=True, exist_ok=True) + # Fold legacy position_x / position_y into coarse if caller used the + # old kwargs and didn't pass coarse explicitly. + if position_coarse is None and (position_x is not None or position_y is not None): + position_coarse = {} + if position_x is not None: + position_coarse["x"] = position_x + if position_y is not None: + position_coarse["y"] = position_y + yaml_path = ed / "embryo.yaml" existing = _read_yaml(yaml_path) if existing is not None: - # Update: COALESCE behaviour -- keep existing values when new ones - # are None, matching the old ON CONFLICT DO UPDATE SET logic. + # COALESCE update — keep existing values when new ones are None. + existing_coarse = _coarse_from_legacy(existing) embryo_data = { "embryo_id": embryo_id, "session_id": session_id, "embryo_uid": embryo_uid if embryo_uid is not None else existing.get("embryo_uid"), "nickname": nickname if nickname is not None else existing.get("nickname"), - "position_x": position_x if position_x is not None else existing.get("position_x"), - "position_y": position_y if position_y is not None else existing.get("position_y"), + "position_coarse": position_coarse if position_coarse is not None else existing_coarse, + "position_fine": position_fine if position_fine is not None else existing.get("position_fine"), "calibration": calibration if calibration is not None else existing.get("calibration"), "role": role if role is not None else existing.get("role", "test"), "created_at": existing.get("created_at", _now()), @@ -467,8 +521,8 @@ def register_embryo( "session_id": session_id, "embryo_uid": embryo_uid, "nickname": nickname, - "position_x": position_x, - "position_y": position_y, + "position_coarse": position_coarse, + "position_fine": position_fine, "calibration": calibration, "role": role if role is not None else "test", "created_at": _now(), @@ -477,13 +531,17 @@ def register_embryo( _write_yaml(yaml_path, embryo_data) 
def get_embryo(self, session_id: str, embryo_id: str) -> Optional[EmbryoInfo]: - """Read embryo.yaml. Returns None if not found.""" + """Read embryo.yaml. Returns None if not found. + + Backfills position_coarse from legacy position_x / position_y so + callers don't need to know about the old schema. + """ sd = self._session_dir(session_id) if sd is None: return None yaml_path = sd / "embryos" / embryo_id / "embryo.yaml" data = _read_yaml(yaml_path) - return data + return _normalize_embryo_record(data) def list_embryos(self, session_id: str) -> List[EmbryoInfo]: """List all embryos for a session, sorted by embryo_id.""" @@ -500,7 +558,7 @@ def list_embryos(self, session_id: str) -> List[EmbryoInfo]: yaml_path = entry / "embryo.yaml" data = _read_yaml(yaml_path) if data is not None: - result.append(data) + result.append(_normalize_embryo_record(data)) return result # ================================================================== diff --git a/gently/core/store_types.py b/gently/core/store_types.py index 34a466c4..d3cc45d1 100644 --- a/gently/core/store_types.py +++ b/gently/core/store_types.py @@ -14,11 +14,19 @@ class SessionInfo(TypedDict): metadata: Optional[dict] -class EmbryoInfo(TypedDict): +class EmbryoInfo(TypedDict, total=False): embryo_id: str session_id: str embryo_uid: Optional[str] nickname: Optional[str] + # Coarse XY (µm) from bottom-camera detection or manual map placement. + # Shape: {"x": float, "y": float}. Always present once the embryo exists. + position_coarse: Optional[dict] + # Fine XY (µm) from SPIM-objective alignment. None until that workflow + # refines the coarse position. Shape: {"x": float, "y": float}. + position_fine: Optional[dict] + # Legacy flat fields. Still accepted on write and surfaced on read for + # callers that haven't migrated; new code should use position_coarse. 
position_x: Optional[float] position_y: Optional[float] calibration: Optional[dict] diff --git a/gently/harness/session/manager.py b/gently/harness/session/manager.py index 9985b324..47ea38d3 100644 --- a/gently/harness/session/manager.py +++ b/gently/harness/session/manager.py @@ -100,14 +100,18 @@ def _resume_session(self, session_id: str, experiment): embryo.should_skip = embryo_data.get('should_skip', False) embryo.skip_reason = embryo_data.get('skip_reason') - # Also load embryos from store's embryo table + # Also load embryos from store's embryo table. FileStore returns + # position_coarse / position_fine (with legacy position_x / position_y + # backfilled into coarse on read), so both calibration stages survive + # the resume. store_embryos = self.store.list_embryos(session_id) for e in store_embryos: eid = e['embryo_id'] if eid not in experiment.embryos: experiment.add_embryo( embryo_id=eid, - position={'x': e.get('position_x'), 'y': e.get('position_y')}, + position=e.get('position_coarse') or {}, + position_fine=e.get('position_fine') or {}, calibration=json.loads(e['calibration']) if e.get('calibration') else {}, ) diff --git a/gently/harness/state.py b/gently/harness/state.py index a6113480..cb02af74 100644 --- a/gently/harness/state.py +++ b/gently/harness/state.py @@ -131,8 +131,13 @@ class EmbryoState: # accidental Test→Calibration would burn extra dose on the precious sample. role: str = "test" - # Position - stage_position: Dict[str, float] = field(default_factory=dict) # {'x': 1234.5, 'y': 5678.9} + # Position — two-stage: coarse (bottom-camera detection or manual map + # placement, always present once an embryo exists) and fine (populated + # later by SPIM-objective alignment). Resolved value is exposed by the + # `stage_position` property so downstream motion/perception can stay + # agnostic about which stage we're in. 
+ position_coarse: Dict[str, float] = field(default_factory=dict) # {'x': ..., 'y': ...} + position_fine: Dict[str, float] = field(default_factory=dict) # empty until SPIM head alignment calibration: Dict = field(default_factory=dict) # Galvo/piezo parameters detection_confidence: float = 0.0 # SAM/detection confidence score (0-1) @@ -789,6 +794,33 @@ def get_exposure_summary(self) -> str: return f"{self.exposure_count} exposures, {time_str} total" + @property + def stage_position(self) -> Dict[str, float]: + """Resolved XY position — fine if SPIM-aligned, else coarse. + + Coarse comes from the bottom-camera detection / manual map placement. + Fine comes from the SPIM-objective alignment workflow (not built yet). + Callers that just want "where is this embryo" read this; callers that + care about calibration state read position_coarse / position_fine + directly. + """ + return self.position_fine if self.position_fine else self.position_coarse + + @stage_position.setter + def stage_position(self, value: Dict[str, float]) -> None: + """Back-compat setter — writes to coarse. + + Legacy callers that assigned `embryo.stage_position = {...}` were + writing a bottom-camera / manual position; that's the coarse stage. + New code should set position_coarse or position_fine explicitly. 
+ """ + self.position_coarse = value or {} + + @property + def has_fine_position(self) -> bool: + """True once SPIM-objective alignment has refined the coarse position.""" + return bool(self.position_fine) + def to_dict(self) -> Dict: """Serialize for API responses""" return { @@ -798,6 +830,9 @@ def to_dict(self) -> Dict: 'user_label': self.user_label, 'role': self.role, 'stage_position': self.stage_position, + 'position_coarse': self.position_coarse, + 'position_fine': self.position_fine, + 'has_fine_position': self.has_fine_position, 'calibration': self.calibration, 'detection_confidence': self.detection_confidence, 'interval_seconds': self.interval_seconds, @@ -848,13 +883,17 @@ def __init__(self): def add_embryo(self, embryo_id: str, position: Dict = None, calibration: Dict = None, user_label: Optional[str] = None, confidence: float = 0.0, uid: Optional[str] = None, - role: str = "test"): + role: str = "test", position_fine: Dict = None): """Register new embryo. ``role`` must be a key in :data:`gently.harness.roles.REGISTRY` (e.g. ``"test"``, ``"calibration"``, ``"unassigned"``). Unknown roles raise KeyError. + `position` is the coarse XY (bottom-camera detection or manual map + placement). `position_fine` is reserved for the future SPIM-objective + alignment workflow and defaults to empty. + Emits an ``EMBRYO_DETECTED`` event so listeners (e.g. 
the viz server's TimelapseStateTracker, which feeds the device map) learn about marked embryos immediately — not just after the @@ -871,7 +910,8 @@ def add_embryo(self, embryo_id: str, position: Dict = None, self.embryos[embryo_id] = EmbryoState( id=embryo_id, uid=uid, - stage_position=pos, + position_coarse=position or {}, + position_fine=position_fine or {}, calibration=calibration or {}, user_label=user_label, detection_confidence=confidence, From 6f28365b682fb96d3568228f38e0f92a6a684e7d Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 13:33:47 -0400 Subject: [PATCH 02/71] embryos: broadcast EMBRYOS_UPDATE on mutation Adds an EMBRYOS_UPDATE event type and wires ExperimentState's mutations (add_embryo / remove_embryo / assign_nickname / batch clear / editor finish) to publish a full embryo-list snapshot through the agent. The viz server's existing wildcard subscription forwards it to all browser clients, so Phase 3 can render embryos on the Map without polling. ExperimentState stays bus-agnostic via an on_embryos_changed observer hook; the agent wires the publisher at init. Phase 2 of the Map-as-embryo-home arc. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 617e54c95f715cf949bc2b4a02fca65314da21ca) --- gently/app/agent.py | 36 +++++++++++++++++++++++++++++ gently/app/tools/detection_tools.py | 3 ++- gently/core/event_bus.py | 1 + gently/harness/state.py | 29 ++++++++++++++++++++++- 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/gently/app/agent.py b/gently/app/agent.py index 15eed57d..da7d7b11 100644 --- a/gently/app/agent.py +++ b/gently/app/agent.py @@ -125,6 +125,11 @@ def __init__( # Event bus for async messaging (must be before perception manager) self._event_bus = get_event_bus() + # Broadcast the embryo list whenever it mutates. Hooked through the + # state object's observer so add/remove/nickname/restore all publish + # without each call site having to remember. 
+ self.experiment.on_embryos_changed = self._publish_embryos_update + # Perception system (gently-perception harness) self.perceiver = Perceiver() @@ -678,6 +683,36 @@ def _emit_event(self, event_type: EventType, data: Optional[Dict] = None): source="agent", ) + def _publish_embryos_update(self) -> None: + """Broadcast the current embryo list as an EMBRYOS_UPDATE event. + + Wired into ExperimentState.on_embryos_changed at agent init so every + add / remove / restore / nickname change snaps a fresh full-list + snapshot onto the bus. The viz server's wildcard subscription forwards + it straight to connected browsers — that's how the Devices > Map page + learns about embryos without a poll loop. + """ + if self._event_bus is None: + return + try: + embryos = [e.to_dict() for e in self.experiment.embryos.values()] + except Exception: + logger.exception("Failed to serialise embryos for EMBRYOS_UPDATE") + return + payload = { + "embryos": embryos, + "count": len(embryos), + "session_id": getattr(self, "session_id", None), + } + try: + self._event_bus.publish( + event_type=EventType.EMBRYOS_UPDATE, + data=payload, + source="agent.experiment", + ) + except Exception: + logger.exception("Failed to publish EMBRYOS_UPDATE") + def _mark_significant_action(self, action_type: str): """Mark that a significant action occurred (triggers auto-save).""" self._auto_save() @@ -879,6 +914,7 @@ def import_embryos_from_session(self, session_id: str, clear_existing: bool = Fa if clear_existing: self.experiment.embryos.clear() + self.experiment.notify_embryos_changed() imported = [] skipped = [] diff --git a/gently/app/tools/detection_tools.py b/gently/app/tools/detection_tools.py index eda06d81..08cf58fd 100644 --- a/gently/app/tools/detection_tools.py +++ b/gently/app/tools/detection_tools.py @@ -373,7 +373,8 @@ async def edit_embryos( return "No embryos to edit. Run detect_embryos or manual_mark_embryos first." 
# Same flow as manual_mark_embryos: pre-populate with existing markers, - # let user edit, reconcile. + # let user edit, reconcile. notify_embryos_changed is fired by + # manual_mark_embryos / add_embryo internally. return await manual_mark_embryos( exposure_ms=exposure_ms, default_role=default_role, diff --git a/gently/core/event_bus.py b/gently/core/event_bus.py index b086f172..0b7d57b2 100644 --- a/gently/core/event_bus.py +++ b/gently/core/event_bus.py @@ -82,6 +82,7 @@ class EventType(Enum): LASER_CHANGED = auto() DEVICE_STATE_UPDATE = auto() # Periodic device-state snapshot from device layer BOTTOM_CAMERA_FRAME = auto() # Live JPEG frame from the bottom camera stream + EMBRYOS_UPDATE = auto() # Full embryo list snapshot from agent.experiment # System events ERROR_OCCURRED = auto() diff --git a/gently/harness/state.py b/gently/harness/state.py index cb02af74..a52f7b79 100644 --- a/gently/harness/state.py +++ b/gently/harness/state.py @@ -25,13 +25,16 @@ fields are now on ``EmbryoState`` directly. """ +import logging import re from dataclasses import dataclass, field from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple from pathlib import Path import numpy as np +logger = logging.getLogger(__name__) + # Re-export CalibrationPrior from its hardware-specific home for backward compat. # CalibrationPrior is diSPIM-specific (piezo-galvo linear fit). Other hardware # modules will define their own calibration models. @@ -880,6 +883,27 @@ def __init__(self): # Updated after each successful calibration, used to initialize subsequent embryos self.calibration_prior: CalibrationPrior = CalibrationPrior() + # Observer hook — agent wires this at startup to publish EMBRYOS_UPDATE + # over the event bus. Kept as a plain callback so this module stays + # bus-agnostic. 
+ self.on_embryos_changed: Optional[Callable[[], None]] = None + + def notify_embryos_changed(self) -> None: + """Fire the on_embryos_changed observer if one is wired. + + Call this after any mutation the agent can't intercept through + add_embryo / remove_embryo (e.g. a direct write to + embryo.position_coarse). UI hooks must not raise — failures here are + swallowed so state mutations stay durable. + """ + cb = self.on_embryos_changed + if cb is None: + return + try: + cb() + except Exception: + logger.exception("ExperimentState.on_embryos_changed callback failed") + def add_embryo(self, embryo_id: str, position: Dict = None, calibration: Dict = None, user_label: Optional[str] = None, confidence: float = 0.0, uid: Optional[str] = None, @@ -917,6 +941,7 @@ def add_embryo(self, embryo_id: str, position: Dict = None, detection_confidence=confidence, role=role, ) + self.notify_embryos_changed() # Fire the registration event. Late-bound import keeps this module # decoupled from the event bus until first use. @@ -943,6 +968,7 @@ def remove_embryo(self, embryo_id: str) -> bool: """Remove embryo from experiment (e.g., false detection)""" if embryo_id in self.embryos: del self.embryos[embryo_id] + self.notify_embryos_changed() return True return False @@ -950,6 +976,7 @@ def assign_nickname(self, embryo_id: str, nickname: str): """Agent assigns intuitive name""" if embryo_id in self.embryos: self.embryos[embryo_id].nickname = nickname + self.notify_embryos_changed() def get_embryo_by_any_name(self, name: str) -> Optional[EmbryoState]: """Get embryo by ID, nickname, or user label""" From 4db06d8fbde3b1542afcc4d0cd44338e97751f3f Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 13:40:55 -0400 Subject: [PATCH 03/71] Devices > Map: render embryo waypoints as coarse rings / fine discs Adds an embryo layer to the Map between the axes and the live stage marker. 
Each embryo renders at its resolved XY (fine if SPIM-aligned, else coarse): coarse-only as an outlined lavender ring, fine-calibrated as a filled disc, both labelled with the embryo number. The layer is a pure read of EMBRYOS_UPDATE events plus an initial /api/embryos/current snapshot so a Map page opened mid-session shows existing embryos without waiting for the next mutation. Read-only at this phase; click / drag / delete will land in Phase 5. Phase 3 of the Map-as-embryo-home arc. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 144d9fc9db6511aff3604c7313cc7305ba1cf645) --- gently/ui/web/routes/data.py | 25 +++++++ gently/ui/web/static/css/main.css | 32 +++++++++ gently/ui/web/static/js/devices.js | 105 ++++++++++++++++++++++++++--- 3 files changed, 153 insertions(+), 9 deletions(-) diff --git a/gently/ui/web/routes/data.py b/gently/ui/web/routes/data.py index 3e66763f..2d401199 100644 --- a/gently/ui/web/routes/data.py +++ b/gently/ui/web/routes/data.py @@ -63,6 +63,31 @@ async def get_device_status(): "microscope": microscope_up, } + @router.get("/api/embryos/current") + async def get_current_embryos(): + """Return the agent's current embryo list as an EMBRYOS_UPDATE payload. + + EMBRYOS_UPDATE is published only on mutation, so a Map page opened + mid-session would otherwise see an empty embryo layer until the next + add/remove/edit. This endpoint serves the same payload shape as the + event so clients can bootstrap and then switch to the live stream. 
+ """ + empty = {"embryos": [], "count": 0, "session_id": None} + bridge = getattr(server, "agent_bridge", None) + agent = bridge.agent if bridge is not None else None + if agent is None or not hasattr(agent, "experiment"): + return empty + try: + embryos = [e.to_dict() for e in agent.experiment.embryos.values()] + except Exception: + logger.exception("Failed to serialise embryos for snapshot") + return empty + return { + "embryos": embryos, + "count": len(embryos), + "session_id": getattr(agent, "session_id", None), + } + @router.get("/api/devices/coverslip") async def get_coverslip(): """Return the coverslip outline metadata for the Map view. diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index 4d19c79d..44405acd 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -9172,6 +9172,7 @@ body.modal-open { --map-zone-green: 90, 168, 122; /* RGB triples for compositing */ --map-zone-orange: 215, 152, 84; --map-zone-red: 220, 96, 88; + --map-embryo: 156, 120, 220; /* lavender — distinct from zones and marker */ --map-overlay-bg: rgba(11, 14, 19, 0.78); --map-overlay-bg-2: rgba(11, 14, 19, 0.92); --map-overlay-edge: rgba(212, 221, 232, 0.18); @@ -9189,6 +9190,7 @@ body.modal-open { --map-accent: #0e7490; --map-accent-2: #155e75; --map-warm: #a16207; + --map-embryo: 100, 60, 180; /* deeper purple for cream paper */ --map-overlay-bg: rgba(246, 243, 236, 0.82); --map-overlay-bg-2: rgba(246, 243, 236, 0.96); --map-overlay-edge: rgba(29, 43, 58, 0.18); @@ -9429,6 +9431,36 @@ body.modal-open { 100% { opacity: 0; r: 28; } } +/* --- Embryo waypoints ------------------------------------------------ */ +/* Coarse = bottom-camera / manual placement; fine = SPIM-objective + alignment. Coarse reads as an outlined ring (provisional), fine as a + filled disc (committed). Same hue so the row of embryos still reads as + one cohort, but visual weight signals calibration state at a glance. 
*/ +.devices-embryo-ring { + fill: rgba(var(--map-embryo), 0.08); + stroke: rgba(var(--map-embryo), 0.85); + stroke-width: 1.4; + vector-effect: non-scaling-stroke; +} +.devices-embryo-disc { + fill: rgba(var(--map-embryo), 0.65); + stroke: rgba(var(--map-embryo), 0.95); + stroke-width: 1.4; + vector-effect: non-scaling-stroke; +} +.devices-embryo-label { + fill: var(--map-ink); + font-family: 'JetBrains Mono', ui-monospace, monospace; + font-weight: 600; + text-anchor: middle; + dominant-baseline: central; + pointer-events: none; + paint-order: stroke; + stroke: var(--map-paper); + stroke-width: 2; + stroke-linejoin: round; +} + /* --- Overlay panels (compass, readout, scalebar, legend) ------------- */ .devices-compass, .devices-map-readout, diff --git a/gently/ui/web/static/js/devices.js b/gently/ui/web/static/js/devices.js index 0f2f5316..c1d5e1c8 100644 --- a/gently/ui/web/static/js/devices.js +++ b/gently/ui/web/static/js/devices.js @@ -32,11 +32,14 @@ const DevicesManager = (function () { let _mapWrap; let _scalebarLabel; - // Embryos overlay state: list of {embryo_id, x, y, role, ...}. - // Populated by /api/embryos/positions on init + EMBRYO_DETECTED / - // STATUS_CHANGED WS pushes thereafter. Roles drive the marker color + // Embryo waypoints — driven by EMBRYOS_UPDATE events (the canonical bulk + // mutation broadcast added by the embryos-broadcast commit) and the + // initial /api/embryos/current snapshot. Each entry mirrors + // EmbryoState.to_dict() (id, position_coarse, position_fine, + // has_fine_position, nickname, role, ...). Role drives marker color // (mirrors the marking-window legend: magenta=test, cyan=calibration, - // grey=unassigned). + // grey=unassigned). EMBRYO_DETECTED / STATUS_CHANGED listeners stay + // hooked as a belt-and-braces refresh path. 
let _embryos = []; const _ROLE_COLOR = { test: '#ff66cc', @@ -257,6 +260,30 @@ const DevicesManager = (function () { } } + // Initial embryo snapshot — closes the gap for clients that connect + // mid-session, after the last EMBRYOS_UPDATE has already been broadcast + // and aged out of history. Subsequent updates arrive over the event bus. + async function loadEmbryosSnapshot() { + try { + const res = await fetch('/api/embryos/current'); + if (!res.ok) return; + const data = await res.json(); + handleEmbryosUpdate(data); + } catch (err) { + console.debug('embryos snapshot fetch failed:', err); + } + } + + function handleEmbryosUpdate(payload) { + _embryos = (payload && Array.isArray(payload.embryos)) ? payload.embryos : []; + if (!_viewBox) { + computeViewBox(); + renderMap(); + } else { + renderEmbryos(); + } + } + // ===================================================================== // Properties table (Details view) // ===================================================================== @@ -744,6 +771,67 @@ const DevicesManager = (function () { return Math.round(v).toString(); } + // ===================================================================== + // Embryo waypoints + // ===================================================================== + + // "embryo_007" / "embryo_7" -> 7. Falls back to a 1-based index from the + // caller so the label always shows *something*, even for stray ids. + function embryoLabelText(id, fallbackIndex) { + const m = id && String(id).match(/(\d+)/); + if (m) { + const n = parseInt(m[1], 10); + if (Number.isFinite(n)) return String(n); + } + return String(fallbackIndex + 1); + } + + // Resolve XY for rendering — fine if SPIM-aligned, else coarse. Returns + // null when neither stage carries usable values so the entry is skipped + // (e.g. an embryo whose detection record came in malformed). 
+ function embryoResolvedXY(emb) { + const f = emb && emb.position_fine; + if (f && Number.isFinite(f.x) && Number.isFinite(f.y)) return { x: f.x, y: f.y }; + const c = emb && emb.position_coarse; + if (c && Number.isFinite(c.x) && Number.isFinite(c.y)) return { x: c.x, y: c.y }; + return null; + } + + function renderEmbryos() { + if (!_mapEmbryos || !_viewBox) return; + _mapEmbryos.innerHTML = ''; + if (!_embryos || !_embryos.length) return; + const span = Math.max(_viewBox.xMax - _viewBox.xMin, + _viewBox.yMax - _viewBox.yMin); + const radius = span * 0.012; + const fontSize = span * 0.015; + + _embryos.forEach((emb, i) => { + const xy = embryoResolvedXY(emb); + if (!xy) return; + + const isFine = !!emb.has_fine_position; + const circle = document.createElementNS(SVG_NS, 'circle'); + circle.setAttribute('cx', xy.x); + circle.setAttribute('cy', svgY(xy.y)); + circle.setAttribute('r', radius); + circle.setAttribute('class', + isFine ? 'devices-embryo-disc' : 'devices-embryo-ring'); + // Identifiers for inspection / future click handlers — not used yet. + circle.setAttribute('data-embryo-id', emb.id || ''); + circle.setAttribute('data-embryo-stage', isFine ? 
'fine' : 'coarse'); + _mapEmbryos.appendChild(circle); + + const label = document.createElementNS(SVG_NS, 'text'); + label.setAttribute('x', xy.x); + label.setAttribute('y', svgY(xy.y)); + label.setAttribute('class', 'devices-embryo-label'); + label.setAttribute('font-size', fontSize); + label.textContent = embryoLabelText(emb.id, i); + _mapEmbryos.appendChild(label); + }); + } + function updateMapMarker() { if (!_mapMarker || !_lastXY) return; const sx = _lastXY.X; @@ -950,14 +1038,13 @@ const DevicesManager = (function () { setupViewSwitcher(); setupCameraWiring(); loadCoverslip(); - loadEmbryos(); + loadEmbryosSnapshot(); switchView(_currentView); if (typeof ClientEventBus !== 'undefined') { ClientEventBus.on('DEVICE_STATE_UPDATE', handlePayload); - // Embryo events: a fresh marking session emits one - // EMBRYO_DETECTED per registered embryo (via - // ExperimentState.add_embryo). assign_embryo_roles emits - // STATUS_CHANGED with change=role_assigned per change. + ClientEventBus.on('EMBRYOS_UPDATE', handleEmbryosUpdate); + // Belt-and-braces: also listen for the fine-grained events that + // existed before EMBRYOS_UPDATE so direct emitters still refresh. ClientEventBus.on('EMBRYO_DETECTED', handleEmbryoDetected); ClientEventBus.on('STATUS_CHANGED', handleStatusChanged); } From 1ed5fc4da9ecb6d748f6f342d874e459f8c0b5ea Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 14:04:53 -0400 Subject: [PATCH 04/71] detect_embryos: web Marking canvas replaces napari editor, gate control routes Adds Phase 4 of the Map-as-embryo-home arc. Auth (4a) --------- New gently/ui/web/auth.py introduces a two-role model: localhost is always control, remote callers default to view and need X-Gently-Token matching GENTLY_CONTROL_TOKEN to upgrade. Bottom-camera stream start/stop POST routes now Depends(require_control), so a remote browser can watch the stage but cannot drive hardware until an operator provisions the shared token. 
Marking canvas seeded (4b) -------------------------- VisualizationServer.start_marking_session takes initial_markers (pixel positions from SAM); they're seeded into the session state and the marking_image broadcast so the canvas opens with SAM detections pre- placed. wait_for_marking now also computes stage_x_um / stage_y_um from the operator-confirmed pixel positions, so callers can drop the result straight into agent.experiment.add_embryo. marking.js renders the seeded markers immediately and adapts the instruction string. detect_embryos -> web (4c) -------------------------- The agent tool now SAM-detects with open_editor=False (napari path bypassed), then hands off to the web Marking canvas via agent.viz_server.start_marking_session(initial_markers=...) and awaits wait_for_marking. Confirmed embryos land in agent.experiment, which broadcasts EMBRYOS_UPDATE -> Devices > Map shows them as coarse rings. Falls back gracefully if viz_server is unavailable or the operator never confirms. edit_embryos / manual_mark_embryos still use napari; deferred to a later phase. gently/ui/napari_viewer.py kept intact for offline use. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 4fbb9edf71b4faaef18aa7f4369d5bbc1556ca81) --- gently/ui/web/auth.py | 95 ++++++++++++++++++++++++++++++++++++ gently/ui/web/routes/data.py | 10 ++-- 2 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 gently/ui/web/auth.py diff --git a/gently/ui/web/auth.py b/gently/ui/web/auth.py new file mode 100644 index 00000000..7933c515 --- /dev/null +++ b/gently/ui/web/auth.py @@ -0,0 +1,95 @@ +"""Web-UI authorization roles. + +Two roles: + view -- read-only. GET endpoints, SSE / WebSocket event streams. + control -- can drive hardware (POST/PUT/DELETE). Localhost is always + control; remote callers must present a matching token in the + X-Gently-Token header (token read from GENTLY_CONTROL_TOKEN). 
+ +Routes that move hardware or mutate persistent state declare a dependency: + + from gently.ui.web.auth import require_control + + @router.post("/api/devices/foo") + async def foo(_=Depends(require_control)): + ... + +Default-deny on control: if the token env var is unset, remote callers get +view-only access until the operator provisions a token. That matches the +"diSPIM computer alone gives control directions" intent while leaving room +for authenticated remote operators later. +""" + +from __future__ import annotations + +import logging +import os +from enum import Enum + +from fastapi import HTTPException, Request + +logger = logging.getLogger(__name__) + + +_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost"}) + +# Header name used to upgrade a remote session to control role. Single shared +# token for now; per-user identities can be layered on later without changing +# this module's public surface. +_TOKEN_HEADER = "X-Gently-Token" +_TOKEN_ENV = "GENTLY_CONTROL_TOKEN" + + +class Role(str, Enum): + VIEW = "view" + CONTROL = "control" + + +def _configured_token() -> str | None: + """Return the shared control token, or None if no token is provisioned. + + Read fresh each request so the operator can rotate the token without + restarting the web server. + """ + tok = os.environ.get(_TOKEN_ENV, "").strip() + return tok or None + + +def resolve_role(request: Request) -> Role: + """Determine the effective role for a request. + + Localhost is always control (the diSPIM box). Remote callers need to + present X-Gently-Token matching GENTLY_CONTROL_TOKEN. 
+ """ + client = request.client + host = client.host if client else None + if host in _LOOPBACK_HOSTS: + return Role.CONTROL + + token = _configured_token() + if token is not None: + supplied = request.headers.get(_TOKEN_HEADER, "").strip() + if supplied and supplied == token: + return Role.CONTROL + + return Role.VIEW + + +def require_control(request: Request) -> Role: + """FastAPI dependency — 403 unless the caller has the control role. + + Logs the denied client host (without leaking the token) so the operator + can spot if a remote browser is trying to drive hardware. + """ + role = resolve_role(request) + if role is Role.CONTROL: + return role + host = request.client.host if request.client else "unknown" + logger.warning("control-route 403 for %s -> %s %s", + host, request.method, request.url.path) + raise HTTPException( + status_code=403, + detail="control role required (this endpoint moves hardware or " + "mutates persistent state; localhost has it by default, " + "remote callers need X-Gently-Token)", + ) diff --git a/gently/ui/web/routes/data.py b/gently/ui/web/routes/data.py index 2d401199..6b9c2434 100644 --- a/gently/ui/web/routes/data.py +++ b/gently/ui/web/routes/data.py @@ -6,7 +6,9 @@ from typing import Optional import yaml -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException + +from gently.ui.web.auth import require_control logger = logging.getLogger(__name__) @@ -123,7 +125,8 @@ async def get_bottom_camera_status(): "last_frame_ts": getattr(monitor, "_last_frame_ts", None) if monitor else None, } - @router.post("/api/devices/bottom_camera/stream/start") + @router.post("/api/devices/bottom_camera/stream/start", + dependencies=[Depends(require_control)]) async def start_bottom_camera_stream(): """Start the bottom-camera stream bridge. 
@@ -144,7 +147,8 @@ async def start_bottom_camera_stream(): raise HTTPException(status_code=500, detail=f"start failed: {exc}") return {"streaming": monitor.running} - @router.post("/api/devices/bottom_camera/stream/stop") + @router.post("/api/devices/bottom_camera/stream/stop", + dependencies=[Depends(require_control)]) async def stop_bottom_camera_stream(): """Stop the bottom-camera stream bridge. Idempotent.""" bridge = getattr(server, "agent_bridge", None) From 5b15797648c07afac03a9cbbff383bb87e042600 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 15:34:29 -0400 Subject: [PATCH 05/71] Devices > Map: pick-up / drop / delete to edit embryos in-place Map becomes the home for embryos rather than a viewer. Backend (data.py) ----------------- PUT /api/embryos/{id}/position {x, y} updates position_coarse and CLEARS position_fine -- the operator overriding the sighting invalidates any prior SPIM-objective alignment derived from the old coarse, so it must be re-run. DELETE /api/embryos/{id} removes via ExperimentState. Both endpoints Depends(require_control), so only the diSPIM box (or a remote session with X-Gently-Token) can mutate the embryo list. Both fire EMBRYOS_UPDATE through the observer hook for live Map refresh. Frontend (devices.js + main.css) -------------------------------- First click on an embryo selects it (dashed lavender ring, brighter label -- the "picked up" state). Click on empty map space drops it there with a confirm prompt; Delete/Backspace removes with confirm; Escape clears the selection. New embryos still go through the bottom- camera Marking canvas -- the Map is a schematic, not a satellite, so adding without a visual reference would be guessing. Keyboard handler is tab-aware (Devices tab + Map view only) and ignores keystrokes while an input/textarea/select has focus so it doesn't hijack the chat composer. Smoke-tested end-to-end via ASGI: PUT clears fine correctly, DELETE fires notify, error paths return 400 / 404 / 503. 
Phase 5 of the Map-as-embryo-home arc. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 8f6553e12b02ff070589d729cb8e320c330f2817) --- gently/ui/web/routes/data.py | 54 ++++++++- gently/ui/web/static/css/main.css | 17 +++ gently/ui/web/static/js/devices.js | 171 ++++++++++++++++++++++++++++- 3 files changed, 236 insertions(+), 6 deletions(-) diff --git a/gently/ui/web/routes/data.py b/gently/ui/web/routes/data.py index 6b9c2434..6a3e8ef5 100644 --- a/gently/ui/web/routes/data.py +++ b/gently/ui/web/routes/data.py @@ -6,7 +6,7 @@ from typing import Optional import yaml -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Body, Depends, HTTPException from gently.ui.web.auth import require_control @@ -65,6 +65,58 @@ async def get_device_status(): "microscope": microscope_up, } + def _require_agent_with_experiment(): + """Resolve the live agent from the server bridge, or 503. + + Edit endpoints write through ExperimentState so the notify hook fires + EMBRYOS_UPDATE and the Map re-renders without a follow-up fetch. + """ + bridge = getattr(server, "agent_bridge", None) + agent = bridge.agent if bridge is not None else None + if agent is None or not hasattr(agent, "experiment"): + raise HTTPException(status_code=503, detail="Agent not ready") + return agent + + @router.put("/api/embryos/{embryo_id}/position", + dependencies=[Depends(require_control)]) + async def update_embryo_position( + embryo_id: str, + body: dict = Body(...), + ): + """Update an embryo's coarse XY position. + + Map-side edits write to the coarse stage and CLEAR any prior fine + position — the operator is overriding the sighting, so any + SPIM-objective fine alignment derived from the old coarse is no + longer trustworthy and must be re-run. 
+ """ + agent = _require_agent_with_experiment() + emb = agent.experiment.embryos.get(embryo_id) + if emb is None: + raise HTTPException(status_code=404, detail=f"Embryo {embryo_id} not found") + try: + x = float(body.get("x")) + y = float(body.get("y")) + except (TypeError, ValueError): + raise HTTPException(status_code=400, detail="Body needs numeric x and y") + emb.position_coarse = {"x": x, "y": y} + emb.position_fine = {} + agent.experiment.notify_embryos_changed() + return emb.to_dict() + + @router.delete("/api/embryos/{embryo_id}", + dependencies=[Depends(require_control)]) + async def delete_embryo(embryo_id: str): + """Remove an embryo from the experiment. + + Goes through ExperimentState.remove_embryo so the observer hook + fires EMBRYOS_UPDATE automatically. + """ + agent = _require_agent_with_experiment() + if not agent.experiment.remove_embryo(embryo_id): + raise HTTPException(status_code=404, detail=f"Embryo {embryo_id} not found") + return {"ok": True, "embryo_id": embryo_id} + @router.get("/api/embryos/current") async def get_current_embryos(): """Return the agent's current embryo list as an EMBRYOS_UPDATE payload. diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index 44405acd..9e975176 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -9436,6 +9436,9 @@ body.modal-open { alignment. Coarse reads as an outlined ring (provisional), fine as a filled disc (committed). Same hue so the row of embryos still reads as one cohort, but visual weight signals calibration state at a glance. */ +.devices-embryo-group { + cursor: pointer; +} .devices-embryo-ring { fill: rgba(var(--map-embryo), 0.08); stroke: rgba(var(--map-embryo), 0.85); @@ -9461,6 +9464,20 @@ body.modal-open { stroke-linejoin: round; } +/* Selected = "picked up" — outlined dashed, hollow fill, brighter label. + Click on empty map drops the picked-up embryo at that XY; Delete / + Backspace removes it; Escape deselects. 
*/ +.devices-embryo-group.devices-embryo-selected .devices-embryo-ring, +.devices-embryo-group.devices-embryo-selected .devices-embryo-disc { + fill: rgba(var(--map-embryo), 0.12); + stroke: rgba(var(--map-embryo), 1); + stroke-width: 2; + stroke-dasharray: 4 3; +} +.devices-embryo-group.devices-embryo-selected .devices-embryo-label { + fill: rgba(var(--map-embryo), 1); +} + /* --- Overlay panels (compass, readout, scalebar, legend) ------------- */ .devices-compass, .devices-map-readout, diff --git a/gently/ui/web/static/js/devices.js b/gently/ui/web/static/js/devices.js index c1d5e1c8..03d5fbb3 100644 --- a/gently/ui/web/static/js/devices.js +++ b/gently/ui/web/static/js/devices.js @@ -47,6 +47,11 @@ const DevicesManager = (function () { unassigned: '#888888', }; + // Map-side edit state. _selectedEmbryoId means "picked up": the next + // click on empty map space drops it there (with a confirm), Delete / + // Backspace removes it (with a confirm), Escape clears the selection. + let _selectedEmbryoId = null; + // Bottom-camera panel DOM + state let _camPanel, _camToggle, _camImg, _camPlaceholder, _camLed, _camMeta; let _camStreaming = false; @@ -811,16 +816,24 @@ const DevicesManager = (function () { if (!xy) return; const isFine = !!emb.has_fine_position; + const isSelected = _selectedEmbryoId !== null + && emb.id === _selectedEmbryoId; + + // Wrap circle + label in a group so a single closest() lookup + // finds the embryo regardless of which child the click hit. + const group = document.createElementNS(SVG_NS, 'g'); + group.setAttribute('class', + 'devices-embryo-group' + (isSelected ? ' devices-embryo-selected' : '')); + group.setAttribute('data-embryo-id', emb.id || ''); + group.setAttribute('data-embryo-stage', isFine ? 'fine' : 'coarse'); + const circle = document.createElementNS(SVG_NS, 'circle'); circle.setAttribute('cx', xy.x); circle.setAttribute('cy', svgY(xy.y)); circle.setAttribute('r', radius); circle.setAttribute('class', isFine ? 
'devices-embryo-disc' : 'devices-embryo-ring'); - // Identifiers for inspection / future click handlers — not used yet. - circle.setAttribute('data-embryo-id', emb.id || ''); - circle.setAttribute('data-embryo-stage', isFine ? 'fine' : 'coarse'); - _mapEmbryos.appendChild(circle); + group.appendChild(circle); const label = document.createElementNS(SVG_NS, 'text'); label.setAttribute('x', xy.x); @@ -828,10 +841,150 @@ const DevicesManager = (function () { label.setAttribute('class', 'devices-embryo-label'); label.setAttribute('font-size', fontSize); label.textContent = embryoLabelText(emb.id, i); - _mapEmbryos.appendChild(label); + group.appendChild(label); + + _mapEmbryos.appendChild(group); }); } + // ---- Map-side edit interactions ------------------------------------ + // Convert a pointer event's client coords into stage µm. SVG y axis is + // positive-down and stage y is positive-up, so the y component is + // negated to match the convention used elsewhere in this module. + function eventToStageXY(event) { + if (!_mapSvg || !_mapSvg.getScreenCTM) return null; + const ctm = _mapSvg.getScreenCTM(); + if (!ctm) return null; + const pt = _mapSvg.createSVGPoint(); + pt.x = event.clientX; + pt.y = event.clientY; + const local = pt.matrixTransform(ctm.inverse()); + return { x: local.x, y: -local.y }; + } + + function findEmbryoIdAt(target) { + if (!target) return null; + const node = target.closest && target.closest('[data-embryo-id]'); + return node ? 
node.getAttribute('data-embryo-id') : null; + } + + function embryoById(id) { + return _embryos.find(e => e.id === id) || null; + } + + function embryoNumberFor(emb) { + return embryoLabelText(emb.id, _embryos.indexOf(emb)); + } + + function setSelectedEmbryo(id) { + if (_selectedEmbryoId === id) return; + _selectedEmbryoId = id; + renderEmbryos(); + } + + function clearSelection() { + if (_selectedEmbryoId === null) return; + _selectedEmbryoId = null; + renderEmbryos(); + } + + async function attemptMoveSelected(targetStage) { + const id = _selectedEmbryoId; + if (!id) return; + const emb = embryoById(id); + if (!emb) { clearSelection(); return; } + const cur = embryoResolvedXY(emb); + const num = embryoNumberFor(emb); + const oldStr = cur ? `(${cur.x.toFixed(1)}, ${cur.y.toFixed(1)})` : '(unknown)'; + const newStr = `(${targetStage.x.toFixed(1)}, ${targetStage.y.toFixed(1)})`; + if (!window.confirm(`Move embryo ${num} from ${oldStr} to ${newStr}?`)) { + return; // keep the embryo picked up so they can try again + } + try { + const res = await fetch(`/api/embryos/${encodeURIComponent(id)}/position`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ x: targetStage.x, y: targetStage.y }), + }); + if (!res.ok) { + window.alert(`Move failed (${res.status}): ${await res.text()}`); + return; + } + // EMBRYOS_UPDATE will arrive over the bus and refresh the layer; + // dropping clears the picked-up state regardless. + clearSelection(); + } catch (err) { + console.error('move embryo:', err); + window.alert(`Move failed: ${err.message}`); + } + } + + async function attemptDeleteSelected() { + const id = _selectedEmbryoId; + if (!id) return; + const emb = embryoById(id); + const num = emb ? 
embryoNumberFor(emb) : id; + if (!window.confirm(`Remove embryo ${num}?`)) return; + try { + const res = await fetch(`/api/embryos/${encodeURIComponent(id)}`, { + method: 'DELETE', + }); + if (!res.ok) { + window.alert(`Delete failed (${res.status}): ${await res.text()}`); + return; + } + // The embryo is gone from the server snapshot; EMBRYOS_UPDATE + // will arrive and drop it from _embryos. Clear locally too. + _selectedEmbryoId = null; + } catch (err) { + console.error('delete embryo:', err); + window.alert(`Delete failed: ${err.message}`); + } + } + + function onMapPointerDown(event) { + // Ignore non-primary buttons so right-clicks etc. don't trigger UI. + if (event.button !== undefined && event.button !== 0) return; + const id = findEmbryoIdAt(event.target); + if (id) { + setSelectedEmbryo(id); + return; + } + // Empty-space click: drop the picked-up embryo here. + if (_selectedEmbryoId !== null) { + const stage = eventToStageXY(event); + if (stage) attemptMoveSelected(stage); + } + } + + function onMapKeyDown(event) { + // Only honour keys when the operator is actually looking at the Map: + // not on another top-level tab, not on the Details subview, and not + // typing into an input / textarea / select / contenteditable. 
+ if (typeof state !== 'undefined' && typeof TABS !== 'undefined' + && state.tab !== TABS.DEVICES) { + return; + } + if (_currentView !== 'map') return; + const a = document.activeElement; + if (a && (a.tagName === 'INPUT' || a.tagName === 'TEXTAREA' || + a.tagName === 'SELECT' || a.isContentEditable)) { + return; + } + if (event.key === 'Escape') { + if (_selectedEmbryoId !== null) { + clearSelection(); + event.preventDefault(); + } + return; + } + if (_selectedEmbryoId === null) return; + if (event.key === 'Delete' || event.key === 'Backspace') { + event.preventDefault(); // Backspace would otherwise navigate back + attemptDeleteSelected(); + } + } + function updateMapMarker() { if (!_mapMarker || !_lastXY) return; const sx = _lastXY.X; @@ -1048,6 +1201,14 @@ const DevicesManager = (function () { ClientEventBus.on('EMBRYO_DETECTED', handleEmbryoDetected); ClientEventBus.on('STATUS_CHANGED', handleStatusChanged); } + // Map-side edit handlers. Pointer events on the SVG cover both + // "click an embryo" (selects it) and "click empty map" (drops the + // selected embryo). Keyboard listener is document-wide but guards + // against firing while an input is focused. + if (_mapSvg) { + _mapSvg.addEventListener('pointerdown', onMapPointerDown); + } + document.addEventListener('keydown', onMapKeyDown); setStatus('stale', 'waiting', 'no payload yet'); syncInitialCameraState(); // Stop the camera stream if the tab is closed while it's running, From 7995cbd852219a1ae8be2c4001af22c49a3a4fb9 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 17:39:27 -0400 Subject: [PATCH 06/71] device_layer: re-enable XY joystick at boot Tiger persists JoystickEnabled in non-volatile card settings. If a prior session ever ran SaveCardSettings while the joystick happened to be off, every subsequent boot inherits that state and the physical controller is dead. 
We don't run SaveCardSettings ourselves, so the only way to recover the joystick was a manual property write -- and there was no way to know the state had drifted until the operator tried to use it. DiSPIMXYStage gains enable_joystick(True) that writes JoystickEnabled + verifies read-back (same pattern as set_firmware_limits). device_layer calls it at boot right after the firmware soft limits are applied. Failure is non-fatal: agent can still drive the stage; we just log loud so the operator knows the joystick is unavailable. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 808fe8130d9e2066927a7d464e951d7f9d88af99) --- gently/hardware/dispim/device_layer.py | 12 +++++++++ gently/hardware/dispim/devices/stage.py | 34 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/gently/hardware/dispim/device_layer.py b/gently/hardware/dispim/device_layer.py index 1172ba1d..48e19746 100644 --- a/gently/hardware/dispim/device_layer.py +++ b/gently/hardware/dispim/device_layer.py @@ -274,6 +274,18 @@ async def initialize(self): logger.error("Could not apply ASI firmware soft limits: %s", exc) raise + # Tiger persists JoystickEnabled in non-volatile card settings — + # if a prior session ever called SaveCardSettings with the + # joystick off, every subsequent boot inherits that state and the + # physical controller is dead. Force it on at boot so the + # operator's joystick always works regardless of card history. + try: + xy_stage.enable_joystick(True) + except Exception as exc: + # Not fatal — the agent can still drive the stage. Log loudly + # so the operator knows the joystick is unavailable. 
+ logger.error("Could not enable XY joystick: %s", exc) + # [4/5] Initialize RunEngine logger.info("[4/5] Initializing RunEngine...") self.RE = RunEngine({}) diff --git a/gently/hardware/dispim/devices/stage.py b/gently/hardware/dispim/devices/stage.py index a179135a..3c922d14 100644 --- a/gently/hardware/dispim/devices/stage.py +++ b/gently/hardware/dispim/devices/stage.py @@ -332,6 +332,40 @@ def set_firmware_limits( ) logger.info("ASI firmware limit %s = %.4f mm (verified)", prop, got) + def enable_joystick(self, enabled: bool = True) -> None: + """Set the ASI Tiger 'JoystickEnabled' property on the XY stage. + + Tiger firmware persists this flag in its non-volatile card settings + (touched whenever someone calls SaveCardSettings — we don't, but + previous sessions may have). If it persisted as 'No', the physical + joystick is dead on boot until something writes 'Yes'. This method + is the boot-time fix; it's called from device_layer.initialize right + after the firmware soft limits are applied. + + Read-back verified so a silent rejection by the adapter doesn't + leave the operator wondering why the controller still doesn't move. + """ + target = "Yes" if enabled else "No" + prop = "JoystickEnabled" + try: + self.core.setProperty(self.name, prop, target) + except RuntimeError as exc: + raise HardwareError( + f"setProperty {prop}={target} failed on {self.name}: {exc}" + ) + try: + got = self.core.getProperty(self.name, prop) + except RuntimeError as exc: + raise HardwareError( + f"getProperty {prop} read-back failed on {self.name}: {exc}" + ) + if str(got).strip() != target: + raise HardwareError( + f"{prop} read-back mismatch on {self.name}: " + f"wrote '{target}', controller reports '{got}'." 
+ ) + logger.info("ASI %s.%s = %s (verified)", self.name, prop, got) + # Synchronous convenience methods (usable outside RunEngine) def get_position(self) -> np.ndarray: """ From feca96aa30cc6d36ac60bdacee75417edb2265c6 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 17:39:39 -0400 Subject: [PATCH 07/71] Devices > Map: image-anchored crosshair + scroll-to-zoom in camera panel Two improvements to the bottom-camera live thumbnail on the Map. Crosshair (FOV reticle) ----------------------- A centre crosshair anchored to the image, not the viewer rect. SVG sibling of the img element; an inner g element receives the same translate/scale as the image (in viewBox units, via the SVG transform attribute), so the lines track the FOV centre through zoom/pan instead of staying pinned to the container centre. Transform sits on the g element rather than the SVG element so the renderer re-rasterises at each zoom step -- otherwise 1px strokes get bitmap-scaled and go blurry. vector-effect: non-scaling-stroke keeps them 1px at any zoom. Default colour amber (var(--map-warm)). Zoom / pan ---------- Scroll-wheel over the camera stage zooms in/out (1x to 8x, ~15% per notch) centred under the cursor. Click and drag pans when zoomed. Double-click resets to 1x. Pan is clamped so the image centre stays inside the visible window. Stream stop also resets the transform so the next session starts at 1x. wheel listener is passive:false so the page doesn't scroll under the operator's hand.
Co-Authored-By: Claude Opus 4.7 (cherry picked from commit f7a13d694f157f50813ecec9ac115859d2a5b295) --- gently/ui/web/static/css/main.css | 35 ++++++++ gently/ui/web/static/js/devices.js | 137 +++++++++++++++++++++++++++++ gently/ui/web/templates/index.html | 14 +++ 3 files changed, 186 insertions(+) diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index 9e975176..ef4dac96 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -9684,9 +9684,44 @@ body.modal-open { display: block; opacity: 0; transition: opacity 0.25s; + /* Zoom anchored at frame centre; scroll-wheel + cursor adjust translate + so the point under the cursor stays under the cursor. */ + transform-origin: center center; + will-change: transform; } .devices-camera-img.has-frame { opacity: 1; } +/* Cursor hints for zoom/pan mode. Default cursor stays untouched at zoom 1 + so the operator can still interact with overlays under the camera. */ +.devices-camera-stage.camera-zoomed { cursor: grab; } +.devices-camera-stage.camera-panning { cursor: grabbing; } + +/* Centre reticle — full-span horizontal + vertical hairline marking the + FOV centre IN the image. SVG is a sibling of the img element; the inner g element + receives the same translate/scale (in viewBox units) so the lines + track the camera image through zoom/pan instead of staying pinned to + the viewer rect. Transform lives on the g element, not the SVG element, so + the renderer re-rasterises at each zoom step — otherwise the strokes + get bitmap-scaled and go blurry.
*/ +.devices-camera-crosshair { + position: absolute; + inset: 0; + width: 100%; + height: 100%; + pointer-events: none; + opacity: 0; + transition: opacity 0.25s; +} +.devices-camera-stage:has(.devices-camera-img.has-frame) .devices-camera-crosshair { + opacity: 1; +} +.devices-camera-crosshair line { + stroke: var(--map-warm); + stroke-width: 1; + vector-effect: non-scaling-stroke; + stroke-opacity: 0.85; +} + .devices-camera-placeholder { position: absolute; inset: 0; diff --git a/gently/ui/web/static/js/devices.js b/gently/ui/web/static/js/devices.js index 03d5fbb3..b4cb30d2 100644 --- a/gently/ui/web/static/js/devices.js +++ b/gently/ui/web/static/js/devices.js @@ -54,6 +54,7 @@ const DevicesManager = (function () { // Bottom-camera panel DOM + state let _camPanel, _camToggle, _camImg, _camPlaceholder, _camLed, _camMeta; + let _camStage, _camCrosshair, _camCrosshairGroup; let _camStreaming = false; let _camLastFrameTs = 0; let _camHasFrame = false; @@ -61,6 +62,16 @@ const DevicesManager = (function () { const _CAM_FPS_WINDOW = 12; let _camFrameTimes = []; + // Camera zoom / pan. Identity transform = (zoom 1, tx 0, ty 0); pan only + // engages once zoom > 1. Reset on double-click and on stream-off. + let _camZoom = 1; + let _camTx = 0; + let _camTy = 0; + let _camPanLast = null; // {x, y} clientX/Y of last pointermove during pan + const _CAM_ZOOM_MIN = 1; + const _CAM_ZOOM_MAX = 8; + const _CAM_ZOOM_STEP = 1.15; // multiplicative per wheel notch + let _lastTs = 0; let _previousTs = 0; let _lastWallTs = 0; @@ -118,6 +129,9 @@ const DevicesManager = (function () { _camToggle = document.getElementById('devices-camera-toggle'); _camImg = document.getElementById('devices-camera-img'); _camPlaceholder = document.getElementById('devices-camera-placeholder'); + _camStage = _camPanel ? 
_camPanel.querySelector('.devices-camera-stage') : null; + _camCrosshair = document.getElementById('devices-camera-crosshair'); + _camCrosshairGroup = document.getElementById('devices-camera-crosshair-group'); _camLed = document.getElementById('devices-camera-led'); _camMeta = document.getElementById('devices-camera-meta'); @@ -1061,6 +1075,9 @@ const DevicesManager = (function () { if (_camPlaceholder) _camPlaceholder.hidden = false; if (_camMeta) _camMeta.textContent = 'stream off'; if (_camStaleTimer) { clearTimeout(_camStaleTimer); _camStaleTimer = null; } + // Operator may have zoomed in; reset so the next stream session + // starts at 1× rather than inheriting a stale view. + resetCameraZoom(); } else { _camFrameTimes = []; if (_camMeta) _camMeta.textContent = 'waiting…'; @@ -1122,6 +1139,116 @@ const DevicesManager = (function () { } } + // ---- Camera zoom / pan --------------------------------------------- + function applyCameraTransform() { + if (!_camImg) return; + _camImg.style.transform = + `translate(${_camTx}px, ${_camTy}px) scale(${_camZoom})`; + // Reticle uses an SVG transform attribute on the inner <g> instead + // of a CSS transform on the SVG element — same geometric effect, + // but the SVG renderer re-rasterises at the new zoom so the 1px + // strokes stay crisp instead of getting bitmap-scaled. + if (_camCrosshairGroup && _camStage) { + const rect = _camStage.getBoundingClientRect(); + // Convert pixel-space translation to viewBox units (viewBox is + // 0..100 in both axes, preserveAspectRatio=none). + const txV = rect.width > 0 ? (_camTx * 100) / rect.width : 0; + const tyV = rect.height > 0 ? (_camTy * 100) / rect.height : 0; + // translate(50+tx, 50+ty) scale(zoom) translate(-50, -50) keeps + // the viewBox centre (50, 50) as the zoom anchor and offsets by + // the converted pixel translation.
+ _camCrosshairGroup.setAttribute( + 'transform', + `translate(${50 + txV} ${50 + tyV}) ` + + `scale(${_camZoom}) ` + + `translate(-50 -50)` + ); + } + } + + function resetCameraZoom() { + _camZoom = 1; + _camTx = 0; + _camTy = 0; + applyCameraTransform(); + if (_camStage) _camStage.classList.remove('camera-zoomed', 'camera-panning'); + } + + // Limit the pan to half the extra size gained by zooming, so (for an + // image that fills the stage) no blank margin can be dragged into + // view. At zoom 1 this collapses to (0, 0). + function clampCameraPan() { + if (!_camStage) return; + const rect = _camStage.getBoundingClientRect(); + const maxX = (rect.width * (_camZoom - 1)) / 2; + const maxY = (rect.height * (_camZoom - 1)) / 2; + _camTx = Math.max(-maxX, Math.min(maxX, _camTx)); + _camTy = Math.max(-maxY, Math.min(maxY, _camTy)); + } + + function onCameraWheel(event) { + if (!_camStage) return; + // Always preventDefault so the page doesn't scroll under the + // operator while they're framing a sample. + event.preventDefault(); + const rect = _camStage.getBoundingClientRect(); + const cx = event.clientX - rect.left - rect.width / 2; + const cy = event.clientY - rect.top - rect.height / 2; + const oldZoom = _camZoom; + const factor = event.deltaY < 0 ? _CAM_ZOOM_STEP : 1 / _CAM_ZOOM_STEP; + const newZoom = Math.max(_CAM_ZOOM_MIN, + Math.min(_CAM_ZOOM_MAX, oldZoom * factor)); + if (newZoom === oldZoom) return; + + // Keep the image point under the cursor anchored under the cursor + // across the zoom: cursor_new = cursor_old after the transform + // change, which means newT = cursor - (cursor - oldT) * (new/old).
+ const ratio = newZoom / oldZoom; + _camTx = cx - (cx - _camTx) * ratio; + _camTy = cy - (cy - _camTy) * ratio; + _camZoom = newZoom; + + if (Math.abs(_camZoom - 1) < 0.001) { + resetCameraZoom(); + return; + } + clampCameraPan(); + applyCameraTransform(); + _camStage.classList.add('camera-zoomed'); + } + + function onCameraPointerDown(event) { + if (event.button !== 0) return; + if (_camZoom <= 1) return; + _camPanLast = { x: event.clientX, y: event.clientY }; + try { _camStage.setPointerCapture(event.pointerId); } catch (_) {} + _camStage.classList.add('camera-panning'); + event.preventDefault(); + } + + function onCameraPointerMove(event) { + if (!_camPanLast) return; + _camTx += event.clientX - _camPanLast.x; + _camTy += event.clientY - _camPanLast.y; + _camPanLast = { x: event.clientX, y: event.clientY }; + clampCameraPan(); + applyCameraTransform(); + } + + function onCameraPointerEnd(event) { + if (!_camPanLast) return; + _camPanLast = null; + try { _camStage.releasePointerCapture(event.pointerId); } catch (_) {} + if (_camStage) _camStage.classList.remove('camera-panning'); + } + + function onCameraDoubleClick(event) { + if (_camZoom !== 1 || _camTx !== 0 || _camTy !== 0) { + event.preventDefault(); + resetCameraZoom(); + } + } + function setupCameraWiring() { if (!_camToggle) return; _camToggle.addEventListener('click', toggleCameraStream); @@ -1129,6 +1256,16 @@ const DevicesManager = (function () { if (typeof ClientEventBus !== 'undefined') { ClientEventBus.on('BOTTOM_CAMERA_FRAME', handleCameraFrame); } + // Camera zoom/pan. wheel needs passive:false so we can preventDefault + // and stop the page from scrolling beneath the FOV. 
+ if (_camStage) { + _camStage.addEventListener('wheel', onCameraWheel, { passive: false }); + _camStage.addEventListener('pointerdown', onCameraPointerDown); + _camStage.addEventListener('pointermove', onCameraPointerMove); + _camStage.addEventListener('pointerup', onCameraPointerEnd); + _camStage.addEventListener('pointercancel', onCameraPointerEnd); + _camStage.addEventListener('dblclick', onCameraDoubleClick); + } } // ===================================================================== diff --git a/gently/ui/web/templates/index.html b/gently/ui/web/templates/index.html index dd3bbfb0..64da673c 100644 --- a/gently/ui/web/templates/index.html +++ b/gently/ui/web/templates/index.html @@ -359,6 +359,20 @@

Device
bottom camera live frame + +
From 294f7dc4f8ccfd76bcbac23f7a3e1211a7138705 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 17:40:00 -0400 Subject: [PATCH 08/71] eval: event capture / replay / shadow scaffolding for orchestrator A/B Substrate for testing candidate orchestrator architectures without running real hardware. Three layers, all under gently/eval/: EventCapture Wildcard-subscribes to an EventBus; appends every Event to a per- session events.jsonl (D:/Gently3/sessions/{id}/events.jsonl). High-volume telemetry (DEVICE_STATE_UPDATE, BOTTOM_CAMERA_FRAME) is filtered out by default so 12-hour sessions don't drown the meaningful events under polling noise. Auto-starts in agent init. Handles non-JSON-native payloads (numpy, Path, datetime, set, Enum, dataclass, bytes) via a fallback serialiser. EventReplay Reads events.jsonl back; publishes via EventBus.publish_event() so original timestamps survive (candidates can reason about historical cadence). Fast mode (no sleep) and real-time mode with optional time_scale. event_types() for cheap pre-flight histogramming. DecisionLog + Decision + DecisionTrigger Per-session decisions.jsonl record. Each Decision captures WHY the agent woke up (trigger + detail), WHAT it saw (context summary, recent event ids, prompt hash), WHAT it did (tool calls, response text), and HOW it went (duration, error). Substrate for diffing candidate decisions later. ShadowRunner + OrchestratorCandidate + NoOpCandidate Candidates subscribe to an EventBus alongside production but their decisions are LOGGED, not enacted -- never permitted to touch hardware by construction. ShadowRunner hosts a set of candidates, isolates candidate failures from each other and from the live bus. NoOpCandidate ships as worked-example and proof-of-life. scripts/replay_session.py CLI: replay a session by id-prefix, with optional --candidate attachment, --real-time + --time-scale, and --histogram pre-flight. 
15 unit tests in tests/test_eval.py covering capture filter, non-JSON payloads, thread safety, replay round-trip (event_type / source / data / correlation_id / timestamp all preserved), real-time cadence, time_scale, malformed-line tolerance, decision-log round-trip, shadow forwarding to multiple candidates, candidate-failure isolation, and event-type whitelisting. Phase 6 of the Map-as-embryo-home arc, unlocking offline iteration on the world-model + decision-moment work (operator-action events, wake triggers, tiered context). Co-Authored-By: Claude Opus 4.7 (cherry picked from commit d69cc219be8606c669b5d9891316fc6f142e8bf9) --- gently/app/agent.py | 45 +++++ gently/eval/__init__.py | 32 +++ gently/eval/decision_log.py | 170 ++++++++++++++++ gently/eval/event_capture.py | 162 ++++++++++++++++ gently/eval/event_replay.py | 127 ++++++++++++ gently/eval/shadow.py | 229 ++++++++++++++++++++++ scripts/replay_session.py | 150 +++++++++++++++ tests/test_eval.py | 363 +++++++++++++++++++++++++++++++++++ 8 files changed, 1278 insertions(+) create mode 100644 gently/eval/__init__.py create mode 100644 gently/eval/decision_log.py create mode 100644 gently/eval/event_capture.py create mode 100644 gently/eval/event_replay.py create mode 100644 gently/eval/shadow.py create mode 100644 scripts/replay_session.py create mode 100644 tests/test_eval.py diff --git a/gently/app/agent.py b/gently/app/agent.py index da7d7b11..b549e0f2 100644 --- a/gently/app/agent.py +++ b/gently/app/agent.py @@ -146,6 +146,11 @@ def __init__( # Interaction logger for structured logging (research data collection) self.interaction_logger: Optional[InteractionLogger] = None + # Event capture — durable log of every EventBus event during this + # session. Substrate for offline replay / shadow-mode A/B of + # candidate orchestrator architectures. 
+ self.event_capture = None + # Timelapse orchestrator (initialized when microscope connected) self.timelapse_orchestrator: Optional[TimelapseOrchestrator] = None @@ -208,6 +213,12 @@ def __init__( # Initialize interaction logger (for research data collection) self._init_interaction_logger() + # Start event capture into the session folder so offline replay / + # shadow-mode testing has a durable input stream. Filters out the + # high-volume telemetry types (DEVICE_STATE_UPDATE / BOTTOM_CAMERA_FRAME) + # by default so a long timelapse doesn't bury the meaningful events. + self._init_event_capture() + # Wire interaction logger and choice handler to conversation manager self.conversation.interaction_logger = self.interaction_logger self.conversation.choice_handler = self.choice_handler @@ -457,6 +468,40 @@ def _init_interaction_logger(self): logging.getLogger(__name__).warning(f"Failed to init interaction logger: {e}") self.interaction_logger = None + def _init_event_capture(self): + """Open the per-session events.jsonl capture. + + Resolves the session folder via FileStore._session_dir so the log + sits next to session.yaml / interaction_log.jsonl. Silent no-op + when the session folder can't be resolved (e.g. test harness with + a stripped-down agent) — replay just won't have a log to read. + """ + from gently.eval import EventCapture + try: + session_dir = None + sid = self.session_id + if self.store is not None and sid: + session_dir = self.store._session_dir(sid) + if session_dir is None: + logging.getLogger(__name__).debug( + "EventCapture: no session dir for %s — skipping", sid) + return + path = session_dir / "events.jsonl" + self.event_capture = EventCapture(path) + self.event_capture.start(self._event_bus) + except Exception: + logging.getLogger(__name__).exception("Failed to init event capture") + self.event_capture = None + + def stop_event_capture(self): + """Flush + close the events.jsonl. 
Idempotent; safe at shutdown.""" + if self.event_capture is not None: + try: + self.event_capture.stop() + except Exception: + logging.getLogger(__name__).exception("EventCapture stop failed") + self.event_capture = None + def _init_timelapse_orchestrator(self): """Initialize the timelapse orchestrator if microscope is connected.""" if not self._has_microscope(): diff --git a/gently/eval/__init__.py b/gently/eval/__init__.py new file mode 100644 index 00000000..3c4ed22b --- /dev/null +++ b/gently/eval/__init__.py @@ -0,0 +1,32 @@ +"""Eval / replay / shadow primitives. + +Substrate for testing orchestrator architectures without running real +hardware. The three layers: + + EventCapture — records every EventBus event to a per-session jsonl + file so the agent's input stream is durable. + EventReplay — reads a captured jsonl and republishes events to a + target bus, preserving original timestamps. + ShadowRunner — hosts candidate orchestrators that subscribe to the + live (or replayed) bus, log their decisions, and + never touch hardware. Diff their decision logs to + compare architectures. + +See docs/EVAL.md (TODO) for usage. +""" + +from .event_capture import EventCapture +from .event_replay import EventReplay +from .decision_log import Decision, DecisionLog, DecisionTrigger +from .shadow import OrchestratorCandidate, ShadowRunner, NoOpCandidate + +__all__ = [ + "EventCapture", + "EventReplay", + "Decision", + "DecisionLog", + "DecisionTrigger", + "OrchestratorCandidate", + "ShadowRunner", + "NoOpCandidate", +] diff --git a/gently/eval/decision_log.py b/gently/eval/decision_log.py new file mode 100644 index 00000000..e6d713e9 --- /dev/null +++ b/gently/eval/decision_log.py @@ -0,0 +1,170 @@ +"""DecisionLog — records each "decision moment" the orchestrator (or a +shadow candidate) acts on. 
+ +A "decision moment" is whenever the agent wakes up and produces an output: +a Claude tool call, a refusal, a chat reply, or even an explicit no-op +("I see what happened, nothing to do"). Capturing these gives us the diff +substrate for shadow-mode A/B: same input event stream, different +candidates, compare what each decided. + +File format: one JSON object per line, written to +D:/Gently3/sessions/{id}/decisions.jsonl (or wherever the caller points +it). Lossless enough to reconstruct what the agent saw + chose, terse +enough to skim across sessions. +""" + +from __future__ import annotations + +import json +import logging +import threading +from dataclasses import dataclass, field, asdict +from datetime import datetime +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .event_capture import _json_default + +logger = logging.getLogger(__name__) + + +class DecisionTrigger(str, Enum): + """What woke the agent up for this decision moment.""" + USER_MESSAGE = "user_message" + EVENT = "event" # event-driven (perception, error, etc.) + TICK = "tick" # scheduled / periodic checkpoint + PHASE = "phase" # plan phase boundary (between embryos / timepoints) + STARTUP = "startup" # initial session bring-up + UNKNOWN = "unknown" + + +@dataclass +class Decision: + """A single decision moment. + + The fields try to capture three things: + WHY the agent woke up: trigger, trigger_detail + WHAT it saw: context_summary, recent_event_ids + WHAT it did: tool_calls, response_text + + `prompt_hash` is a stable fingerprint of the actual prompt+context + sent to Claude so two candidates with byte-identical input but + different decisions can be told apart by a single field. 
+ """ + timestamp: datetime + agent: str # "production" or candidate name + trigger: DecisionTrigger + trigger_detail: Optional[str] = None # event_id, user message excerpt, tick name + + tool_calls: List[Dict[str, Any]] = field(default_factory=list) + response_text: Optional[str] = None + prompt_hash: Optional[str] = None + + context_summary: Optional[str] = None # one-line description of state + recent_event_ids: List[str] = field(default_factory=list) + + duration_ms: Optional[float] = None # how long the decision took + error: Optional[str] = None # if the decision moment errored + + def to_dict(self) -> Dict[str, Any]: + return { + "timestamp": self.timestamp.isoformat(), + "agent": self.agent, + "trigger": self.trigger.value, + "trigger_detail": self.trigger_detail, + "tool_calls": self.tool_calls, + "response_text": self.response_text, + "prompt_hash": self.prompt_hash, + "context_summary": self.context_summary, + "recent_event_ids": self.recent_event_ids, + "duration_ms": self.duration_ms, + "error": self.error, + } + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "Decision": + return cls( + timestamp=datetime.fromisoformat(d["timestamp"]), + agent=d.get("agent", "unknown"), + trigger=DecisionTrigger(d.get("trigger", "unknown")), + trigger_detail=d.get("trigger_detail"), + tool_calls=d.get("tool_calls") or [], + response_text=d.get("response_text"), + prompt_hash=d.get("prompt_hash"), + context_summary=d.get("context_summary"), + recent_event_ids=d.get("recent_event_ids") or [], + duration_ms=d.get("duration_ms"), + error=d.get("error"), + ) + + +class DecisionLog: + """Append-only jsonl sink for Decisions. 
Thread-safe.""" + + def __init__(self, path: Path): + self.path = Path(path) + self._fp = None + self._lock = threading.Lock() + self._count = 0 + + def open(self) -> None: + if self._fp is not None: + return + self.path.parent.mkdir(parents=True, exist_ok=True) + self._fp = self.path.open("a", encoding="utf-8") + logger.info("DecisionLog: writing to %s", self.path) + + def close(self) -> None: + with self._lock: + if self._fp is not None: + try: + self._fp.close() + except Exception: + logger.exception("DecisionLog: close failed") + self._fp = None + logger.info("DecisionLog: closed (%d decisions written)", self._count) + + def append(self, decision: Decision) -> None: + try: + line = json.dumps(decision.to_dict(), default=_json_default) + except Exception: + logger.exception("DecisionLog: failed to serialise %s", decision) + return + with self._lock: + if self._fp is None: + self.open() + try: + self._fp.write(line + "\n") + self._fp.flush() + self._count += 1 + except Exception: + logger.exception("DecisionLog: write failed") + + @property + def count(self) -> int: + return self._count + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc, tb): + self.close() + return False + + def read(self) -> List[Decision]: + """Read every decision back from disk. 
Quick + dirty diff substrate.""" + if not self.path.exists(): + return [] + out: List[Decision] = [] + with self.path.open("r", encoding="utf-8") as f: + for line_no, raw in enumerate(f, start=1): + raw = raw.strip() + if not raw: + continue + try: + out.append(Decision.from_dict(json.loads(raw))) + except Exception: + logger.exception("DecisionLog: parse failure on line %d", line_no) + return out diff --git a/gently/eval/event_capture.py b/gently/eval/event_capture.py new file mode 100644 index 00000000..c92c9fcd --- /dev/null +++ b/gently/eval/event_capture.py @@ -0,0 +1,162 @@ +"""EventCapture — wildcard-subscribe to an EventBus and append every event +to a per-session jsonl file. + +The captured file is the substrate for replay and shadow-mode testing of +candidate orchestrators. High-volume telemetry types (DEVICE_STATE_UPDATE, +BOTTOM_CAMERA_FRAME) are filtered out by default — a 12-hour timelapse +would otherwise produce ~250 MB of polling noise and drown the meaningful +events (perception completions, operator actions, errors, plan boundaries). +Replay can reconstruct world state from the meaningful events plus the +state-snapshot model; it doesn't need the raw telemetry frames. + +File format: one JSON object per line, mirroring Event.to_dict(): + { + "event_type": "EMBRYOS_UPDATE", + "data": {...}, + "source": "agent.experiment", + "timestamp": "2026-05-15T15:32:55.123456", + "event_id": "abc12345", + "correlation_id": null + } +""" + +from __future__ import annotations + +import json +import logging +import threading +from dataclasses import asdict, is_dataclass +from datetime import date, datetime +from enum import Enum +from pathlib import Path +from typing import Optional, Set + +from gently.core.event_bus import Event, EventBus, EventType, _NO_HISTORY_TYPES + +logger = logging.getLogger(__name__) + + +class EventCapture: + """Append-only jsonl sink for an EventBus. 
+ + Lifecycle: + capture = EventCapture(path) + capture.start(bus) # opens file, subscribes + ... + capture.stop() # unsubscribes, closes file + + Thread-safe — bus dispatch can come from any thread; writes are + serialised through a lock. + """ + + # By default the same set of high-volume telemetry types the EventBus + # itself skips for its history deque. The rationale carries over: at + # 5 Hz over hours these would dominate the log without adding signal + # that replay / diff can use. + DEFAULT_SKIP: Set[EventType] = frozenset(_NO_HISTORY_TYPES) + + def __init__(self, path: Path, *, + skip: Optional[Set[EventType]] = None): + self.path = Path(path) + self._skip = self.DEFAULT_SKIP if skip is None else frozenset(skip) + self._fp = None + self._unsub = None + self._lock = threading.Lock() + self._count = 0 + self._skipped = 0 + + def start(self, bus: EventBus) -> None: + """Open the capture file and subscribe to the bus (idempotent).""" + if self._fp is not None: + return + self.path.parent.mkdir(parents=True, exist_ok=True) + self._fp = self.path.open("a", encoding="utf-8") + # Sync subscription on purpose — capture is fast (single file write) + # and we want capture order to match dispatch order without async + # scheduling ambiguity. 
+ self._unsub = bus.subscribe("*", self._on_event) + logger.info("EventCapture: writing to %s", self.path) + + def stop(self) -> None: + """Unsubscribe and close the file (idempotent).""" + if self._unsub is not None: + try: + self._unsub() + except Exception: + logger.exception("EventCapture: unsubscribe failed") + self._unsub = None + with self._lock: + if self._fp is not None: + try: + self._fp.close() + except Exception: + logger.exception("EventCapture: file close failed") + self._fp = None + logger.info("EventCapture: closed (%d captured, %d skipped)", + self._count, self._skipped) + + def __del__(self): + # Best-effort safety net for cases where the owner forgets to call + # stop() — never let a forgotten file handle outlive the process' + # capture object. We can't rely on this for correctness (GC timing + # is undefined), but it makes tests and dev sessions tidier. + try: + self.stop() + except Exception: + pass + + @property + def count(self) -> int: + return self._count + + def _on_event(self, event: Event) -> None: + if event.event_type in self._skip: + self._skipped += 1 + return + try: + line = json.dumps(event.to_dict(), default=_json_default) + except Exception: + logger.exception("EventCapture: failed to serialise %s", event) + return + with self._lock: + if self._fp is None: + return + try: + self._fp.write(line + "\n") + self._fp.flush() + self._count += 1 + except Exception: + logger.exception("EventCapture: write failed for %s", event) + + +def _json_default(obj): + """Last-resort serialiser for types json.dumps can't natively handle. + + Designed to be lossy-but-useful: numpy arrays become lists, datetimes + become ISO strings, dataclasses become dicts, anything else falls back + to repr() so the line is at least valid JSON. 
+ """ + if isinstance(obj, (datetime, date)): + return obj.isoformat() + if isinstance(obj, Path): + return str(obj) + if isinstance(obj, Enum): + return obj.name + if is_dataclass(obj): + try: + return asdict(obj) + except Exception: + pass + try: + import numpy as np + if isinstance(obj, np.generic): + return obj.item() + if isinstance(obj, np.ndarray): + return obj.tolist() + except ImportError: + pass + if isinstance(obj, set): + return sorted(obj, key=str) + if isinstance(obj, bytes): + return obj.decode("utf-8", errors="replace") + return repr(obj) diff --git a/gently/eval/event_replay.py b/gently/eval/event_replay.py new file mode 100644 index 00000000..092ec719 --- /dev/null +++ b/gently/eval/event_replay.py @@ -0,0 +1,127 @@ +"""EventReplay — reads a captured events jsonl and republishes events to a +target EventBus. + +Two modes: + fast events as fast as the bus can dispatch (default) + real-time inserts sleep delays between events to preserve the original + cadence — useful when a candidate's behaviour depends on + time-since-last-event + +Original Event timestamps are preserved by going through +EventBus.publish_event() (which keeps the dataclass instance untouched) +rather than EventBus.publish() (which constructs a fresh Event with +datetime.now()). Candidates can therefore reason about historical timing +as if they were live. +""" + +from __future__ import annotations + +import json +import logging +import time +from datetime import datetime +from pathlib import Path +from typing import Callable, Iterator, Optional + +from gently.core.event_bus import Event, EventBus + +logger = logging.getLogger(__name__) + + +class EventReplay: + """Stream-replays an events.jsonl into a target bus.""" + + def __init__(self, path: Path): + self.path = Path(path) + if not self.path.exists(): + raise FileNotFoundError(f"event log not found: {self.path}") + + def events(self) -> Iterator[Event]: + """Yield each Event from the captured log, in order. 
+ + Lines that don't parse are skipped with a warning rather than + aborting the whole replay — a partial log is better than no log. + """ + with self.path.open("r", encoding="utf-8") as f: + for line_no, raw in enumerate(f, start=1): + raw = raw.strip() + if not raw: + continue + try: + record = json.loads(raw) + except json.JSONDecodeError: + logger.warning("EventReplay: malformed line %d in %s", + line_no, self.path) + continue + try: + yield Event.from_dict(record) + except KeyError: + # Unknown EventType — could be a newer enum the + # capturing process knew about. Skip rather than abort. + logger.warning("EventReplay: unknown event_type on line %d", + line_no) + except Exception: + logger.exception("EventReplay: parse failure on line %d", + line_no) + + def replay( + self, + target: EventBus, + *, + real_time: bool = False, + time_scale: float = 1.0, + on_event: Optional[Callable[[Event], None]] = None, + ) -> int: + """Replay the captured events to ``target``. Returns count emitted. + + Parameters + ---------- + target: + EventBus to publish into. The bus's existing subscribers (and + any shadow candidates registered on it) will see the events. + real_time: + If True, sleep between events to reproduce the original + cadence. If False, dispatch as fast as the bus can handle. + time_scale: + Only meaningful in real-time mode. ``time_scale=4`` runs the + replay at 4× speed (sleep delays divided by 4). Must be > 0. + on_event: + Optional callback invoked after each event is published, for + instrumentation / progress reporting. Exceptions are caught + and logged. 
+ """ + if time_scale <= 0: + raise ValueError("time_scale must be > 0") + + emitted = 0 + prev_ts: Optional[datetime] = None + wall_start = time.monotonic() + for ev in self.events(): + if real_time and prev_ts is not None: + delta = (ev.timestamp - prev_ts).total_seconds() / time_scale + if delta > 0: + time.sleep(delta) + target.publish_event(ev) + emitted += 1 + if on_event is not None: + try: + on_event(ev) + except Exception: + logger.exception("EventReplay: on_event callback failed") + prev_ts = ev.timestamp + wall = time.monotonic() - wall_start + logger.info( + "EventReplay: emitted %d events in %.2fs (real_time=%s, time_scale=%g)", + emitted, wall, real_time, time_scale, + ) + return emitted + + def event_types(self) -> dict: + """Return a {EventType.name: count} histogram of the log. + + Cheap pre-flight diagnostic before running an expensive replay. + """ + counts: dict = {} + for ev in self.events(): + counts[ev.event_type.name] = counts.get(ev.event_type.name, 0) + 1 + return counts diff --git a/gently/eval/shadow.py b/gently/eval/shadow.py new file mode 100644 index 00000000..05335c96 --- /dev/null +++ b/gently/eval/shadow.py @@ -0,0 +1,229 @@ +"""Shadow orchestrator scaffolding. + +A candidate orchestrator runs alongside production: it sees the same +events but its decisions are LOGGED, not enacted. Diff the decision logs +between production and a candidate (or between two candidates) to compare +architectures on identical input streams. + +Two entry points: + + OrchestratorCandidate + Abstract base class that every candidate subclasses. Receives events + via on_event(), with optional on_start() / on_stop() hooks; is given a DecisionLog to write + into. Never gets to call tools that touch hardware — by construction + its only output is the log. + + ShadowRunner + Hosts a set of candidates against a single EventBus. Wildcards onto + the bus and forwards each event to every registered candidate. + Lifecycle (start / stop) keeps subscriptions tidy.
+ +The simplest candidate is NoOpCandidate, included as a worked example +and as proof-of-life for the wiring (events visible? decision log +writeable? shutdown clean?). +""" + +from __future__ import annotations + +import asyncio +import logging +import threading +from abc import ABC, abstractmethod +from datetime import datetime +from pathlib import Path +from typing import Awaitable, Callable, Dict, List, Optional + +from gently.core.event_bus import Event, EventBus + +from .decision_log import Decision, DecisionLog, DecisionTrigger + +logger = logging.getLogger(__name__) + + +class OrchestratorCandidate(ABC): + """Base class for a shadow orchestrator candidate. + + A candidate is given: + - its name (e.g. "reactive-v1", "haiku-summariser") + - a DecisionLog to write decisions into + + It receives events synchronously via ``on_event``. If it needs to + do heavy work (LLM call, long compute), it should hand off to its + own task / thread and write into the log asynchronously. + + Candidates MUST NOT touch hardware. They have no access to the + device-layer client, no permission to publish events back onto the + bus, no MMCore handle. The only side effect they're allowed is + writing to their decision log. + """ + + def __init__(self, name: str, decisions: DecisionLog): + self.name = name + self.decisions = decisions + + @abstractmethod + def on_event(self, event: Event) -> None: + """Handle one event from the bus. 
Synchronous, must not block long.""" + + def on_start(self) -> None: + """Called once when the shadow runner attaches this candidate.""" + + def on_stop(self) -> None: + """Called once when the shadow runner detaches this candidate.""" + + # ---- helpers candidates can use --------------------------------------- + + def log_decision( + self, + *, + trigger: DecisionTrigger, + trigger_detail: Optional[str] = None, + tool_calls: Optional[List[Dict]] = None, + response_text: Optional[str] = None, + context_summary: Optional[str] = None, + recent_event_ids: Optional[List[str]] = None, + prompt_hash: Optional[str] = None, + duration_ms: Optional[float] = None, + error: Optional[str] = None, + ) -> None: + self.decisions.append(Decision( + timestamp=datetime.now(), + agent=self.name, + trigger=trigger, + trigger_detail=trigger_detail, + tool_calls=tool_calls or [], + response_text=response_text, + context_summary=context_summary, + recent_event_ids=recent_event_ids or [], + prompt_hash=prompt_hash, + duration_ms=duration_ms, + error=error, + )) + + +class NoOpCandidate(OrchestratorCandidate): + """Trivial candidate: logs every event it sees as a decision marker. + + Useful as the smoke test for the wiring (events visible? decision + log writeable? shutdown clean?) and as the template every real + candidate evolves from. + """ + + def __init__(self, name: str, decisions: DecisionLog, + *, watch: Optional[List[str]] = None): + super().__init__(name, decisions) + # Optional whitelist of event_type names to react to. None = all. 
+ self._watch = set(watch) if watch else None + self._seen = 0 + + def on_event(self, event: Event) -> None: + if self._watch is not None and event.event_type.name not in self._watch: + return + self._seen += 1 + self.log_decision( + trigger=DecisionTrigger.EVENT, + trigger_detail=event.event_type.name, + response_text=f"(noop) seen {event.event_type.name} from {event.source}", + recent_event_ids=[event.event_id], + context_summary=f"noop candidate; events seen so far: {self._seen}", + ) + + +class ShadowRunner: + """Hosts a set of OrchestratorCandidates against an EventBus. + + Wildcards onto the bus, dispatches each event to every registered + candidate. Candidates' exceptions are caught and logged so one + bad candidate doesn't take down the others or affect the live bus. + + The runner itself never enacts decisions — it only forwards events + and lets candidates write their own logs. + """ + + def __init__(self, bus: EventBus): + self.bus = bus + self._candidates: List[OrchestratorCandidate] = [] + self._unsub: Optional[Callable[[], None]] = None + self._lock = threading.RLock() + self._running = False + + def add(self, candidate: OrchestratorCandidate) -> None: + with self._lock: + self._candidates.append(candidate) + if self._running: + try: + candidate.on_start() + except Exception: + logger.exception( + "ShadowRunner: on_start failed for %s", candidate.name + ) + + def remove(self, candidate: OrchestratorCandidate) -> None: + with self._lock: + try: + self._candidates.remove(candidate) + except ValueError: + return + try: + candidate.on_stop() + except Exception: + logger.exception( + "ShadowRunner: on_stop failed for %s", candidate.name + ) + + def start(self) -> None: + """Subscribe to the bus and notify every candidate. 
Idempotent.""" + with self._lock: + if self._running: + return + self._unsub = self.bus.subscribe("*", self._on_event) + for c in self._candidates: + try: + c.on_start() + except Exception: + logger.exception( + "ShadowRunner: on_start failed for %s", c.name + ) + self._running = True + logger.info( + "ShadowRunner: started with %d candidate(s)", len(self._candidates) + ) + + def stop(self) -> None: + """Unsubscribe from the bus and notify every candidate. Idempotent.""" + with self._lock: + if not self._running: + return + if self._unsub is not None: + try: + self._unsub() + except Exception: + logger.exception("ShadowRunner: unsubscribe failed") + self._unsub = None + for c in self._candidates: + try: + c.on_stop() + except Exception: + logger.exception( + "ShadowRunner: on_stop failed for %s", c.name + ) + self._running = False + logger.info("ShadowRunner: stopped") + + @property + def candidates(self) -> List[OrchestratorCandidate]: + with self._lock: + return list(self._candidates) + + def _on_event(self, event: Event) -> None: + # Snapshot under the lock so a remove() mid-dispatch doesn't break us. + with self._lock: + candidates = list(self._candidates) + for c in candidates: + try: + c.on_event(event) + except Exception: + logger.exception( + "ShadowRunner: candidate %s raised on %s", + c.name, event, + ) diff --git a/scripts/replay_session.py b/scripts/replay_session.py new file mode 100644 index 00000000..f8ffe208 --- /dev/null +++ b/scripts/replay_session.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +"""Replay a captured session's events into a fresh EventBus. + +Useful for: + - Diffing what a candidate orchestrator would have decided from the + same input stream the production agent saw. + - Inspecting the event histogram of a session before deciding what to + investigate ("did this session even fire any ERROR_OCCURRED?"). + - Re-running a session offline with a different filter / candidate + set without touching hardware. 
+ +Examples +-------- +List the events recorded in a session: + python scripts/replay_session.py 2e0e0356 --histogram + +Replay as fast as possible: + python scripts/replay_session.py 2e0e0356 + +Replay with original cadence, 4x speed, and a NoOpCandidate writing a +decision log into the current directory: + python scripts/replay_session.py 2e0e0356 --real-time --time-scale 4 --candidate noop-test + +Custom root (default: $GENTLY_STORAGE_PATH or D:/Gently3): + python scripts/replay_session.py 2e0e0356 --root /path/to/sessions +""" + +from __future__ import annotations + +import argparse +import logging +import os +import sys +from pathlib import Path + +# Allow `python scripts/replay_session.py …` from the repo root without +# requiring PYTHONPATH=.; the project root is one level up from this file. +_REPO_ROOT = Path(__file__).resolve().parent.parent +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) + + +def main(argv=None) -> int: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "session_id", + help="Session id (full or prefix) to replay", + ) + parser.add_argument( + "--root", + default=None, + help="Storage root (default: $GENTLY_STORAGE_PATH or D:/Gently3)", + ) + parser.add_argument( + "--real-time", + action="store_true", + help="Preserve original cadence between events (default: fast)", + ) + parser.add_argument( + "--time-scale", + type=float, + default=1.0, + help="Real-time replay speed multiplier (default: 1.0)", + ) + parser.add_argument( + "--candidate", + default=None, + help="Attach a NoOpCandidate; decisions written to replay-decisions-.jsonl", + ) + parser.add_argument( + "--histogram", + action="store_true", + help="Print event-type histogram, don't replay", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", + help="Verbose logging", + ) + args = parser.parse_args(argv) + + logging.basicConfig( + 
level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(message)s", + ) + + from gently.core.event_bus import EventBus + from gently.core.file_store import FileStore + from gently.eval import EventReplay, DecisionLog, ShadowRunner, NoOpCandidate + + root = args.root or os.environ.get("GENTLY_STORAGE_PATH", "D:/Gently3") + store = FileStore(root=Path(root)) + + sessions = store.list_sessions() + matches = [s for s in sessions if s["session_id"].startswith(args.session_id)] + if not matches: + print(f"No session matching '{args.session_id}'", file=sys.stderr) + return 1 + if len(matches) > 1: + print(f"Multiple sessions match '{args.session_id}':", file=sys.stderr) + for s in matches: + print(f" {s['session_id']}", file=sys.stderr) + return 1 + + session = matches[0] + session_dir = store._session_dir(session["session_id"]) + log_path = session_dir / "events.jsonl" + if not log_path.exists(): + print(f"No events.jsonl in {session_dir}", file=sys.stderr) + return 1 + + rep = EventReplay(log_path) + + if args.histogram: + hist = rep.event_types() + total = sum(hist.values()) + print(f"{total} events in {log_path}:") + for ev, n in sorted(hist.items(), key=lambda kv: -kv[1]): + print(f" {n:>6} {ev}") + return 0 + + bus = EventBus() + runner = None + dlog = None + if args.candidate: + out = Path.cwd() / f"replay-decisions-{session['session_id'][:8]}.jsonl" + dlog = DecisionLog(out) + dlog.open() + runner = ShadowRunner(bus) + runner.add(NoOpCandidate(args.candidate, dlog)) + runner.start() + print(f"Candidate '{args.candidate}' attached; decisions -> {out}") + + try: + emitted = rep.replay( + bus, real_time=args.real_time, time_scale=args.time_scale, + ) + print(f"Replayed {emitted} events from session {session['session_id']}") + finally: + if runner is not None: + runner.stop() + if dlog is not None: + dlog.close() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_eval.py 
b/tests/test_eval.py new file mode 100644 index 00000000..8bc97064 --- /dev/null +++ b/tests/test_eval.py @@ -0,0 +1,363 @@ +"""Tests for the gently.eval package: capture / replay / shadow.""" + +from __future__ import annotations + +import json +import threading +import time +from datetime import datetime +from pathlib import Path + +import numpy as np +import pytest + +from gently.core.event_bus import EventBus, EventType, Event +from gently.eval import ( + Decision, + DecisionLog, + DecisionTrigger, + EventCapture, + EventReplay, + NoOpCandidate, + ShadowRunner, +) + + +# ============================================================================= +# EventCapture +# ============================================================================= + +def test_capture_writes_meaningful_events_skips_telemetry(tmp_path: Path): + """Capture skips high-volume telemetry by default but keeps the rest.""" + bus = EventBus() + log = tmp_path / "events.jsonl" + cap = EventCapture(log) + cap.start(bus) + + bus.publish(EventType.EMBRYOS_UPDATE, {"a": 1}, source="t") + bus.publish(EventType.DEVICE_STATE_UPDATE, {"x": 0}, source="t") # skipped + bus.publish(EventType.STAGE_MOVED, {"x": 100.0}, source="t") + bus.publish(EventType.BOTTOM_CAMERA_FRAME, {"jpeg": ""}, source="t") # skipped + cap.stop() + + assert cap.count == 2 # the two non-telemetry events + lines = log.read_text(encoding="utf-8").strip().split("\n") + assert len(lines) == 2 + parsed = [json.loads(ln) for ln in lines] + assert {p["event_type"] for p in parsed} == {"EMBRYOS_UPDATE", "STAGE_MOVED"} + + +def test_capture_handles_non_json_native_payloads(tmp_path: Path): + """numpy scalars / arrays / Paths / datetimes / sets serialise cleanly.""" + bus = EventBus() + log = tmp_path / "events.jsonl" + cap = EventCapture(log) + cap.start(bus) + + bus.publish(EventType.STAGE_MOVED, { + "np_scalar": np.float64(1.5), + "np_array": np.array([1, 2, 3]), + "path": Path("/tmp/foo.tif"), + "now": datetime(2026, 5, 15, 12, 0, 0), 
+ "as_set": {"a", "b"}, + }, source="t") + cap.stop() + + record = json.loads(log.read_text().strip()) + data = record["data"] + assert data["np_scalar"] == pytest.approx(1.5) + assert data["np_array"] == [1, 2, 3] + assert "tmp" in data["path"] and "foo.tif" in data["path"] + assert data["now"] == "2026-05-15T12:00:00" + assert sorted(data["as_set"]) == ["a", "b"] + + +def test_capture_start_stop_idempotent(tmp_path: Path): + """Repeated start / stop don't error or duplicate subscribers.""" + bus = EventBus() + cap = EventCapture(tmp_path / "events.jsonl") + cap.start(bus) + cap.start(bus) # second start = no-op + bus.publish(EventType.STAGE_MOVED, {}, source="t") + cap.stop() + cap.stop() # second stop = no-op + # Even with two start() calls the single subscription captures the event + # exactly once. + assert cap.count == 1 + + +def test_capture_thread_safety(tmp_path: Path): + """Concurrent publishers from many threads all land in the log.""" + bus = EventBus() + cap = EventCapture(tmp_path / "events.jsonl") + cap.start(bus) + + N_THREADS = 8 + N_EVENTS_PER_THREAD = 50 + + def worker(idx: int): + for i in range(N_EVENTS_PER_THREAD): + bus.publish(EventType.STAGE_MOVED, + {"t": idx, "i": i}, source=f"thread-{idx}") + + threads = [threading.Thread(target=worker, args=(i,)) for i in range(N_THREADS)] + for t in threads: + t.start() + for t in threads: + t.join() + + cap.stop() + # All events visible, jsonl is well-formed (each line a valid JSON object). 
+ assert cap.count == N_THREADS * N_EVENTS_PER_THREAD + lines = (tmp_path / "events.jsonl").read_text().strip().split("\n") + assert len(lines) == N_THREADS * N_EVENTS_PER_THREAD + for ln in lines: + json.loads(ln) + + +# ============================================================================= +# EventReplay +# ============================================================================= + +def test_replay_preserves_event_fields(tmp_path: Path): + """Round-trip: capture, then replay, then verify Event field identity.""" + src_bus = EventBus() + cap = EventCapture(tmp_path / "events.jsonl") + cap.start(src_bus) + + e1 = src_bus.publish(EventType.EMBRYOS_UPDATE, + {"embryos": [{"id": "e1"}], "count": 1}, + source="capture-test", correlation_id="corr-A") + e2 = src_bus.publish(EventType.ERROR_OCCURRED, + {"msg": "bang"}, source="capture-test") + cap.stop() + + rep = EventReplay(tmp_path / "events.jsonl") + dest = EventBus() + received: list[Event] = [] + dest.subscribe("*", lambda ev: received.append(ev)) + rep.replay(dest) + + assert len(received) == 2 + + # event_type, source, correlation_id, event_id, timestamp preserved + by_id = {r.event_id: r for r in received} + r1 = by_id[e1.event_id] + r2 = by_id[e2.event_id] + assert r1.event_type == EventType.EMBRYOS_UPDATE + assert r1.source == "capture-test" + assert r1.correlation_id == "corr-A" + assert r1.data == e1.data + assert r1.timestamp == e1.timestamp + assert r2.event_type == EventType.ERROR_OCCURRED + assert r2.timestamp == e2.timestamp + + +def test_replay_histogram(tmp_path: Path): + bus = EventBus() + cap = EventCapture(tmp_path / "events.jsonl") + cap.start(bus) + bus.publish(EventType.STAGE_MOVED, {}, source="t") + bus.publish(EventType.STAGE_MOVED, {}, source="t") + bus.publish(EventType.EMBRYOS_UPDATE, {}, source="t") + cap.stop() + + rep = EventReplay(tmp_path / "events.jsonl") + hist = rep.event_types() + assert hist == {"STAGE_MOVED": 2, "EMBRYOS_UPDATE": 1} + + +def 
test_replay_real_time_respects_cadence(tmp_path: Path): + """Two events 200 ms apart replay in ~200 ms in real-time mode.""" + bus = EventBus() + cap = EventCapture(tmp_path / "events.jsonl") + cap.start(bus) + bus.publish(EventType.STAGE_MOVED, {"i": 0}, source="t") + time.sleep(0.2) + bus.publish(EventType.STAGE_MOVED, {"i": 1}, source="t") + cap.stop() + + dest = EventBus() + dest.subscribe("*", lambda ev: None) + + t0 = time.monotonic() + EventReplay(tmp_path / "events.jsonl").replay(dest, real_time=True) + elapsed = time.monotonic() - t0 + assert 0.10 < elapsed < 0.40, f"real-time elapsed={elapsed}" + + +def test_replay_time_scale_speeds_up(tmp_path: Path): + """time_scale=4 should approximately quarter the real-time wall delay.""" + bus = EventBus() + cap = EventCapture(tmp_path / "events.jsonl") + cap.start(bus) + bus.publish(EventType.STAGE_MOVED, {"i": 0}, source="t") + time.sleep(0.4) + bus.publish(EventType.STAGE_MOVED, {"i": 1}, source="t") + cap.stop() + + dest = EventBus() + dest.subscribe("*", lambda ev: None) + t0 = time.monotonic() + EventReplay(tmp_path / "events.jsonl").replay( + dest, real_time=True, time_scale=4.0, + ) + elapsed = time.monotonic() - t0 + # 0.4s scaled by 4 -> ~0.1s, with generous slack for scheduling. 
+ assert 0.03 < elapsed < 0.30, f"scaled elapsed={elapsed}" + + +def test_replay_skips_malformed_lines(tmp_path: Path): + """A garbage line in the log doesn't abort the whole replay.""" + log = tmp_path / "events.jsonl" + log.write_text( + json.dumps({ + "event_type": "STAGE_MOVED", "data": {}, "source": "t", + "timestamp": "2026-01-01T00:00:00", "event_id": "abc", + "correlation_id": None, + }) + "\n" + "not valid json garbage\n" + + json.dumps({ + "event_type": "EMBRYOS_UPDATE", "data": {}, "source": "t", + "timestamp": "2026-01-01T00:00:01", "event_id": "def", + "correlation_id": None, + }) + "\n", + encoding="utf-8", + ) + rep = EventReplay(log) + seen = list(rep.events()) + assert [s.event_type.name for s in seen] == ["STAGE_MOVED", "EMBRYOS_UPDATE"] + + +def test_replay_missing_file_raises(tmp_path: Path): + with pytest.raises(FileNotFoundError): + EventReplay(tmp_path / "nope.jsonl") + + +# ============================================================================= +# DecisionLog +# ============================================================================= + +def test_decision_log_round_trip(tmp_path: Path): + log_path = tmp_path / "decisions.jsonl" + dlog = DecisionLog(log_path) + dlog.open() + + d1 = Decision( + timestamp=datetime(2026, 5, 15, 12, 0, 0), + agent="prod", + trigger=DecisionTrigger.USER_MESSAGE, + trigger_detail="detect embryos", + tool_calls=[{"name": "detect_embryos", "input": {}}], + response_text="Detected 4 embryos.", + context_summary="2 embryos active", + recent_event_ids=["abc12345"], + prompt_hash="deadbeef", + duration_ms=820.5, + ) + dlog.append(d1) + dlog.append(Decision( + timestamp=datetime(2026, 5, 15, 12, 0, 5), + agent="prod", + trigger=DecisionTrigger.EVENT, + trigger_detail="EMBRYOS_UPDATE", + error=None, + )) + dlog.close() + + back = dlog.read() + assert len(back) == 2 + assert back[0].agent == "prod" + assert back[0].trigger is DecisionTrigger.USER_MESSAGE + assert back[0].tool_calls == [{"name": "detect_embryos", 
"input": {}}] + assert back[0].duration_ms == pytest.approx(820.5) + assert back[1].trigger is DecisionTrigger.EVENT + assert back[1].trigger_detail == "EMBRYOS_UPDATE" + + +def test_decision_log_context_manager(tmp_path: Path): + log_path = tmp_path / "decisions.jsonl" + with DecisionLog(log_path) as dlog: + dlog.append(Decision( + timestamp=datetime.now(), + agent="t", + trigger=DecisionTrigger.TICK, + )) + assert log_path.exists() + assert len(log_path.read_text().splitlines()) == 1 + + +# ============================================================================= +# ShadowRunner + NoOpCandidate +# ============================================================================= + +def test_shadow_runner_forwards_to_all_candidates(tmp_path: Path): + bus = EventBus() + log_a = DecisionLog(tmp_path / "a.jsonl") + log_b = DecisionLog(tmp_path / "b.jsonl") + log_a.open() + log_b.open() + + cand_a = NoOpCandidate("cand-a", log_a) + cand_b = NoOpCandidate("cand-b", log_b) + runner = ShadowRunner(bus) + runner.add(cand_a) + runner.add(cand_b) + runner.start() + + bus.publish(EventType.STAGE_MOVED, {"x": 1}, source="t") + bus.publish(EventType.EMBRYOS_UPDATE, {"count": 1}, source="t") + runner.stop() + log_a.close() + log_b.close() + + decisions_a = log_a.read() + decisions_b = log_b.read() + assert len(decisions_a) == 2 + assert len(decisions_b) == 2 + assert [d.trigger_detail for d in decisions_a] == ["STAGE_MOVED", "EMBRYOS_UPDATE"] + assert [d.agent for d in decisions_a] == ["cand-a", "cand-a"] + assert [d.agent for d in decisions_b] == ["cand-b", "cand-b"] + + +def test_shadow_runner_isolates_candidate_failures(tmp_path: Path): + """A failing candidate doesn't break delivery to its peers.""" + bus = EventBus() + log_ok = DecisionLog(tmp_path / "ok.jsonl") + log_ok.open() + + class BoomCandidate(NoOpCandidate): + def on_event(self, event): + raise RuntimeError("intentional") + + runner = ShadowRunner(bus) + runner.add(BoomCandidate("boom", DecisionLog(tmp_path / 
"boom.jsonl"))) + runner.add(NoOpCandidate("ok", log_ok)) + runner.start() + + bus.publish(EventType.STAGE_MOVED, {}, source="t") + runner.stop() + log_ok.close() + + # Production candidate still received the event. + assert len(log_ok.read()) == 1 + + +def test_shadow_runner_watch_filter(tmp_path: Path): + """NoOpCandidate(watch=[...]) only fires for matching event types.""" + bus = EventBus() + dlog = DecisionLog(tmp_path / "d.jsonl") + dlog.open() + runner = ShadowRunner(bus) + runner.add(NoOpCandidate("only-errors", dlog, watch=["ERROR_OCCURRED"])) + runner.start() + + bus.publish(EventType.STAGE_MOVED, {}, source="t") + bus.publish(EventType.EMBRYOS_UPDATE, {}, source="t") + bus.publish(EventType.ERROR_OCCURRED, {"msg": "x"}, source="t") + runner.stop() + dlog.close() + + decs = dlog.read() + assert len(decs) == 1 + assert decs[0].trigger_detail == "ERROR_OCCURRED" From 2f4e1a255d88ff397512b6179ccf84797f1ed4d9 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 17:46:29 -0400 Subject: [PATCH 09/71] eval: capture production decisions per user turn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the second half of the shadow-mode substrate. The agent now writes a Decision row to a per-session decisions.jsonl every time ConversationManager.call_claude returns — success or error. Pairs with the events.jsonl from Phase 6a so a candidate replay can be diffed against production turn-by-turn. What a production Decision captures ----------------------------------- - trigger always USER_MESSAGE for now (event/tick triggers land with the wake-router phase) - trigger_detail user message excerpt (200 chars) - tool_calls aggregated across the multi-step tool loop — every tool_use block Claude emitted during this turn - response_text final assistant text - prompt_hash short SHA-256 of (system_prompt, conversation_history) snapshotted BEFORE the tool loop appends to history. 
Same hash = same input; safe to compare candidate decisions against this one. - duration_ms wall time of the whole turn - error set on the failure path; the exception still re-raises to the caller so the existing error UX is unchanged Wiring ------ - gently/eval/decision_log.py new prompt_hash() helper (shared by production + candidates so the fingerprint format stays consistent) - gently/harness/conversation.py ConversationManager gains decision_log field; call_claude collects tool_use blocks across every Claude round, then writes one Decision in both success and except branches. Best-effort: a DecisionLog write failure never breaks the live agent. - gently/app/agent.py _init_decision_log opens session_dir/decisions.jsonl and assigns to self.conversation.decision_log; stop_decision_log mirrors stop_event_capture for shutdown cleanliness. - tests/test_eval.py +5 tests: prompt_hash stability and shape-tolerance; success path captures tool_calls + response + prompt_hash + duration; error path captures error + re-raises; no-log path is a clean no-op. Phase 6f. 20/20 tests green. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 75d7c9db7459fd3aebe8a73ca204c267816969de) --- gently/app/agent.py | 46 ++++++++ gently/eval/__init__.py | 3 +- gently/eval/decision_log.py | 23 ++++ gently/harness/conversation.py | 96 ++++++++++++++++ tests/test_eval.py | 198 +++++++++++++++++++++++++++++++++ 5 files changed, 365 insertions(+), 1 deletion(-) diff --git a/gently/app/agent.py b/gently/app/agent.py index b549e0f2..01c7371d 100644 --- a/gently/app/agent.py +++ b/gently/app/agent.py @@ -151,6 +151,11 @@ def __init__( # candidate orchestrator architectures. self.event_capture = None + # Decision log — what production decided at each turn (tool calls, + # response text, prompt hash). Pairs with event capture so a + # candidate replay can be diffed against production turn-by-turn. 
+ self.decision_log = None + # Timelapse orchestrator (initialized when microscope connected) self.timelapse_orchestrator: Optional[TimelapseOrchestrator] = None @@ -219,6 +224,10 @@ def __init__( # by default so a long timelapse doesn't bury the meaningful events. self._init_event_capture() + # Open the per-session production decision log and hand it to the + # conversation manager so each Claude round-trip is captured. + self._init_decision_log() + # Wire interaction logger and choice handler to conversation manager self.conversation.interaction_logger = self.interaction_logger self.conversation.choice_handler = self.choice_handler @@ -502,6 +511,43 @@ def stop_event_capture(self): logging.getLogger(__name__).exception("EventCapture stop failed") self.event_capture = None + def _init_decision_log(self): + """Open the per-session decisions.jsonl and wire it into conversation. + + Each call to ConversationManager.call_claude writes one Decision + row (success or error) describing what production decided for the + user turn. Shadow candidates write their own rows into separate + logs and the two are diffed offline. + """ + from gently.eval import DecisionLog + try: + session_dir = None + sid = self.session_id + if self.store is not None and sid: + session_dir = self.store._session_dir(sid) + if session_dir is None: + logging.getLogger(__name__).debug( + "DecisionLog: no session dir for %s — skipping", sid) + return + path = session_dir / "decisions.jsonl" + self.decision_log = DecisionLog(path) + self.decision_log.open() + self.conversation.decision_log = self.decision_log + except Exception: + logging.getLogger(__name__).exception("Failed to init decision log") + self.decision_log = None + + def stop_decision_log(self): + """Flush + close the decisions.jsonl. 
Idempotent; safe at shutdown.""" + if self.decision_log is not None: + try: + self.decision_log.close() + except Exception: + logging.getLogger(__name__).exception("DecisionLog close failed") + self.decision_log = None + if hasattr(self, "conversation") and self.conversation is not None: + self.conversation.decision_log = None + def _init_timelapse_orchestrator(self): """Initialize the timelapse orchestrator if microscope is connected.""" if not self._has_microscope(): diff --git a/gently/eval/__init__.py b/gently/eval/__init__.py index 3c4ed22b..49cd873e 100644 --- a/gently/eval/__init__.py +++ b/gently/eval/__init__.py @@ -17,7 +17,7 @@ from .event_capture import EventCapture from .event_replay import EventReplay -from .decision_log import Decision, DecisionLog, DecisionTrigger +from .decision_log import Decision, DecisionLog, DecisionTrigger, prompt_hash from .shadow import OrchestratorCandidate, ShadowRunner, NoOpCandidate __all__ = [ @@ -26,6 +26,7 @@ "Decision", "DecisionLog", "DecisionTrigger", + "prompt_hash", "OrchestratorCandidate", "ShadowRunner", "NoOpCandidate", diff --git a/gently/eval/decision_log.py b/gently/eval/decision_log.py index e6d713e9..0a014b61 100644 --- a/gently/eval/decision_log.py +++ b/gently/eval/decision_log.py @@ -15,6 +15,7 @@ from __future__ import annotations +import hashlib import json import logging import threading @@ -29,6 +30,28 @@ logger = logging.getLogger(__name__) +def prompt_hash(system_prompt: Any, messages: Any) -> str: + """Stable short fingerprint of the input the orchestrator saw. + + Two candidates seeing byte-identical (system_prompt, messages) get + the same hash; a difference here means they're working from different + context, so any decision divergence is expected. Used in shadow A/B + to filter out apples-to-oranges comparisons. + + SHA-256 truncated to 16 hex chars — enough to make accidental + collisions vanishingly unlikely at the scale of one session's + decisions, short enough to skim by eye in a log. 
+ """ + h = hashlib.sha256() + if isinstance(system_prompt, str): + h.update(system_prompt.encode("utf-8")) + else: + h.update(json.dumps(system_prompt, sort_keys=True, default=_json_default).encode("utf-8")) + h.update(b"\x1f") # separator so prompt boundary can't be ambiguous + h.update(json.dumps(messages, sort_keys=True, default=_json_default).encode("utf-8")) + return h.hexdigest()[:16] + + class DecisionTrigger(str, Enum): """What woke the agent up for this decision moment.""" USER_MESSAGE = "user_message" diff --git a/gently/harness/conversation.py b/gently/harness/conversation.py index 8384ba9c..765d16b2 100644 --- a/gently/harness/conversation.py +++ b/gently/harness/conversation.py @@ -17,6 +17,27 @@ logger = logging.getLogger(__name__) +def _extend_tool_calls(out: List[Dict[str, Any]], content_blocks) -> None: + """Append every tool_use block in content_blocks to out. + + Tolerates absent attributes (some SDK versions / mock objects) so it + never crashes the live agent on a content-shape surprise. + """ + if not content_blocks: + return + for block in content_blocks: + try: + if getattr(block, "type", None) != "tool_use": + continue + out.append({ + "name": getattr(block, "name", None), + "input": getattr(block, "input", None), + "id": getattr(block, "id", None), + }) + except Exception: + continue + + class ConversationManager: """ Manages Claude API conversations, tool execution, and token tracking. @@ -48,6 +69,11 @@ def __init__(self, client, model, tool_registry): self.choice_handler = None self.context_store = None # for tool_label + # Decision capture for orchestrator A/B testing. Set by the agent + # alongside the EventCapture once the session folder is known. None + # = no capture, so tests / harnesses without a session still work. 
+ self.decision_log = None + # ===== Quick Response ===== def try_quick_response(self, message: str, experiment, mode: str, @@ -175,6 +201,22 @@ async def call_claude(self, user_message: str, system_prompt, tools, } ) + # Snapshot inputs for decision capture BEFORE the tool loop starts + # appending to conversation_history. This is the state shadow + # candidates would need to reproduce production's input — same + # system_prompt and same starting messages. + decision_prompt_hash = None + if self.decision_log is not None: + try: + from gently.eval import prompt_hash as _prompt_hash + decision_prompt_hash = _prompt_hash( + system_prompt, list(self.conversation_history), + ) + except Exception: + logger.exception("Failed to compute decision prompt_hash") + + tool_calls_collected: List[Dict[str, Any]] = [] + assistant_message = "" error_occurred = None try: @@ -194,6 +236,7 @@ async def call_claude(self, user_message: str, system_prompt, tools, **api_kwargs ) self._track_token_usage(response) + _extend_tool_calls(tool_calls_collected, response.content) # Process tool calls while response.stop_reason == "tool_use": @@ -216,6 +259,7 @@ async def call_claude(self, user_message: str, system_prompt, tools, **api_kwargs ) self._track_token_usage(response) + _extend_tool_calls(tool_calls_collected, response.content) # Extract text response assistant_message = "" @@ -242,6 +286,14 @@ async def call_claude(self, user_message: str, system_prompt, tools, error=error_occurred, error_traceback=error_tb, ) + self._write_production_decision( + user_message=user_message, + tool_calls=tool_calls_collected, + response_text=assistant_message, + duration_ms=(time.time() - start_time) * 1000.0, + prompt_hash_value=decision_prompt_hash, + error=error_occurred, + ) raise if interaction and self.interaction_logger: @@ -251,10 +303,54 @@ async def call_claude(self, user_message: str, system_prompt, tools, total_duration_seconds=time.time() - start_time, ) + self._write_production_decision( + 
user_message=user_message, + tool_calls=tool_calls_collected, + response_text=assistant_message, + duration_ms=(time.time() - start_time) * 1000.0, + prompt_hash_value=decision_prompt_hash, + error=None, + ) + auto_save_fn() return assistant_message + def _write_production_decision( + self, + *, + user_message: str, + tool_calls: List[Dict[str, Any]], + response_text: str, + duration_ms: float, + prompt_hash_value: Optional[str], + error: Optional[str], + ) -> None: + """Persist one production Decision row (best-effort). + + Failures here are swallowed — decision capture must never break + the live agent. The DecisionLog itself is also tolerant of + serialisation errors. + """ + if self.decision_log is None: + return + try: + from datetime import datetime + from gently.eval import Decision, DecisionTrigger + self.decision_log.append(Decision( + timestamp=datetime.now(), + agent="production", + trigger=DecisionTrigger.USER_MESSAGE, + trigger_detail=(user_message or "")[:200], + tool_calls=tool_calls, + response_text=response_text, + prompt_hash=prompt_hash_value, + duration_ms=duration_ms, + error=error, + )) + except Exception: + logger.exception("Failed to write production Decision") + # ===== Dry-Run Tool Call (Benchmarking) ===== async def get_tool_call(self, user_message: str, system_prompt, tools) -> Optional[Dict]: diff --git a/tests/test_eval.py b/tests/test_eval.py index 8bc97064..28fe46ea 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -361,3 +361,201 @@ def test_shadow_runner_watch_filter(tmp_path: Path): decs = dlog.read() assert len(decs) == 1 assert decs[0].trigger_detail == "ERROR_OCCURRED" + + +# ============================================================================= +# prompt_hash +# ============================================================================= + +def test_prompt_hash_stable_and_distinguishing(): + """Identical inputs → identical hash; any change → different hash.""" + from gently.eval import prompt_hash + h1 = 
prompt_hash("sys-A", [{"role": "user", "content": "hi"}]) + h2 = prompt_hash("sys-A", [{"role": "user", "content": "hi"}]) + h3 = prompt_hash("sys-A", [{"role": "user", "content": "hello"}]) + h4 = prompt_hash("sys-B", [{"role": "user", "content": "hi"}]) + assert h1 == h2 + assert h1 != h3 + assert h1 != h4 + assert len(h1) == 16 # documented short fingerprint length + + +def test_prompt_hash_accepts_list_system_prompt(): + """Cached system prompts use the list-of-blocks shape; hashing must work.""" + from gently.eval import prompt_hash + list_prompt = [{"type": "text", "text": "sys", "cache_control": {"type": "ephemeral"}}] + str_prompt = "sys" + # Different shapes, different hashes — that's fine, the point is just + # that the list case doesn't raise. + h_list = prompt_hash(list_prompt, []) + h_str = prompt_hash(str_prompt, []) + assert isinstance(h_list, str) and isinstance(h_str, str) + assert len(h_list) == 16 and len(h_str) == 16 + + +# ============================================================================= +# ConversationManager production-decision capture (success + error paths) +# ============================================================================= + +def _make_fake_conversation_manager(claude_client): + """Build a ConversationManager with a fake Claude client and a no-op + tool registry — enough to exercise call_claude's decision-write path.""" + import asyncio # noqa: F401 (used by callers) + from gently.harness.conversation import ConversationManager + + class _NoopReg: + # call_claude doesn't use this directly; tools list is passed in + pass + + return ConversationManager(claude_client, "claude-haiku-4-5-20251001", _NoopReg()) + + +class _Usage: + input_tokens = 10 + output_tokens = 20 + cache_creation_input_tokens = 0 + cache_read_input_tokens = 0 + + +class _ToolBlock: + type = "tool_use" + name = "detect_embryos" + input = {"min_confidence": 0.7} + id = "t1" + + +class _TextBlock: + type = "text" + text = "Done." 
+ + +class _R1: + stop_reason = "tool_use" + content = [_ToolBlock()] + usage = _Usage() + + +class _R2: + stop_reason = "end_turn" + content = [_TextBlock()] + usage = _Usage() + + +def test_production_decision_capture_success(tmp_path: Path): + """One success turn through call_claude writes one Decision row.""" + import asyncio + + calls = {"n": 0} + + class _FakeMessages: + def create(self, **kw): + calls["n"] += 1 + return _R1() if calls["n"] == 1 else _R2() + + class _FakeClient: + messages = _FakeMessages() + + cm = _make_fake_conversation_manager(_FakeClient()) + + # Bypass actual tool execution + async def fake_exec(content_blocks, interaction): + return [{"type": "tool_result", "tool_use_id": "t1", "content": "ok"}] + cm._execute_tools_with_logging = fake_exec + + dlog = DecisionLog(tmp_path / "decisions.jsonl") + dlog.open() + cm.decision_log = dlog + + async def run(): + return await cm.call_claude( + user_message="find embryos please", + system_prompt="system", + tools=[], + mode="run", + auto_save_fn=lambda: None, + ) + + out = asyncio.run(run()) + dlog.close() + + assert out == "Done." + decs = dlog.read() + assert len(decs) == 1 + d = decs[0] + assert d.agent == "production" + assert d.trigger is DecisionTrigger.USER_MESSAGE + assert d.trigger_detail == "find embryos please" + assert d.tool_calls == [{ + "name": "detect_embryos", + "input": {"min_confidence": 0.7}, + "id": "t1", + }] + assert d.response_text == "Done." 
+ assert d.error is None + assert d.prompt_hash is not None and len(d.prompt_hash) == 16 + assert d.duration_ms is not None and d.duration_ms >= 0 + + +def test_production_decision_capture_error(tmp_path: Path): + """A failing Claude call writes a Decision with error before re-raising.""" + import asyncio + + class _BoomMessages: + def create(self, **kw): + raise RuntimeError("simulated outage") + + class _BoomClient: + messages = _BoomMessages() + + cm = _make_fake_conversation_manager(_BoomClient()) + dlog = DecisionLog(tmp_path / "decisions.jsonl") + dlog.open() + cm.decision_log = dlog + + async def run(): + with pytest.raises(RuntimeError, match="simulated outage"): + await cm.call_claude( + user_message="do something", + system_prompt="system", + tools=[], + mode="run", + auto_save_fn=lambda: None, + ) + + asyncio.run(run()) + dlog.close() + decs = dlog.read() + assert len(decs) == 1 + assert decs[0].error == "simulated outage" + assert decs[0].trigger is DecisionTrigger.USER_MESSAGE + assert decs[0].response_text and "simulated outage" in decs[0].response_text + + +def test_production_decision_capture_no_log_is_no_op(tmp_path: Path): + """No DecisionLog attached → call_claude proceeds normally, no errors.""" + import asyncio + + calls = {"n": 0} + + class _M: + def create(self, **kw): + calls["n"] += 1 + return _R2() # immediate end_turn, no tool loop + + class _C: + messages = _M() + + cm = _make_fake_conversation_manager(_C()) + assert cm.decision_log is None # default + + async def run(): + return await cm.call_claude( + user_message="hi", + system_prompt="sys", + tools=[], + mode="run", + auto_save_fn=lambda: None, + ) + + out = asyncio.run(run()) + assert out == "Done." 
# no log to read; we just want no error From 1194a1f584a8222ef177f2b05e531db984bf92b4 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 18:13:05 -0400 Subject: [PATCH 10/71] eval: operator-action events + ReactiveCandidate (first real shadow) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two pieces of the closed-loop paradigm, tightly coupled. Operator-action events (vocabulary) ----------------------------------- Three new EventType values for human-driven mutations. They're distinct from EMBRYOS_UPDATE because they carry INTENT, not just state delta — candidates can reason about "the operator just did X" without the operator typing that fact into chat. OPERATOR_EDITED_EMBRYO PUT /api/embryos/{id}/position payload: embryo_id + old/new coarse + fine_position_invalidated OPERATOR_REMOVED_EMBRYO DELETE /api/embryos/{id} payload: embryo_id + last_position OPERATOR_MARKED_EMBRYOS detect_embryos web-editor finish payload: embryo_ids + count + stage_origin + pre_edit_count Map-edit routes publish via server.agent_bridge.agent._event_bus. detect_embryos publishes only when the operator actually confirmed via the web canvas (operator_marked flag) — if the editor was skipped, the SAM list still landed in experiment.embryos but it wasn't operator-confirmed, so no operator event. ReactiveCandidate (first real candidate) ---------------------------------------- gently/eval/candidates.py — pure-rule shadow orchestrator with a tiny world model (embryos + last stage + last error). 
Reacts to: EMBRYOS_UPDATE ingest, silent STAGE_MOVED ingest, silent OPERATOR_EDITED_EMBRYO propose recalibrate_embryo if fine was invalidated OPERATOR_MARKED_EMBRYOS propose calibrate_all_embryos for the new set OPERATOR_REMOVED_EMBRYO propose forget_embryo for cache tidy-up ERROR_OCCURRED escalate first occurrence, suppress same msg within 30s The thesis being tested: a rule-based responder can do the routine bookkeeping that today only happens when the operator chats with Claude. Shadow mode will tell us how often that thesis holds in practice. Tests ----- +7 ReactiveCandidate tests covering silent ingest, conditional recalibrate, marked-set proposal, removal tidy-up, error escalate/suppress, and a full event-stream-through-replay smoke that proves the captured jsonl alone is sufficient input to drive a candidate to a decision log. 27/27 green. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 0a97563e94bbe53a41704602744190ad92a81034) --- gently/app/tools/detection_tools.py | 20 +++ gently/core/event_bus.py | 8 + gently/eval/__init__.py | 2 + gently/eval/candidates.py | 257 ++++++++++++++++++++++++++++ gently/ui/web/routes/data.py | 50 +++++- tests/test_eval.py | 208 ++++++++++++++++++++++ 6 files changed, 544 insertions(+), 1 deletion(-) create mode 100644 gently/eval/candidates.py diff --git a/gently/app/tools/detection_tools.py b/gently/app/tools/detection_tools.py index 08cf58fd..e30f5f33 100644 --- a/gently/app/tools/detection_tools.py +++ b/gently/app/tools/detection_tools.py @@ -204,6 +204,26 @@ async def detect_embryos( ) added.append((emb_id, m.get("role", default_role))) + # OPERATOR_MARKED_EMBRYOS — operator confirmed via the web canvas. + # This is the intent signal eval/shadow listeners hook for ReactiveCandidate. 
+ if added: + bus = getattr(agent, '_event_bus', None) + if bus is not None: + from gently.core.event_bus import EventType + try: + bus.publish( + event_type=EventType.OPERATOR_MARKED_EMBRYOS, + data={ + 'embryo_ids': [eid for eid, _ in added], + 'count': len(added), + 'stage_origin': list(stage_pos), + 'pre_edit_count': len(sam_embryos), + }, + source='detect_embryos:web-editor', + ) + except Exception: + pass + role_counts = {} for _, r in added: role_counts[r] = role_counts.get(r, 0) + 1 diff --git a/gently/core/event_bus.py b/gently/core/event_bus.py index 0b7d57b2..3d59d9c6 100644 --- a/gently/core/event_bus.py +++ b/gently/core/event_bus.py @@ -84,6 +84,14 @@ class EventType(Enum): BOTTOM_CAMERA_FRAME = auto() # Live JPEG frame from the bottom camera stream EMBRYOS_UPDATE = auto() # Full embryo list snapshot from agent.experiment + # Operator-action events. Distinct from EMBRYOS_UPDATE because they + # carry intent ("a human did this") rather than just state delta. + # Candidate orchestrators can subscribe and reason about what the + # operator just did without having to type it in chat. 
+ OPERATOR_EDITED_EMBRYO = auto() # Map drag/drop -> PUT /api/embryos/{id}/position + OPERATOR_REMOVED_EMBRYO = auto() # Map delete -> DELETE /api/embryos/{id} + OPERATOR_MARKED_EMBRYOS = auto() # Marking canvas "Done" — operator confirmed N positions + # System events ERROR_OCCURRED = auto() WARNING_ISSUED = auto() diff --git a/gently/eval/__init__.py b/gently/eval/__init__.py index 49cd873e..e93f5d5c 100644 --- a/gently/eval/__init__.py +++ b/gently/eval/__init__.py @@ -19,6 +19,7 @@ from .event_replay import EventReplay from .decision_log import Decision, DecisionLog, DecisionTrigger, prompt_hash from .shadow import OrchestratorCandidate, ShadowRunner, NoOpCandidate +from .candidates import ReactiveCandidate __all__ = [ "EventCapture", @@ -30,4 +31,5 @@ "OrchestratorCandidate", "ShadowRunner", "NoOpCandidate", + "ReactiveCandidate", ] diff --git a/gently/eval/candidates.py b/gently/eval/candidates.py new file mode 100644 index 00000000..111cc935 --- /dev/null +++ b/gently/eval/candidates.py @@ -0,0 +1,257 @@ +"""Canned shadow orchestrator candidates. + +NoOpCandidate lives in shadow.py as the trivial baseline. Anything more +interesting — even pure-rule architectures with state — lives here. As +LLM-driven candidates land they should slot into this module too. + +Conventions every candidate should keep: + - It maintains its own tiny world model. The production agent's + `experiment` is intentionally not shared (a candidate that mutates + production state would defeat the point of shadow mode). + - Decisions go through `log_decision`. Never call hardware tools. + - State updates from events are cheap (no LLM, no I/O). 
+""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +from gently.core.event_bus import Event + +from .decision_log import DecisionLog, DecisionTrigger +from .shadow import OrchestratorCandidate + +logger = logging.getLogger(__name__) + + +@dataclass +class _ReactiveWorldModel: + """The tiniest possible world model — everything ReactiveCandidate + needs to make rule-based decisions without re-reading the agent.""" + + # {embryo_id: {"coarse": {x, y} | None, "fine": {x, y} | None, + # "has_fine": bool, "confidence": float}} + embryos: Dict[str, Dict[str, Any]] = field(default_factory=dict) + + # Last live stage XY (µm) from a STAGE_MOVED event. + last_stage_um: Optional[Dict[str, float]] = None + + # Last error message + timestamp, so the candidate can avoid + # spam-proposing escalations for the same recurring failure. + last_error: Optional[Dict[str, Any]] = None + + # Count of events seen, by type name — useful debug field that also + # ends up in the decision context_summary. + seen: Dict[str, int] = field(default_factory=dict) + + +class ReactiveCandidate(OrchestratorCandidate): + """Pure-rule reactive shadow orchestrator. + + The thesis being tested: *can a rule-based responder do the + routine bookkeeping that today only happens when the operator + chats with Claude?* + + Reactions + --------- + OPERATOR_EDITED_EMBRYO + Operator moved an embryo on the Map. The PUT also clears fine + position. Propose `recalibrate_embryo(embryo_id)` so the new + coarse position gets a SPIM-fine alignment before the next + acquisition. If `fine_position_invalidated` was False (no fine + existed yet) skip the proposal — there's nothing to refresh. + + OPERATOR_MARKED_EMBRYOS + Operator just confirmed a fresh set of embryos via the marking + canvas. Propose `calibrate_all_embryos` to bring them all into + focus. 
Cheap pattern: kick off calibration the moment sightings + land, instead of waiting for the operator to type it. + + OPERATOR_REMOVED_EMBRYO + Operator deleted an embryo. Propose a tidy-up step + `forget_embryo(embryo_id)` for any candidate that wants to + clean caches / learnings keyed on the gone embryo. No-op for + production today (state mutation already happened); the + proposal is reserved for downstream cleanup tools. + + ERROR_OCCURRED + Propose `escalate_to_operator(error_message)` once per distinct + error. Suppresses if the same error fires twice within 30s — + avoids drowning the operator in repeat alarms. + + EMBRYOS_UPDATE / STAGE_MOVED + Update the world model. No decision logged (silent ingest). + + """ + + # If two ERROR_OCCURRED events with the same message arrive within + # this window, only the first proposes an escalation. + ERROR_SUPPRESS_WINDOW_SEC = 30.0 + + def __init__(self, name: str, decisions: DecisionLog): + super().__init__(name, decisions) + self.world = _ReactiveWorldModel() + + # ---- event handlers ---------------------------------------------------- + + def on_event(self, event: Event) -> None: + name = event.event_type.name + self.world.seen[name] = self.world.seen.get(name, 0) + 1 + + # Always ingest state-shaped events first. + if name == "EMBRYOS_UPDATE": + self._ingest_embryos_update(event) + return + if name == "STAGE_MOVED": + self._ingest_stage_moved(event) + return + + # Operator + error events produce decisions. 
+ if name == "OPERATOR_EDITED_EMBRYO": + self._react_operator_edited(event) + return + if name == "OPERATOR_MARKED_EMBRYOS": + self._react_operator_marked(event) + return + if name == "OPERATOR_REMOVED_EMBRYO": + self._react_operator_removed(event) + return + if name == "ERROR_OCCURRED": + self._react_error(event) + return + + # ---- ingests ----------------------------------------------------------- + + def _ingest_embryos_update(self, event: Event) -> None: + embryos = (event.data or {}).get("embryos") or [] + new_world: Dict[str, Dict[str, Any]] = {} + for emb in embryos: + new_world[emb.get("id", "")] = { + "coarse": emb.get("position_coarse"), + "fine": emb.get("position_fine"), + "has_fine": bool(emb.get("has_fine_position")), + "confidence": emb.get("detection_confidence", 0.0), + } + self.world.embryos = new_world + + def _ingest_stage_moved(self, event: Event) -> None: + d = event.data or {} + if "x" in d and "y" in d: + self.world.last_stage_um = {"x": float(d["x"]), "y": float(d["y"])} + + # ---- reactions --------------------------------------------------------- + + def _react_operator_edited(self, event: Event) -> None: + data = event.data or {} + eid = data.get("embryo_id") or "" + invalidated = bool(data.get("fine_position_invalidated")) + tool_calls: List[Dict[str, Any]] = [] + # Only propose a recalibration when there was a fine position + # that the edit just invalidated. New coarse without any prior + # fine has nothing to refresh yet. + if invalidated: + tool_calls.append({ + "name": "recalibrate_embryo", + "input": {"embryo_id": eid}, + "id": None, + }) + self.log_decision( + trigger=DecisionTrigger.EVENT, + trigger_detail="OPERATOR_EDITED_EMBRYO", + tool_calls=tool_calls, + response_text=( + f"Operator moved {eid}; proposing recalibration." + if invalidated else + f"Operator moved {eid}; no prior fine -- no action." 
+ ), + recent_event_ids=[event.event_id], + context_summary=self._summary(), + ) + + def _react_operator_marked(self, event: Event) -> None: + data = event.data or {} + ids = data.get("embryo_ids") or [] + count = data.get("count", len(ids)) + tool_calls: List[Dict[str, Any]] = [] + if count: + tool_calls.append({ + "name": "calibrate_all_embryos", + "input": {"embryo_ids": list(ids)}, + "id": None, + }) + self.log_decision( + trigger=DecisionTrigger.EVENT, + trigger_detail="OPERATOR_MARKED_EMBRYOS", + tool_calls=tool_calls, + response_text=( + f"Operator marked {count} embryos; proposing calibration." + if count else + "Operator marked zero embryos; no action." + ), + recent_event_ids=[event.event_id], + context_summary=self._summary(), + ) + + def _react_operator_removed(self, event: Event) -> None: + data = event.data or {} + eid = data.get("embryo_id") or "" + self.log_decision( + trigger=DecisionTrigger.EVENT, + trigger_detail="OPERATOR_REMOVED_EMBRYO", + tool_calls=[{ + "name": "forget_embryo", + "input": {"embryo_id": eid}, + "id": None, + }], + response_text=f"Operator removed {eid}; proposing cache tidy-up.", + recent_event_ids=[event.event_id], + context_summary=self._summary(), + ) + + def _react_error(self, event: Event) -> None: + from datetime import datetime + data = event.data or {} + msg = str(data.get("msg") or data.get("error") or data.get("message") or "unknown") + now = datetime.now() + prior = self.world.last_error + suppress = ( + prior is not None + and prior.get("msg") == msg + and (now - prior["ts"]).total_seconds() < self.ERROR_SUPPRESS_WINDOW_SEC + ) + self.world.last_error = {"msg": msg, "ts": now} + if suppress: + self.log_decision( + trigger=DecisionTrigger.EVENT, + trigger_detail="ERROR_OCCURRED", + tool_calls=[], + response_text=f"Suppressed repeat error within {self.ERROR_SUPPRESS_WINDOW_SEC:.0f}s window: {msg[:120]}", + recent_event_ids=[event.event_id], + context_summary=self._summary(), + ) + return + self.log_decision( + 
trigger=DecisionTrigger.EVENT, + trigger_detail="ERROR_OCCURRED", + tool_calls=[{ + "name": "escalate_to_operator", + "input": {"error_message": msg, "source": event.source}, + "id": None, + }], + response_text=f"New error -- proposing escalation: {msg[:120]}", + recent_event_ids=[event.event_id], + context_summary=self._summary(), + ) + + # ---- helpers ----------------------------------------------------------- + + def _summary(self) -> str: + n_emb = len(self.world.embryos) + n_fine = sum(1 for v in self.world.embryos.values() if v.get("has_fine")) + stage = self.world.last_stage_um + stage_str = f"({stage['x']:.1f}, {stage['y']:.1f})" if stage else "unknown" + seen = sum(self.world.seen.values()) + return (f"{n_emb} embryos ({n_fine} fine-calibrated); " + f"stage {stage_str}; {seen} events ingested") diff --git a/gently/ui/web/routes/data.py b/gently/ui/web/routes/data.py index 6a3e8ef5..7bb5ad3b 100644 --- a/gently/ui/web/routes/data.py +++ b/gently/ui/web/routes/data.py @@ -89,6 +89,10 @@ async def update_embryo_position( position — the operator is overriding the sighting, so any SPIM-objective fine alignment derived from the old coarse is no longer trustworthy and must be re-run. + + Publishes OPERATOR_EDITED_EMBRYO with both the old and new + positions so candidates can reason about the magnitude of the + correction and trigger re-calibration suggestions. 
""" agent = _require_agent_with_experiment() emb = agent.experiment.embryos.get(embryo_id) @@ -99,9 +103,28 @@ async def update_embryo_position( y = float(body.get("y")) except (TypeError, ValueError): raise HTTPException(status_code=400, detail="Body needs numeric x and y") + old_coarse = dict(emb.position_coarse) if emb.position_coarse else None + had_fine = bool(emb.position_fine) emb.position_coarse = {"x": x, "y": y} emb.position_fine = {} agent.experiment.notify_embryos_changed() + + bus = getattr(agent, "_event_bus", None) + if bus is not None: + from gently.core.event_bus import EventType + try: + bus.publish( + event_type=EventType.OPERATOR_EDITED_EMBRYO, + data={ + "embryo_id": embryo_id, + "old_position_coarse": old_coarse, + "new_position_coarse": {"x": x, "y": y}, + "fine_position_invalidated": had_fine, + }, + source="web:map-edit", + ) + except Exception: + logger.exception("Failed to publish OPERATOR_EDITED_EMBRYO") return emb.to_dict() @router.delete("/api/embryos/{embryo_id}", @@ -110,11 +133,36 @@ async def delete_embryo(embryo_id: str): """Remove an embryo from the experiment. Goes through ExperimentState.remove_embryo so the observer hook - fires EMBRYOS_UPDATE automatically. + fires EMBRYOS_UPDATE automatically. Also publishes + OPERATOR_REMOVED_EMBRYO carrying the embryo's last known position + — candidates can use that to e.g. clean up associated cache or + log the deletion in their own world model. 
""" agent = _require_agent_with_experiment() + emb = agent.experiment.embryos.get(embryo_id) + last_position = None + if emb is not None: + last_position = { + "coarse": dict(emb.position_coarse) if emb.position_coarse else None, + "fine": dict(emb.position_fine) if emb.position_fine else None, + } if not agent.experiment.remove_embryo(embryo_id): raise HTTPException(status_code=404, detail=f"Embryo {embryo_id} not found") + + bus = getattr(agent, "_event_bus", None) + if bus is not None: + from gently.core.event_bus import EventType + try: + bus.publish( + event_type=EventType.OPERATOR_REMOVED_EMBRYO, + data={ + "embryo_id": embryo_id, + "last_position": last_position, + }, + source="web:map-delete", + ) + except Exception: + logger.exception("Failed to publish OPERATOR_REMOVED_EMBRYO") return {"ok": True, "embryo_id": embryo_id} @router.get("/api/embryos/current") diff --git a/tests/test_eval.py b/tests/test_eval.py index 28fe46ea..879b7fc9 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -19,6 +19,7 @@ EventCapture, EventReplay, NoOpCandidate, + ReactiveCandidate, ShadowRunner, ) @@ -559,3 +560,210 @@ async def run(): out = asyncio.run(run()) assert out == "Done." 
# no log to read; we just want no error + + +# ============================================================================= +# ReactiveCandidate +# ============================================================================= + +def _publish(bus, event_type_name: str, data: dict, source: str = "test"): + bus.publish(EventType[event_type_name], data, source=source) + + +def _decisions_for(dlog: DecisionLog): + return dlog.read() + + +def test_reactive_ingests_embryos_update_silently(tmp_path: Path): + """EMBRYOS_UPDATE updates the world model but emits no decision.""" + bus = EventBus() + dlog = DecisionLog(tmp_path / "d.jsonl") + dlog.open() + cand = ReactiveCandidate("reactive-test", dlog) + runner = ShadowRunner(bus) + runner.add(cand) + runner.start() + + _publish(bus, "EMBRYOS_UPDATE", {"embryos": [ + {"id": "embryo_1", "position_coarse": {"x": 1.0, "y": 2.0}, + "position_fine": {}, "has_fine_position": False}, + {"id": "embryo_2", "position_coarse": {"x": 3.0, "y": 4.0}, + "position_fine": {"x": 3.1, "y": 4.1}, "has_fine_position": True}, + ]}) + + runner.stop() + dlog.close() + + assert _decisions_for(dlog) == [] # silent + assert set(cand.world.embryos.keys()) == {"embryo_1", "embryo_2"} + assert cand.world.embryos["embryo_2"]["has_fine"] + + +def test_reactive_proposes_recalibrate_when_fine_invalidated(tmp_path: Path): + bus = EventBus() + dlog = DecisionLog(tmp_path / "d.jsonl") + dlog.open() + runner = ShadowRunner(bus) + runner.add(ReactiveCandidate("reactive", dlog)) + runner.start() + + _publish(bus, "OPERATOR_EDITED_EMBRYO", { + "embryo_id": "embryo_2", + "old_position_coarse": {"x": 3, "y": 4}, + "new_position_coarse": {"x": 30, "y": 40}, + "fine_position_invalidated": True, + }) + runner.stop() + dlog.close() + decs = _decisions_for(dlog) + assert len(decs) == 1 + assert decs[0].tool_calls == [{ + "name": "recalibrate_embryo", + "input": {"embryo_id": "embryo_2"}, + "id": None, + }] + + +def 
test_reactive_skips_recalibrate_when_no_fine_existed(tmp_path: Path): + bus = EventBus() + dlog = DecisionLog(tmp_path / "d.jsonl") + dlog.open() + runner = ShadowRunner(bus) + runner.add(ReactiveCandidate("reactive", dlog)) + runner.start() + + _publish(bus, "OPERATOR_EDITED_EMBRYO", { + "embryo_id": "embryo_1", + "old_position_coarse": {"x": 1, "y": 2}, + "new_position_coarse": {"x": 10, "y": 20}, + "fine_position_invalidated": False, + }) + runner.stop() + dlog.close() + decs = _decisions_for(dlog) + assert len(decs) == 1 + assert decs[0].tool_calls == [] # nothing to refresh + assert "no action" in decs[0].response_text.lower() + + +def test_reactive_proposes_calibrate_all_on_marked(tmp_path: Path): + bus = EventBus() + dlog = DecisionLog(tmp_path / "d.jsonl") + dlog.open() + runner = ShadowRunner(bus) + runner.add(ReactiveCandidate("reactive", dlog)) + runner.start() + + _publish(bus, "OPERATOR_MARKED_EMBRYOS", { + "embryo_ids": ["embryo_001", "embryo_002", "embryo_003"], + "count": 3, + }) + runner.stop() + dlog.close() + decs = _decisions_for(dlog) + assert len(decs) == 1 + assert decs[0].tool_calls == [{ + "name": "calibrate_all_embryos", + "input": {"embryo_ids": ["embryo_001", "embryo_002", "embryo_003"]}, + "id": None, + }] + + +def test_reactive_proposes_forget_on_removal(tmp_path: Path): + bus = EventBus() + dlog = DecisionLog(tmp_path / "d.jsonl") + dlog.open() + runner = ShadowRunner(bus) + runner.add(ReactiveCandidate("reactive", dlog)) + runner.start() + + _publish(bus, "OPERATOR_REMOVED_EMBRYO", { + "embryo_id": "embryo_5", + "last_position": {"coarse": {"x": 1, "y": 2}, "fine": None}, + }) + runner.stop() + dlog.close() + decs = _decisions_for(dlog) + assert len(decs) == 1 + assert decs[0].tool_calls == [{ + "name": "forget_embryo", + "input": {"embryo_id": "embryo_5"}, + "id": None, + }] + + +def test_reactive_escalates_first_error_then_suppresses_repeat(tmp_path: Path): + bus = EventBus() + dlog = DecisionLog(tmp_path / "d.jsonl") + dlog.open() 
+ runner = ShadowRunner(bus) + runner.add(ReactiveCandidate("reactive", dlog)) + runner.start() + + _publish(bus, "ERROR_OCCURRED", {"msg": "camera lost lock"}) + _publish(bus, "ERROR_OCCURRED", {"msg": "camera lost lock"}) # within 30s + _publish(bus, "ERROR_OCCURRED", {"msg": "different error"}) + runner.stop() + dlog.close() + decs = _decisions_for(dlog) + assert len(decs) == 3 + # 1st: escalate + assert decs[0].tool_calls[0]["name"] == "escalate_to_operator" + # 2nd: suppressed + assert decs[1].tool_calls == [] + assert "suppressed" in decs[1].response_text.lower() + # 3rd: different message -> escalate + assert decs[2].tool_calls[0]["name"] == "escalate_to_operator" + + +def test_reactive_full_event_stream_through_replay(tmp_path: Path): + """Capture a realistic operator-driven session and replay through the + candidate. End-to-end smoke that the recorded jsonl is enough to + drive a candidate's decision log without any other inputs.""" + src = EventBus() + cap = EventCapture(tmp_path / "events.jsonl") + cap.start(src) + + src.publish(EventType.EMBRYOS_UPDATE, {"embryos": [ + {"id": "embryo_1", "position_coarse": {"x": 1.0, "y": 2.0}, + "position_fine": {"x": 1.05, "y": 2.05}, "has_fine_position": True}, + ]}, source="agent") + src.publish(EventType.OPERATOR_EDITED_EMBRYO, { + "embryo_id": "embryo_1", + "old_position_coarse": {"x": 1.0, "y": 2.0}, + "new_position_coarse": {"x": 5.0, "y": 6.0}, + "fine_position_invalidated": True, + }, source="web:map-edit") + src.publish(EventType.OPERATOR_MARKED_EMBRYOS, { + "embryo_ids": ["embryo_2", "embryo_3"], "count": 2, + }, source="detect_embryos:web-editor") + src.publish(EventType.ERROR_OCCURRED, {"msg": "lost focus"}, + source="device-layer") + cap.stop() + + dst = EventBus() + dlog = DecisionLog(tmp_path / "decisions.jsonl") + dlog.open() + runner = ShadowRunner(dst) + runner.add(ReactiveCandidate("replay-cand", dlog)) + runner.start() + + n = EventReplay(tmp_path / "events.jsonl").replay(dst) + runner.stop() + 
dlog.close() + + assert n == 4 # all four events replayed + decs = _decisions_for(dlog) + # EMBRYOS_UPDATE is silent ingest; the other 3 each produce a decision. + triggers = [(d.trigger.value, d.trigger_detail) for d in decs] + assert triggers == [ + ("event", "OPERATOR_EDITED_EMBRYO"), + ("event", "OPERATOR_MARKED_EMBRYOS"), + ("event", "ERROR_OCCURRED"), + ] + tool_names = [d.tool_calls[0]["name"] if d.tool_calls else None for d in decs] + assert tool_names == [ + "recalibrate_embryo", + "calibrate_all_embryos", + "escalate_to_operator", + ] From ed75933e157a9c37608938a5f48163c2c24ec625 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 15 May 2026 19:52:56 -0400 Subject: [PATCH 11/71] docs: closed-loop paradigm write-up Distillation of the design conversation that produced the paradigm/closed-loop branch: - The four orchestrator roles and which one creates the friction - Web/chat reconciliation patterns A/B/C/D - Why 'turn' is the wrong unit and 'decision moment' is right - Wake-router model (events + schedule + user input) - Tiered world model (snapshot / digest / pull tools / lazy summariser) - Five testing primitives ranked by payoff - Coarse-vs-fine schema as 'measurement provenance' - Map as collaborative world model - Revolutionary trajectories: plans-as-goals, compounding learning, collaborative world model, reverse-mode microscopy, continuous shadow - What is built (commit table), what is not yet, and the open questions for the next iteration Future-self / new-collaborator reference, not a transcript. 
Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 938baf8a74fbd8a1fe3a08df113104412283c596) --- docs/CLOSED_LOOP_PARADIGM.md | 536 +++++++++++++++++++++++++++++++++++ 1 file changed, 536 insertions(+) create mode 100644 docs/CLOSED_LOOP_PARADIGM.md diff --git a/docs/CLOSED_LOOP_PARADIGM.md b/docs/CLOSED_LOOP_PARADIGM.md new file mode 100644 index 00000000..8f1cf80d --- /dev/null +++ b/docs/CLOSED_LOOP_PARADIGM.md @@ -0,0 +1,536 @@ +# Closed-Loop Paradigm: Notes on the Discussion + +This document captures the design conversation that produced everything on the +`paradigm/closed-loop` branch: the schema split, the Map-as-embryo-home work, +the operator-action vocabulary, the eval substrate (capture / replay / +decisions / shadow), and the trajectory the system is on. It is a +distillation, not a transcript — a future-self / new-collaborator reference +for *why* this code looks the way it does and *where it is going*. + +--- + +## 1. The Original Friction + +The conversation started from a small, concrete observation by the operator: + +> "It feels awkward that the operator has to go between the chat in the TUI +> and the viz server… or even to chat about detecting embryos." + +That awkwardness is a symptom, not a defect. It surfaces a deeper design +question: **what is the orchestrator (the agent) actually for?** Today the +orchestrator does at least four jobs at once, and one of them — *tool router* +— is the one creating the friction. + +### The four orchestrator roles + +| Role | Description | Replaceable by a button? | +| --- | --- | --- | +| 1. Tool router | "Detect embryos" → `detect_embryos()` call | **Yes** — this is the friction surface | +| 2. Workflow runner | Timelapses, multi-embryo plans, perception loops | No | +| 3. Domain reasoner | Knows microscopy, embryos, safety constraints | No | +| 4. 
Session memory | Coherent narrative of what happened and why | No | + +Routing a single click through chat for a routine action is the system +fighting against its own users. Routing a multi-step scientific decision +through Claude is using the right tool for the right job. The paradigm here +is: **shrink role 1 to its essentials, keep roles 2–4 first-class, and let +the UI carry the rest.** + +--- + +## 2. Web ↔ Chat Reconciliation Patterns + +Four ways to relate the web UI and the chat orchestrator. Each has a +distinct world model property: + +### A. Chat-only intent (the old default) + +Every action originates in chat. The web is observation + delegated subtasks +(e.g. the marking canvas is a delegation the orchestrator triggers). + +* Cleanest record. +* Worst friction. +* Orchestrator's world model is "complete" because every change passes + through it. + +### B. Two parallel command surfaces + +Operator clicks in web, web acts directly; orchestrator finds out by polling +state or doesn't find out at all. + +* Lowest friction. +* Orchestrator's world model **drifts from reality** — fatal for role 4 + (session memory) and dangerous for role 3 (safety reasoning). + +### C. Web acts, orchestrator subscribes *(the chosen direction)* + +Operator clicks → web performs the action **and** publishes an event +(`OPERATOR_*`) → orchestrator's session memory ingests it. + +* Chat log shows only human conversation. +* Orchestrator's working context shows chat + events as a single timeline. +* Phase 7 (operator events vocabulary + reactive candidate) is the first + installment of this pattern. + +### D. Cross-pattern hybrid + +Different action classes use different patterns. Heavy / novel / composite +actions use chat (A); routine / clickable / contextual actions use web (C). +This is what the system actually drifts toward; pattern C is the substrate +that makes it possible. 
+ +The orchestrator's job shifts from being **a funnel for action** to being **a +brain that knows what's happening on every surface**. + +--- + +## 3. The "Turn" is Wrong; the "Decision Moment" is Right + +Chat-AI literature reasons in *turns* (user message → assistant response). +That model imports an assumption that does not hold here: the human is at +the keyboard continuously. In a microscopy experiment running 12+ hours, the +human checks in once, twice, maybe ten times. The agent is autonomous in +between. + +The right unit is a **decision moment**, triggered by: + +1. **User message** — rare, interrupting (classic chat turn). +2. **Critical event** — error, safety violation, lost focus, perception + anomaly. Wake immediately; decide to act / abort / escalate. +3. **Phase boundary** — between timepoints, between embryos. Built-in + checkpoint: review state, decide whether to continue. +4. **Periodic checkpoint** — every N minutes if nothing else happened. + Catches slow drifts. + +Between moments the agent is asleep. Plans execute autonomously. Events +accumulate on the bus and in the world model. When the next decision +moment fires, the agent reads: + +* The trigger (why am I waking up?) +* The world snapshot (NOW state) +* The events digest (what happened since last wake) +* The conversation history (which might be hours old and less relevant + than usual) + +This is closer to a **supervisory controller** than a chat partner. The +conversation history matters less than usual; what matters more is the +**flight log** (events) plus the **current state snapshot**. + +### Trigger model — concrete + +A small router (in code, not Claude) sits between the bus and the brain: + +``` + user input ─┐ + event bus ──┼─► wake-router ──► (compose context) ──► claude.messages.create + schedule ──┘ +``` + +The router's responsibilities: +* Subscribe to a whitelist of "wake-worthy" event types. 
+* Hold a debounce / coalescing buffer (so a burst of events becomes one + wake). +* Keep a heartbeat schedule (every N minutes if no other trigger fired). +* On wake, package: trigger, world snapshot, events digest, recent + conversation tail. +* Surface the package to the brain. + +The brain stays the brain (Claude). The router is cheap, deterministic, +debuggable code. It's the **meta-orchestrator** the operator mentioned — +**not as another LLM**, but as a control surface. + +### Phase boundaries: hand-back vs subscribe + +Two designs for letting the brain look in mid-plan: + +* **Plan hands control back** at well-known points (between embryos, every + 5 timepoints). Cheaper, predictable, slightly less reactive. +* **Plan never pauses; brain subscribes to plan events** ("perception + complete for embryo 3"). More reactive, more plan-coupling. + +The first one composes better with the supervisory-controller framing and +is the recommended starting point. + +### Idle ticks + +If 30 min pass with no event and no user, should the agent wake to verify +everything's OK? Default to **yes — periodic ticks with a high action +threshold.** Most ticks should result in the agent doing nothing. The +purpose is catching slow drifts (focus, sample state, hardware +degradation) that don't trigger their own events. + +--- + +## 4. World Model — Tiered, Not Monolithic + +A common mistake is "summarise everything every turn." Better is a tiered +model where different freshness/density tiers carry different cadence +costs. + +### Tier 1 — World snapshot + +Structured, ~30 lines, computed from in-memory state (not events), every +wake. + +Includes: live stage XY/Z, current session id, embryo list with +calibration state, current plan, acquisition status, recent operator +actions (one-line summary). + +Cheap to build, fresh every time. Already mostly present in the +codebase — `agent.experiment.get_summary()` plus the cached +`DEVICE_STATE_UPDATE` payload is 80% of this. 
+ +### Tier 2 — Recent-events digest + +Hand-written formatter over the events bus, filtered to wake-worthy types, +inserted as a system note at each wake. + +Shape: `"Since last response: operator added embryo 4 via Map at 14:32; calibration completed for embryo 2; one perception trace pending."` + +Hand-written because LLM summarisation here adds latency, cost, and +non-determinism for low value. Events are already structured. + +### Tier 3 — Pull tools + +For when reasoning needs depth: `get_recent_perceptions(embryo_id, n=5)`, +`get_session_timeline()`, `get_learnings(campaign_id)`, etc. The agent +calls these when it wants the detail. + +### Tier 4 — Optional LLM summariser + +Reserved for genuinely natural-language streams that resist rule-based +compression: accumulated CV reasoning chains, narrative observations, +cross-session learnings. Use a smaller, faster Claude model (Haiku is the +natural fit). Run lazily, when a tier-3 tool asks for "summarise the last +30 min for embryo 3." + +### Why this shape + +Decision moments are **rare** in autonomous mode. Token budget per wake +can be generous (it's mostly idle compute). What matters more than budget +is **cadence of waking** — saving 200 tokens per turn doesn't help if +you're waking up at the wrong moments. + +--- + +## 5. Testing — Where Most Projects Fail + +You cannot iterate on agent architecture without a way to compare +architectures. Microscopy makes this hard: + +* Physical, non-deterministic, non-replayable in the trivial sense. +* "Correct" is fuzzy — biological judgements rarely have ground truth. +* Slow feedback (a timelapse takes hours). +* Can't always reset to a clean state (samples are consumed). + +Five testing primitives, ranked by payoff per unit work (this ordering +informed Phase 6's build order): + +### 5.1 Event replay *(built — Phase 6a/6b)* + +Capture the full event stream during a real run. Offline, replay it +through any candidate architecture. 
Diff its decisions against +production's. **Foundation** — without it, every change to the +orchestrator is a flight test. + +### 5.2 Shadow mode *(built — Phase 6d)* + +During a real experiment, candidate architectures run alongside +production. They see the same events but their decisions are *logged, +not enacted*. Unique value over pure replay: shadow agents experience +real temporal cadence, so timing-sensitive things (drift, races) are +caught. + +### 5.3 Synthetic event sequences + +Hand-crafted streams: cascading errors, ambiguous perception, +contradictory operator actions, focus drift, network drop mid-acquisition. +Stress / chaos testing. The orchestrator is correct if it doesn't do +something catastrophic — much easier to score than biological +correctness. + +Trivially built on top of 5.1 — write a `jsonl` by hand, replay it. + +### 5.4 Decision-level micro-benchmarks + +Specific judgements — "given this perception result and these recent +observations, should the agent re-focus?" — captured as +(input → expected decision) pairs labelled by a biologist. Regression +suite. Cheap with biologist time, expensive to bootstrap, very valuable +once you have a few hundred. + +### 5.5 Multi-agent A/B in production + +Two embryos in the same dish, one supervised by architecture A and one +by B (both honouring the firmware fence). Compare biological outcomes. +Slow (one timelapse per data point), but the **only thing that measures +biological correctness end-to-end.** + +--- + +## 6. Embryo Schema: Coarse vs Fine + +Foundational and quietly important. Each embryo carries: + +* `position_coarse` — set by bottom-camera detection or manual Map + placement. Always present. +* `position_fine` — set later by SPIM-objective alignment (workflow not + yet built). Initially `{}`. +* `stage_position` — a *derived property*: `fine if fine else coarse`. + Downstream motion / perception keeps reading this and stays agnostic + about which calibration stage we're in. 
+ +This is the seed for a broader idea: **measurements have provenance and +calibration state**. The same embryo at the same nominal XY can have +different "true" positions depending on which sensor sighted it. Encode +that explicitly so any downstream decision can ask *"how confident is +this position?"* without needing to know the whole calibration history. + +When the operator drags an embryo on the Map, the PUT clears `position_fine` — +overriding the sighting invalidates any SPIM fine alignment that was +derived from the old coarse position. `OPERATOR_EDITED_EMBRYO` carries +`fine_position_invalidated` so the candidate / future controller can +schedule a re-alignment without inferring it. + +--- + +## 7. The Map as Collaborative World Model + +The Devices > Map page is more than visualisation. It is the **first +collaborative surface** between operator and orchestrator: both can read +the embryo list; both can mutate it. The orchestrator subscribes; the +operator clicks. + +Visual semantics matter: + +* Coarse-only embryo → outlined ring + number. *Provisional.* +* SPIM-fine-calibrated → filled disc + number. *Committed.* + +Calibration state is then visible at a glance across the slide — the +operator can scan and see "embryo 3 still needs alignment" without +opening anything. + +The pick-up / drop interaction (Phase 5) deliberately rejects +click-to-add: the Map is a schematic, not a satellite view. Adding a +new sighting without a visual reference is guessing. New embryos go +through the bottom-camera marking canvas. The Map is for **editing what +already exists**. + +### Future arc + +* **Annotations beyond position**: operator marks "this is the control", + "this one is dead, skip", "I think this is in 2-cell stage". These + become first-class scientific observations through additional + `OPERATOR_*` events. +* **Satellite tile**: render the live bottom-camera frame as an overlay + on the Map at the current stage XY, scaled by um_per_pixel.
Inside + that tile, click-to-add becomes meaningful (you can see what you're + picking). Outside, the Map stays schematic. + +--- + +## 8. Revolutionary Trajectories + +Some of these are reasonable extensions; some are genuinely new. + +### 8.1 Plans-as-goals, not scripts + +Operator specifies "characterise gut development for these four +embryos." Orchestrator translates this into a continuously adapted +imaging plan that changes based on what perception sees mid-run. The +plan isn't a fixed script handed to Bluesky — it's a negotiation the +orchestrator keeps in flight, with the world model as the substrate +for adaptation. + +Requires: tier-1 + tier-2 world model, decent perception loop, a way +to express goals as predicates over the world model. + +### 8.2 Compounding cross-session learning + +`agent/learnings/` already exists. Today it's barely used. With replay +and shadow, an architecture that proposes priors ("embryos at 3-fold +typically need slower piezo") becomes **A/B testable across sessions**. +Improvement gets *measurable*, which is the unlock — most "smart +microscopy" today is shallow because it has no measurement loop. + +The right framing: each session is a **trial**, the orchestrator is the +**experimenter**, the world model is what carries learning between +trials. + +### 8.3 Collaborative world model + +The Map (operator edits embryos) is the first instance. Extend +everywhere: + +* Operator annotates morphology on the Map → orchestrator updates + hypothesis space. +* Operator marks a focus failure → orchestrator marks the calibration + region as untrustworthy. +* Operator confirms a perception → orchestrator increases confidence in + the perception predicate for similar inputs. + +The point is making the operator's tacit knowledge **first-class data** +that the system can reason about, not just record. + +### 8.4 Reverse-mode microscopy + +"I want to know X — plan the imaging that answers X."
The orchestrator +translates scientific goals into imaging plans. This is the +plans-as-goals idea taken to its conclusion: the operator describes +intent in scientific terms, the orchestrator owns the imaging strategy. + +Tractable only once 8.1 and the goal language are built. + +### 8.5 Continuous shadow / always-on critic + +Run the production orchestrator + a shadow candidate continuously, and +log all decision divergences. Over weeks, the divergence log becomes a +**dataset of disagreements**. Each disagreement is either: + +* Production was right, candidate was wrong → candidate needs a fix. +* Candidate was right, production was wrong → consider promotion or + investigate why production picked differently. +* Both were defensible → annotate the case. + +Free with the eval substrate (Phase 6); the only addition is a +divergence collator. + +--- + +## 9. Concretely Built Today (`paradigm/closed-loop` branch) + +| # | Commit | What | +| --- | --- | --- | +| 1 | `3e410581` | Schema split: `position_coarse` / `position_fine` / derived `stage_position`. | +| 2 | `617e54c9` | `ExperimentState.notify_embryos_changed()` observer → `EMBRYOS_UPDATE` broadcast. | +| 3 | `144d9fc9` | Map render layer — lavender rings (coarse) / discs (fine) / numbers. | +| 4 | `4fbb9edf` | `detect_embryos` flows through web Marking canvas; `auth.py` + `require_control`. | +| 5 | `8f6553e1` | Map pick-up / drop / Delete to edit embryos in place (control-gated PUT/DELETE). | +| 6 | `808fe813` | Side-fix: re-enable XY joystick at device-layer boot. | +| 7 | `f7a13d69` | Side-fix: image-anchored crosshair + scroll-to-zoom in camera panel. | +| 8 | `d69cc219` | `gently/eval/`: event capture / replay / shadow / decision log scaffolding. | +| 9 | `75d7c9db` | Production decision capture wired through `ConversationManager.call_claude`. | +| 10 | `0a97563e` | `OPERATOR_*` event vocabulary + `ReactiveCandidate` (first real shadow). 
| + +### Per-session disk shape now + +`D:\Gently3\sessions\{id}\` + +* `events.jsonl` — captured event bus, telemetry-filtered. +* `decisions.jsonl` — every Claude turn (success + error). +* `interaction_log.jsonl` — pre-existing chat-shaped interactions. +* `timeline.jsonl` — pre-existing session timeline. +* Plus everything from the legacy FileStore layout. + +### Eval CLI + +`python scripts/replay_session.py {session_id_prefix} [--histogram] [--candidate {name}] [--real-time] [--time-scale N]` + +--- + +## 10. What is *Not* Done Yet + +These are the natural follow-ups; sketched as future-self breadcrumbs. + +### Near-term (days) + +* **Tier-1 world snapshot** as a system-prompt section the brain sees + on every wake. Build the snapshot from `agent.experiment` plus the + last cached `DEVICE_STATE_UPDATE`. ~30 lines of formatted prose, every + wake. +* **Tier-2 events digest** — hand-written formatter that reads the + bus's recent meaningful events (or the captured jsonl tail) and + produces a one-paragraph "since last response" note. +* **Snapshot ingest into the brain's prompt** — `_update_system_prompt` + already takes a `context_summary`; route tier-1 + tier-2 through it. + +### Medium-term (weeks) + +* **Wake-router** — the code-level scheduler from §3. Currently the + brain only wakes on user message. Add: event-driven wake (subscribe + to wake-worthy events), periodic-tick wake (heartbeat), debounce / + coalesce buffer. +* **More operator events** — `OPERATOR_ANNOTATED_EMBRYO` ("this is the + control", "skip, looks dead"), `OPERATOR_STARTED_TIMELAPSE`, + `OPERATOR_INTERRUPTED_PLAN`, `OPERATOR_TOGGLED_CAMERA`. Whatever the + Map / web UI lets the operator do should publish a typed event. +* **SPIM-fine alignment workflow** — populate `position_fine`. Tool + + per-embryo state transition. Triggers `EMBRYOS_UPDATE` and a new + `FINE_ALIGNMENT_COMPLETED` event the orchestrator can react to. 
+* **Continuous-shadow harness** — extend `ShadowRunner` to run a + candidate alongside production in the live agent process (not just + during replay). Collect divergences into a per-session + `divergences.jsonl`. + +### Longer arc (months) + +* **A goal expression language** — predicates over the world model that + let the operator say "image until 4-fold" or "follow the cell + divisions in embryo 3 at high resolution." This is the substrate for + §8.1 (plans-as-goals). +* **LLM-driven candidates** — once the rule-based `ReactiveCandidate` + proves the substrate, add Claude-driven candidates (Haiku for cheap, + Opus for thinking). Use the snapshot+digest as their input. +* **Cross-session learning loop** — wire the `learnings/` store into + the world model as priors. Add a learning-write surface (a tool the + orchestrator can call when it notices a pattern). Use shadow A/B to + validate that learnings improve decisions. +* **Goal-driven evaluation** — once goals exist, "did the experiment + achieve its goal" becomes a measurable end-to-end success rate. The + ultimate metric is this, not turn-level decision diffs. + +--- + +## 11. Principles That Surface Throughout + +A few recurring design priors worth naming: + +1. **Distill, don't dump.** Structured summaries beat raw logs in + prompts. Hand-written formatters beat LLM summarisers for + structured data. LLMs for prose, code for structure. +2. **Pull beats push when uncertain.** Default to tools the agent + queries on demand, not data shoved into every prompt. Push only + what's universally relevant (the world snapshot). +3. **Same shape for production and shadow.** If production writes a + Decision with these fields, shadow candidates write Decisions with + the same fields. Diff is then trivial. +4. **Events carry intent; state carries position.** `EMBRYOS_UPDATE` + is state (the embryo list now). `OPERATOR_EDITED_EMBRYO` is intent + (a human just did this). Both exist; they answer different + questions. +5. 
**The brain doesn't move hardware.** All hardware action goes + through tools that go through the device layer that goes through + ophyd that goes through MMCore. Shadow candidates are constructively + prevented from acting. Layers are not negotiable. +6. **No SaveCardSettings.** Firmware persistent state silently inherits + between sessions; if it ever gets out of sync with code it's a + debugging nightmare. Apply firmware config every boot, code wins. +7. **Localhost is the diSPIM box. Remote is view-only by default.** + Auth surface stays tiny and explicit. Token upgrade is the seam, + not user accounts. + +--- + +## 12. Open Questions (Worth Revisiting Later) + +* **Continuous vs episodic shadows.** Continuous always-on shadow + captures divergence over time but multiplies cost (multiple LLM + candidates running). Episodic shadow at decision moments is cheaper + but misses timing-sensitive cases. Hybrid? +* **Is the conversation history the right substrate at all?** With + decision moments hours apart, prior chat may be more distracting + than useful. Maybe the brain shouldn't see chat history beyond N + hours; the world model + events digest are the durable memory and + chat is just for the active dialogue. +* **How much should the operator know about the orchestrator's plan?** + Today the operator drives by asking. With autonomous mode, the + orchestrator runs experiments largely on its own. Should there be a + permanent "what is the orchestrator thinking right now" surface + visible on the Map? An always-on intent display? +* **Failure semantics.** If a candidate would have made a different + decision than production, and production's decision led to a bad + outcome, the candidate "wins." How do we score? Define "bad outcome" + rigorously enough that it can be measured? + +These are not blockers. They are notes for the next iteration of this +document, after a few weeks of running on the substrate built here. 
From 6dd37be4b55fb2aa84f5f49497870ccc6e4a5032 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Sat, 16 May 2026 17:16:04 -0400 Subject: [PATCH 12/71] events: bridge Python logs onto the EventBus / Events page The console output from launch_gently.py is the most informative surface in the system -- calibration progress, plan executor state changes, perception decisions, MMCore callbacks. Until now it lived only in the terminal and on-disk gently_*.log. This bridge fans it onto the EventBus too, so the Events page in the viz server mirrors what the operator would otherwise have to alt-tab to a terminal to see. Backend ------- gently/core/log_bridge.py LogToBusHandler(logging.Handler) -- emit() publishes EventType.LOG_RECORD with {level, level_name, logger, message, module, func, line, ts_ms, exc_text?}. Per-thread re-entry guard prevents a subscriber's own log call from spawning a cascade. Loggers in gently.core.event_bus and gently.core.log_bridge are never bridged. configure_log_bridge() reads three env vars: GENTLY_LOG_BUS off/on (default on) GENTLY_LOG_BUS_LEVEL threshold (default INFO) GENTLY_LOG_BUS_INCLUDE_THIRDPARTY also bridge aiohttp/uvicorn/ bluesky/anthropic/httpx/httpcore (default off -- keeps the page readable; durable copy still in gently_*.log) Idempotent: re-attaching is a no-op. gently/core/event_bus.py EventType.LOG_RECORD added, plus inclusion in _NO_HISTORY_TYPES (log records can fire hundreds-per-minute and would crowd out the bounded history deque used for "meaningful" events). launch_gently.py configure_log_bridge() runs right after configure_logging() in main(). Single line, env-controlled, no API changes. Frontend -------- gently/ui/web/static/js/events.js addEventToTable() branches on LOG_RECORD vs everything else. Log rows render with a level-coloured badge (DEBUG / INFO / WARN / ERROR), the logger name greyed before the message, and click-to-expand reveals the full payload including stack traces. 
gently/ui/web/static/css/main.css Four new .event-type-badge.log-{debug,info,warn,error} classes matching the existing badge palette. Monospace font for log message, red tint for exception lines. websocket.js already forwards everything except DEVICE_STATE_UPDATE / BOTTOM_CAMERA_FRAME to the events table; LOG_RECORD inherits that behaviour automatically. Tests ----- tests/test_log_bridge.py: 10 tests covering pass-through, level threshold, exception capture, re-entry guard, bridge-internal logger skip, GENTLY_LOG_BUS=off path, default attach, idempotency, third-party exclusion default, opt-in third-party. 10/10 green; full paradigm suite 42/42. Phase 9. On paradigm/closed-loop only. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 6318691b24994d006e7cf83893b5ad1f05fd68a8) --- gently/core/event_bus.py | 8 + gently/core/log_bridge.py | 175 ++++++++++++++++++++++ gently/ui/web/static/css/main.css | 14 ++ gently/ui/web/static/js/events.js | 148 ++++++++++++++----- launch_gently.py | 5 + tests/test_log_bridge.py | 235 ++++++++++++++++++++++++++++++ 6 files changed, 546 insertions(+), 39 deletions(-) create mode 100644 gently/core/log_bridge.py create mode 100644 tests/test_log_bridge.py diff --git a/gently/core/event_bus.py b/gently/core/event_bus.py index 3d59d9c6..d5495976 100644 --- a/gently/core/event_bus.py +++ b/gently/core/event_bus.py @@ -84,6 +84,11 @@ class EventType(Enum): BOTTOM_CAMERA_FRAME = auto() # Live JPEG frame from the bottom camera stream EMBRYOS_UPDATE = auto() # Full embryo list snapshot from agent.experiment + # Python logging.LogRecord republished onto the bus so the Events page + # surfaces what would otherwise only land in the terminal. See + # gently/core/log_bridge.py — opt-in handler. + LOG_RECORD = auto() + # Operator-action events. Distinct from EMBRYOS_UPDATE because they # carry intent ("a human did this") rather than just state delta. 
# Candidate orchestrators can subscribe and reason about what the @@ -166,6 +171,9 @@ class EventType(Enum): _NO_HISTORY_TYPES = frozenset({ EventType.DEVICE_STATE_UPDATE, EventType.BOTTOM_CAMERA_FRAME, # ~2 Hz JPEG frames — would crowd history out + EventType.LOG_RECORD, # log lines can hit hundreds/min during + # calibration; durable copy is in the + # gently_*.log file already }) diff --git a/gently/core/log_bridge.py b/gently/core/log_bridge.py new file mode 100644 index 00000000..d8d7e69f --- /dev/null +++ b/gently/core/log_bridge.py @@ -0,0 +1,175 @@ +"""Bridge Python logging into the EventBus so the Events page mirrors the +console. + +A small ``LogToBusHandler`` subclasses ``logging.Handler``. Every record it +sees gets published as ``EventType.LOG_RECORD`` with a compact payload the +frontend can render. The handler attaches itself to whichever loggers +``configure_log_bridge`` is told to cover — by default only ``gently`` and +``gently_perception``, which keeps third-party noise (aiohttp access logs, +bluesky state transitions, anthropic SDK chatter) off the page unless the +operator opts in. + +Env-configurable: + GENTLY_LOG_BUS — "on" / "off" (default: on) + GENTLY_LOG_BUS_LEVEL — DEBUG / INFO (default) / WARNING / ERROR + GENTLY_LOG_BUS_INCLUDE_THIRDPARTY — "1"/"true" to include common third- + party loggers (uvicorn, aiohttp, + bluesky, anthropic, httpx, httpcore) + +Re-entrancy is the only real subtlety: if a log call happens inside the +EventBus.publish path (e.g. from the dispatch loop's logger), republishing +it as another LOG_RECORD would loop forever. Guarded with a thread-local +re-entry flag. 
+""" + +from __future__ import annotations + +import logging +import os +import threading +from typing import Iterable, Optional, Sequence + +from .event_bus import EventBus, EventType, get_event_bus + +logger = logging.getLogger(__name__) + + +# Loggers we never want on the Events page — they emit at the wrong layer +# (their own log lines describe bus dispatch / events page WebSocket frames) +# so republishing them would create feedback or infinite churn. +_NEVER_BRIDGE = frozenset({ + "gently.core.event_bus", + "gently.core.log_bridge", +}) + +# Loggers that count as "third-party noise" — silenced by default, can be +# opted in with GENTLY_LOG_BUS_INCLUDE_THIRDPARTY=1. +_THIRDPARTY_DEFAULTS: Sequence[str] = ( + "uvicorn", "uvicorn.error", "uvicorn.access", + "aiohttp", "aiohttp.access", + "anthropic", "httpx", "httpcore", + "bluesky", "bluesky.RE.state", +) + + +class LogToBusHandler(logging.Handler): + """Publishes each record onto the EventBus as a LOG_RECORD event. + + Per-thread re-entry guard prevents infinite loops when something in + the publish path itself logs. + """ + + def __init__(self, bus: EventBus, *, level: int = logging.INFO): + super().__init__(level=level) + self._bus = bus + self._reentry = threading.local() + + def emit(self, record: logging.LogRecord) -> None: + # Re-entry guard: if a downstream subscriber's handler logs, we + # must not republish that log line. + if getattr(self._reentry, "active", False): + return + # Never bridge our own machinery — those records describe the + # bridge itself, would loop. + if record.name in _NEVER_BRIDGE: + return + self._reentry.active = True + try: + try: + # format() runs all configured formatters (incl. exc_info + # serialisation). We send the formatted message + the + # structured bits separately so the frontend can choose + # how to render. 
+ message = record.getMessage() + except Exception: + message = "" + + payload = { + "level": int(record.levelno), + "level_name": record.levelname, + "logger": record.name, + "message": message, + "module": record.module, + "func": record.funcName, + "line": record.lineno, + # Wall-clock ms since epoch — frontend uses this for its + # own ordering / display, separate from the EventBus's + # internal timestamp. + "ts_ms": int(record.created * 1000), + } + if record.exc_info: + try: + payload["exc_text"] = logging.Formatter().formatException(record.exc_info) + except Exception: + pass + + self._bus.publish( + event_type=EventType.LOG_RECORD, + data=payload, + source=f"log:{record.name}", + ) + except Exception: + # If we can't publish (shutdown, etc.), drop the record + # silently — the live console + on-disk log still have it. + pass + finally: + self._reentry.active = False + + +def configure_log_bridge( + bus: Optional[EventBus] = None, + *, + loggers: Optional[Iterable[str]] = None, + level: Optional[str] = None, + include_thirdparty: Optional[bool] = None, +) -> Optional[LogToBusHandler]: + """Attach a LogToBusHandler to the requested loggers. + + Returns the installed handler (or None if the bridge is disabled). + Idempotent: safe to call multiple times — only the first call attaches. + + Parameters honour env-var defaults so the launch script doesn't need + to know the knobs: + GENTLY_LOG_BUS — "off" disables entirely + GENTLY_LOG_BUS_LEVEL — threshold (default INFO) + GENTLY_LOG_BUS_INCLUDE_THIRDPARTY — adds aiohttp/uvicorn/bluesky/etc. 
+ """ + if os.environ.get("GENTLY_LOG_BUS", "on").lower() in ("off", "0", "false", "no"): + return None + + if bus is None: + bus = get_event_bus() + + if level is None: + level = os.environ.get("GENTLY_LOG_BUS_LEVEL", "INFO") + level_int = getattr(logging, level.upper(), logging.INFO) + + if include_thirdparty is None: + env_val = os.environ.get("GENTLY_LOG_BUS_INCLUDE_THIRDPARTY", "") + include_thirdparty = env_val.lower() in ("1", "true", "yes", "on") + + if loggers is None: + loggers = ["gently", "gently_perception"] + if include_thirdparty: + loggers = list(loggers) + list(_THIRDPARTY_DEFAULTS) + + handler = LogToBusHandler(bus, level=level_int) + + attached = [] + for name in loggers: + target = logging.getLogger(name) + # Skip if already attached (idempotency for re-invocation). + if any(isinstance(h, LogToBusHandler) for h in target.handlers): + continue + target.addHandler(handler) + attached.append(name) + + if attached: + # Surface the configuration once at startup — using our own logger + # (which is in _NEVER_BRIDGE) so this announcement itself doesn't + # become a LOG_RECORD event. + logger.info( + "Log bridge active: level=%s, loggers=%s, include_thirdparty=%s", + logging.getLevelName(level_int), attached, include_thirdparty, + ) + return handler diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index ef4dac96..c0a6c539 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -2036,6 +2036,20 @@ a.tab-link.active { .event-type-badge.error { background: rgba(248, 81, 73, 0.2); color: #f85149; } .event-type-badge.default { background: var(--bg-hover); color: var(--text-muted); } +/* Log-record badges per level. The level is the badge text (DEBUG / INFO / + WARN / ERROR) for log rows; the LOG_RECORD type itself is collapsed into + the level so the column doesn't read the same string for every line. 
*/ +.event-type-badge.log-debug { background: rgba(125, 134, 145, 0.18); color: #9ba3b0; } +.event-type-badge.log-info { background: rgba(88, 166, 255, 0.16); color: var(--accent); } +.event-type-badge.log-warn { background: rgba(210, 153, 34, 0.22); color: var(--accent-orange); } +.event-type-badge.log-error { background: rgba(248, 81, 73, 0.22); color: #f85149; } + +/* Log line message: monospace, faint logger prefix, expandable trace. */ +.log-row .event-data { font-family: 'JetBrains Mono', ui-monospace, monospace; } +.log-logger { color: var(--text-muted); opacity: 0.85; margin-right: 0.5rem; } +.log-message { color: var(--text); } +.log-exc { color: #f85149; opacity: 0.85; } + .event-source { color: var(--text-muted); font-size: 0.75rem; diff --git a/gently/ui/web/static/js/events.js b/gently/ui/web/static/js/events.js index 02365e40..3e37eb66 100644 --- a/gently/ui/web/static/js/events.js +++ b/gently/ui/web/static/js/events.js @@ -41,6 +41,40 @@ function getEventBadgeClass(eventType) { return 'default'; } +// Log-record helpers -------------------------------------------------- +// LOG_RECORD events come from the Python logging bridge. We collapse the +// generic "LOG_RECORD" type into the actual level (DEBUG / INFO / WARN / +// ERROR) so the table is readable -- otherwise every row in a busy +// session reads the same string in the Type column. +function isLogEvent(event) { + return event && event.event_type === 'LOG_RECORD'; +} + +function logLevelLabel(d) { + // levelname is fastest path; fall back to numeric mapping if missing. 
+ const lvl = (d && (d.level_name || '')).toString().toUpperCase(); + if (lvl) { + if (lvl === 'WARNING') return 'WARN'; + if (lvl === 'CRITICAL') return 'CRIT'; + return lvl; + } + const n = d && Number(d.level); + if (!isFinite(n)) return 'LOG'; + if (n >= 50) return 'CRIT'; + if (n >= 40) return 'ERROR'; + if (n >= 30) return 'WARN'; + if (n >= 20) return 'INFO'; + return 'DEBUG'; +} + +function logBadgeClass(d) { + const label = logLevelLabel(d); + if (label === 'DEBUG') return 'log-debug'; + if (label === 'INFO') return 'log-info'; + if (label === 'WARN') return 'log-warn'; + return 'log-error'; // ERROR / CRIT collapse together +} + // Search helper functions function escapeRegex(str) { return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); @@ -212,45 +246,81 @@ function addEventToTable(event, prepend = true) { if (hasImage) tr.classList.add('has-image'); tr.dataset.eventId = event.event_id || ''; - const badgeClass = getEventBadgeClass(event.event_type); - - // Image indicator icon - const imageIndicator = hasImage - ? ` - - - - - - ` - : ''; - - // Thumbnail preview - const thumbnailHtml = hasImage - ? `Event image` - : ''; - - tr.innerHTML = ` - ${formatEventTime(event.timestamp)} - ${imageIndicator}${event.event_type} - ${event.source || '-'} - ${thumbnailHtml}
${formatEventData(event.data)}
- `; - - // Click to expand data - tr.addEventListener('click', () => { - const dataDiv = tr.querySelector('.event-data'); - dataDiv.classList.toggle('expanded'); - if (dataDiv.classList.contains('expanded')) { - dataDiv.innerHTML = `
${JSON.stringify(event.data, null, 2)}
`; - } else { - dataDiv.innerHTML = formatEventData(event.data); - } - }); + if (isLogEvent(event)) { + // Log rows have a compact, distinctive shape: level badge in the + // Type column, logger name + message in the Data column. Click to + // toggle a pre with the full payload (incl. exception trace). + tr.classList.add('log-row'); + const d = event.data || {}; + const badgeCls = logBadgeClass(d); + const label = logLevelLabel(d); + const message = highlightSearchTerms(d.message || ''); + const loggerName = highlightSearchTerms(d.logger || '-'); + const excTag = d.exc_text ? ' ⏎ trace…' : ''; + tr.innerHTML = ` + ${formatEventTime(event.timestamp)} + ${label} + ${event.source || '-'} +
+ ${loggerName}${message}${excTag} +
+ `; + tr.addEventListener('click', () => { + const dataDiv = tr.querySelector('.event-data'); + dataDiv.classList.toggle('expanded'); + if (dataDiv.classList.contains('expanded')) { + const tracePart = d.exc_text + ? `\n\n${d.exc_text}` : ''; + dataDiv.innerHTML = + `
${d.logger || ''}  ${d.func || ''}:${d.line || ''}\n` +
+                    `${(d.message || '')}${tracePart}
`; + } else { + dataDiv.innerHTML = + `${loggerName}` + + `${message}${excTag}`; + } + }); + } else { + const badgeClass = getEventBadgeClass(event.event_type); + + // Image indicator icon + const imageIndicator = hasImage + ? ` + + + + + + ` + : ''; + + // Thumbnail preview + const thumbnailHtml = hasImage + ? `Event image` + : ''; + + tr.innerHTML = ` + ${formatEventTime(event.timestamp)} + ${imageIndicator}${event.event_type} + ${event.source || '-'} + ${thumbnailHtml}
${formatEventData(event.data)}
+ `; + + // Click to expand data + tr.addEventListener('click', () => { + const dataDiv = tr.querySelector('.event-data'); + dataDiv.classList.toggle('expanded'); + if (dataDiv.classList.contains('expanded')) { + dataDiv.innerHTML = `
${JSON.stringify(event.data, null, 2)}
`; + } else { + dataDiv.innerHTML = formatEventData(event.data); + } + }); + } if (prepend) { tbody.insertBefore(tr, tbody.firstChild); diff --git a/launch_gently.py b/launch_gently.py index 1727dd2a..58f94fb5 100644 --- a/launch_gently.py +++ b/launch_gently.py @@ -29,6 +29,7 @@ import yaml from gently.log_config import configure_logging +from gently.core.log_bridge import configure_log_bridge from gently.app.agent import MicroscopyAgent from gently.organisms import load_organism from gently.hardware import load_hardware, get_hardware @@ -124,6 +125,10 @@ async def main(offline: bool = False, resume_session: str = None, show_sessions: log_file = str(log_dir / f"gently_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log") # File always gets INFO+, console uses the requested level configure_logging(level=log_level, log_file=log_file) + # Mirror gently / gently_perception log lines onto the EventBus so the + # Events page in the viz server shows them too. Env vars control level + # and whether to include noisy third-party loggers (off by default). + configure_log_bridge() logger.info("Logging to %s (console level: %s)", log_file, log_level) # Load organism module from config diff --git a/tests/test_log_bridge.py b/tests/test_log_bridge.py new file mode 100644 index 00000000..2079d365 --- /dev/null +++ b/tests/test_log_bridge.py @@ -0,0 +1,235 @@ +"""Tests for the logging->EventBus bridge.""" + +from __future__ import annotations + +import logging +import os + +import pytest + +from gently.core.event_bus import EventBus, EventType +from gently.core.log_bridge import ( + LogToBusHandler, + configure_log_bridge, + _NEVER_BRIDGE, +) + + +@pytest.fixture +def bus_with_capture(): + bus = EventBus() + seen = [] + bus.subscribe(EventType.LOG_RECORD, lambda ev: seen.append(ev)) + return bus, seen + + +@pytest.fixture +def isolated_logger(request): + """Fresh logger per test; cleaned up after. 
+ + Using uniquely-named loggers under the gently.* namespace ensures we + don't tangle with any handlers installed by other tests in the suite. + """ + name = f"gently.test_logbridge.{request.node.name}" + lgr = logging.getLogger(name) + lgr.setLevel(logging.DEBUG) + # Don't propagate to root; we don't want pytest's caplog to swallow it + # before our handler sees it. + original_propagate = lgr.propagate + lgr.propagate = False + yield lgr + # Teardown: strip handlers we added + for h in list(lgr.handlers): + lgr.removeHandler(h) + lgr.propagate = original_propagate + + +def test_handler_publishes_each_record(bus_with_capture, isolated_logger): + """Every log call at or above the handler threshold becomes a LOG_RECORD event.""" + bus, seen = bus_with_capture + isolated_logger.addHandler(LogToBusHandler(bus, level=logging.DEBUG)) + + isolated_logger.debug("debug msg") + isolated_logger.info("info msg") + isolated_logger.warning("warn msg") + isolated_logger.error("error msg") + + assert len(seen) == 4 + levels = [ev.data["level_name"] for ev in seen] + assert levels == ["DEBUG", "INFO", "WARNING", "ERROR"] + msgs = [ev.data["message"] for ev in seen] + assert msgs == ["debug msg", "info msg", "warn msg", "error msg"] + + +def test_handler_respects_level_threshold(bus_with_capture, isolated_logger): + """Records below the handler level are dropped.""" + bus, seen = bus_with_capture + isolated_logger.addHandler(LogToBusHandler(bus, level=logging.WARNING)) + + isolated_logger.debug("nope") + isolated_logger.info("also nope") + isolated_logger.warning("yes") + isolated_logger.error("yes") + + assert [ev.data["level_name"] for ev in seen] == ["WARNING", "ERROR"] + + +def test_exc_text_captured_on_exception(bus_with_capture, isolated_logger): + """logger.exception() includes the formatted traceback in payload.""" + bus, seen = bus_with_capture + isolated_logger.addHandler(LogToBusHandler(bus, level=logging.DEBUG)) + + try: + raise RuntimeError("simulated") + except RuntimeError: +
isolated_logger.exception("blew up") + + assert len(seen) == 1 + payload = seen[0].data + assert payload["level_name"] == "ERROR" + assert payload["message"] == "blew up" + assert payload.get("exc_text") and "RuntimeError" in payload["exc_text"] + assert "simulated" in payload["exc_text"] + + +def test_reentry_guard_prevents_infinite_loop(bus_with_capture, isolated_logger): + """A bus subscriber that itself logs must NOT spawn cascading events. + + Without the guard, every subscriber-emitted log would republish as + another LOG_RECORD, spawn another subscriber call, ... ad infinitum. + """ + bus, seen = bus_with_capture + isolated_logger.addHandler(LogToBusHandler(bus, level=logging.DEBUG)) + + # Subscriber that re-logs on every event it sees. + def loud(ev): + isolated_logger.debug("subscriber-internal log") + + bus.subscribe(EventType.LOG_RECORD, loud) + + isolated_logger.info("trigger") + # Exactly one event — the original. The subscriber's log was + # suppressed by the re-entry guard. 
+ assert len(seen) == 1 + assert seen[0].data["message"] == "trigger" + + +def test_handler_skips_bridge_internals(bus_with_capture): + """Records from the bridge's own loggers must never be republished.""" + bus, seen = bus_with_capture + h = LogToBusHandler(bus, level=logging.DEBUG) + + for blocked in _NEVER_BRIDGE: + lgr = logging.getLogger(blocked) + lgr.setLevel(logging.DEBUG) + lgr.addHandler(h) + try: + lgr.info("from %s", blocked) + finally: + lgr.removeHandler(h) + + assert seen == [] # nothing republished + + +def test_configure_log_bridge_off_returns_none(bus_with_capture, monkeypatch): + """GENTLY_LOG_BUS=off disables the bridge entirely.""" + bus, seen = bus_with_capture + monkeypatch.setenv("GENTLY_LOG_BUS", "off") + h = configure_log_bridge(bus=bus, loggers=["gently.cfg_off_test"]) + assert h is None + logging.getLogger("gently.cfg_off_test").info("should not appear") + assert seen == [] + + +def test_configure_log_bridge_attaches_handler(bus_with_capture, monkeypatch): + """Default-on path attaches a handler that publishes records.""" + bus, seen = bus_with_capture + monkeypatch.setenv("GENTLY_LOG_BUS", "on") + monkeypatch.setenv("GENTLY_LOG_BUS_LEVEL", "INFO") + monkeypatch.delenv("GENTLY_LOG_BUS_INCLUDE_THIRDPARTY", raising=False) + + target_name = "gently.cfg_attach_test" + target = logging.getLogger(target_name) + target.setLevel(logging.DEBUG) # otherwise inherits root WARNING + + h = configure_log_bridge(bus=bus, loggers=[target_name]) + assert h is not None + try: + target.info("hi") + assert any(ev.data["message"] == "hi" for ev in seen) + finally: + target.removeHandler(h) + + +def test_configure_log_bridge_is_idempotent(bus_with_capture, monkeypatch): + """Calling configure twice doesn't double-attach.""" + bus, seen = bus_with_capture + monkeypatch.setenv("GENTLY_LOG_BUS", "on") + + target = "gently.idem_test" + logging.getLogger(target).setLevel(logging.DEBUG) + h1 = configure_log_bridge(bus=bus, loggers=[target]) + h2 = 
configure_log_bridge(bus=bus, loggers=[target]) + assert h1 is not None + try: + logging.getLogger(target).warning("once") + # If both handlers were attached we'd see two events for the same + # record. One event = no double-attach. + warn_events = [ev for ev in seen if ev.data["message"] == "once"] + assert len(warn_events) == 1 + finally: + for h in (h1, h2): + if h is not None: + try: + logging.getLogger(target).removeHandler(h) + except Exception: + pass + + +def test_third_party_excluded_by_default(bus_with_capture, monkeypatch): + """uvicorn / aiohttp / bluesky etc. don't get bridged unless opted in.""" + bus, seen = bus_with_capture + monkeypatch.setenv("GENTLY_LOG_BUS", "on") + monkeypatch.delenv("GENTLY_LOG_BUS_INCLUDE_THIRDPARTY", raising=False) + # Default behaviour: loggers list omits third-party. We pass None so + # the function picks its own default. + logging.getLogger("gently").setLevel(logging.DEBUG) + h = configure_log_bridge(bus=bus, loggers=None) + assert h is not None + try: + logging.getLogger("aiohttp.access").info("noisy 1") + logging.getLogger("bluesky").info("noisy 2") + assert seen == [] + # But gently.* still works + logging.getLogger("gently.proves_attached").info("kept") + assert any(ev.data["message"] == "kept" for ev in seen) + finally: + for n in ("gently", "gently_perception"): + try: + logging.getLogger(n).removeHandler(h) + except Exception: + pass + + +def test_third_party_included_when_opted_in(bus_with_capture, monkeypatch): + """GENTLY_LOG_BUS_INCLUDE_THIRDPARTY=1 brings the noisy loggers in.""" + bus, seen = bus_with_capture + monkeypatch.setenv("GENTLY_LOG_BUS", "on") + monkeypatch.setenv("GENTLY_LOG_BUS_INCLUDE_THIRDPARTY", "1") + logging.getLogger("aiohttp").setLevel(logging.DEBUG) + logging.getLogger("aiohttp.access").setLevel(logging.DEBUG) + + h = configure_log_bridge(bus=bus, loggers=None) + assert h is not None + try: + logging.getLogger("aiohttp.access").info("now visible") + assert any(ev.data["message"] == "now 
visible" for ev in seen) + finally: + # Strip handler from every logger we might have attached to — + # belt and braces, since the function defaults to a long list. + for name in (list(logging.Logger.manager.loggerDict.keys()) + + ["gently", "gently_perception"]): + try: + logging.getLogger(name).removeHandler(h) + except Exception: + pass From 7017866b8b7f2badf354b0b4a20aff80922b357a Mon Sep 17 00:00:00 2001 From: P S Kesavan Date: Thu, 28 May 2026 13:18:10 -0400 Subject: [PATCH 13/71] switchbot: BLE room-light accessory device + web test GUI SwitchBot Bot (WoHand) as a Bluesky/ophyd-protocol device over `bleak` (set('on'|'off'|'press') -> Status, read/describe). Controls the diSPIM room light used for bottom-camera imaging. Adds a standalone FastAPI test GUI under diagnostics/ (buttons + morse blinker). Dep in requirements_device.txt. Co-Authored-By: Claude Opus 4.7 (cherry picked from commit a790b028bd384595d58f58dc8af5356300f306dd) --- diagnostics/switchbot_webgui.py | 345 ++++++++++++++++++++++++++++++++ gently/hardware/switchbot.py | 176 ++++++++++++++++ requirements_device.txt | 3 + 3 files changed, 524 insertions(+) create mode 100644 diagnostics/switchbot_webgui.py create mode 100644 gently/hardware/switchbot.py diff --git a/diagnostics/switchbot_webgui.py b/diagnostics/switchbot_webgui.py new file mode 100644 index 00000000..5832b103 --- /dev/null +++ b/diagnostics/switchbot_webgui.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python3 +""" +Temporary web GUI to play with the SwitchBot Bot that switches the diSPIM room +light (on for bottom-camera/brightfield imaging, off otherwise). + +This is a TEST TOOL, not part of the production device layer. 
It drives the Bot +directly over BLE using the same command protocol as +``gently.hardware.switchbot.SwitchBot`` (same command bytes + GATT UUIDs), but +over a single *persistent* connection so the buttons feel snappy and the morse +blinker is fast — the device-layer class is connect-per-command (~1-2 s each), +which is fine for a plan step but hopeless for blinking. + +Features: ON / OFF / PRESS buttons, and a morse-code blinker (blinks the real +room light + mirrors the pattern on screen). The Bot is a mechanical switch +pusher, so each toggle is a ~0.5-1 s servo move — morse is deliberately slow. + +Run: + .venv/bin/python diagnostics/switchbot_webgui.py + # then open http://127.0.0.1:8765 + + .venv/bin/python diagnostics/switchbot_webgui.py --address EC:6F:04:06:5B:23 --port 8765 +""" +from __future__ import annotations + +import argparse +import asyncio +import logging +from contextlib import asynccontextmanager + +import uvicorn +from fastapi import FastAPI +from fastapi.responses import HTMLResponse, JSONResponse +from pydantic import BaseModel + +# Reuse the device-layer device's protocol definitions (single source of truth). +from gently.hardware.switchbot import _COMMANDS, _CTRL_CHAR + +logger = logging.getLogger("switchbot_webgui") + +DEFAULT_ADDRESS = "EC:6F:04:06:5B:23" + +# ITU morse, letters + digits. Unsupported characters are skipped. 
+MORSE = { + "A": ".-", "B": "-...", "C": "-.-.", "D": "-..", "E": ".", "F": "..-.", + "G": "--.", "H": "....", "I": "..", "J": ".---", "K": "-.-", "L": ".-..", + "M": "--", "N": "-.", "O": "---", "P": ".--.", "Q": "--.-", "R": ".-.", + "S": "...", "T": "-", "U": "..-", "V": "...-", "W": ".--", "X": "-..-", + "Y": "-.--", "Z": "--..", + "0": "-----", "1": ".----", "2": "..---", "3": "...--", "4": "....-", + "5": ".....", "6": "-....", "7": "--...", "8": "---..", "9": "----.", +} + + +class Bot: + """A single persistent BLE connection to the Bot, with serialized access.""" + + def __init__(self, address: str): + self.address = address + self._client = None + self._lock = asyncio.Lock() + self._morse_task: asyncio.Task | None = None + self.state = "unknown" + self.busy = False + + async def _ensure(self): + from bleak import BleakClient + if self._client is not None and self._client.is_connected: + return + self._client = BleakClient(self.address, timeout=20) + await self._client.connect() + logger.info("connected to %s", self.address) + + async def _write(self, action: str): + """Write one command, reconnecting once if the link dropped.""" + from bleak.exc import BleakError + for attempt in (1, 2): + try: + await self._ensure() + await self._client.write_gatt_char(_CTRL_CHAR, _COMMANDS[action], response=True) + if action in ("on", "off"): + self.state = action + return + except (BleakError, OSError, asyncio.TimeoutError) as exc: + logger.warning("write %s attempt %d failed: %s", action, attempt, exc) + self._client = None # force reconnect + if attempt == 2: + raise + + async def _cancel_morse(self): + task = self._morse_task + if task and not task.done(): + task.cancel() + await asyncio.gather(task, return_exceptions=True) + self._morse_task = None + + async def command(self, action: str) -> str: + """ON/OFF/PRESS. 
Interrupts any running morse (manual override).""" + await self._cancel_morse() + async with self._lock: + await self._write(action) + return self.state + + def schedule(self, text: str, unit: float): + """Build an on/off timeline [(state, seconds), ...] for a message.""" + seq = [("off", round(unit, 3))] # settle to a known state first + for ch in text.upper(): + if ch == " ": + seq.append(("off", round(unit * 7, 3))) + continue + code = MORSE.get(ch) + if not code: + continue + for sym in code: + seq.append(("on", round(unit * (3 if sym == "-" else 1), 3))) + seq.append(("off", round(unit, 3))) # intra-letter gap + st, _ = seq[-1] + seq[-1] = (st, round(unit * 3, 3)) # upgrade to inter-letter gap + return seq + + async def start_morse(self, text: str, unit: float): + await self._cancel_morse() + seq = self.schedule(text, unit) + if len(seq) <= 1: + return None + restore = self.state + self._morse_task = asyncio.create_task(self._play(seq, restore)) + return seq + + async def _play(self, seq, restore: str): + async with self._lock: + self.busy = True + try: + for state, dur in seq: + await self._write(state) + await asyncio.sleep(dur) + await self._write(restore if restore in ("on", "off") else "off") + finally: + self.busy = False + + async def stop(self): + await self._cancel_morse() + async with self._lock: + await self._write("off") + return self.state + + async def disconnect(self): + await self._cancel_morse() + if self._client is not None and self._client.is_connected: + await self._client.disconnect() + + +BOT: Bot | None = None + + +@asynccontextmanager +async def lifespan(app: FastAPI): + yield + if BOT is not None: + await BOT.disconnect() + + +app = FastAPI(lifespan=lifespan) + + +class MorseReq(BaseModel): + text: str + unit: float = 1.5 + + +@app.get("/", response_class=HTMLResponse) +async def index(): + return PAGE.replace("__ADDRESS__", BOT.address) + + +@app.get("/status") +async def status(): + return {"state": BOT.state, "busy": BOT.busy, 
"address": BOT.address} + + +@app.post("/cmd/{action}") +async def cmd(action: str): + if action not in _COMMANDS: + return JSONResponse({"error": f"unknown action {action!r}"}, status_code=400) + try: + state = await BOT.command(action) + except Exception as exc: + return JSONResponse({"error": str(exc)}, status_code=502) + return {"state": state} + + +@app.post("/morse") +async def morse(req: MorseReq): + unit = max(0.3, min(4.0, req.unit)) + text = req.text[:40] + try: + seq = await BOT.start_morse(text, unit) + except Exception as exc: + return JSONResponse({"error": str(exc)}, status_code=502) + if seq is None: + return JSONResponse({"error": "nothing sendable in that text"}, status_code=400) + seconds = round(sum(d for _, d in seq), 1) + return {"schedule": seq, "unit": unit, "seconds": seconds} + + +@app.post("/stop") +async def stop(): + try: + state = await BOT.stop() + except Exception as exc: + return JSONResponse({"error": str(exc)}, status_code=502) + return {"state": state} + + +PAGE = """ + + +diSPIM Room Light + +
+

diSPIM Room Light

+
SwitchBot Bot · __ADDRESS__
+
+
+
+ + + +
+
+ +
+ fast + + slow + 0.7s +
+
+ + +
+
+
+
+ +""" + + +def main(): + ap = argparse.ArgumentParser(description="Temporary SwitchBot room-light web GUI") + ap.add_argument("--address", default=DEFAULT_ADDRESS, help="Bot BLE MAC address") + ap.add_argument("--port", type=int, default=8765) + ap.add_argument("--host", default="127.0.0.1", help="bind host (default: localhost only)") + args = ap.parse_args() + + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") + global BOT + BOT = Bot(args.address) + print(f"\n diSPIM Room Light GUI → http://{args.host}:{args.port}\n Bot: {args.address}\n") + uvicorn.run(app, host=args.host, port=args.port, log_level="warning") + + +if __name__ == "__main__": + main() diff --git a/gently/hardware/switchbot.py b/gently/hardware/switchbot.py new file mode 100644 index 00000000..131c50ef --- /dev/null +++ b/gently/hardware/switchbot.py @@ -0,0 +1,176 @@ +""" +SwitchBot Bot (WoHand) control as a Bluesky/Ophyd-protocol device. + +The SwitchBot Bot is a Bluetooth-LE button pusher. In "Switch mode" it supports +explicit on/off; in "Press mode" it does a momentary press. This module talks to +it directly over BLE via ``bleak`` using the documented GATT command protocol — +no SwitchBot cloud, no hub. + +The device follows the same duck-typed Bluesky protocol as the diSPIM devices +(see ``gently.hardware.dispim.devices.optical.DiSPIMLED``): ``set(state)`` returns +an ophyd ``Status``, plus ``read()``/``describe()``. So it drops into plans via +``yield from bps.mv(bot, 'on')``. + +BLE I/O is async (``bleak``); ``set()`` runs a fresh connect→write→disconnect +cycle in a worker thread and resolves the Status when done. Connecting per command +keeps the implementation robust (no stale-connection handling) at the cost of +~1-2 s latency, which is fine for a low-frequency accessory. For lower latency or +encrypted/password-protected Bots, swap the ``_send_command`` body for PySwitchbot. 
+ +Self-test (drives a real Bot):: + + python gently/hardware/switchbot.py EC:6F:04:06:5B:23 on off +""" +from __future__ import annotations + +import asyncio +import logging +import threading +import time +from collections import OrderedDict + +logger = logging.getLogger(__name__) + +# SwitchBot Bot GATT. Note the UUID group is 9fb8 — the widely-copied 9fb9 is wrong. +_CTRL_CHAR = "cba20002-224d-11e6-9fb8-0002a5d5c51b" # write / write-without-response +_NOTIFY_CHAR = "cba20003-224d-11e6-9fb8-0002a5d5c51b" # notify (command response) + +_COMMANDS = { + "on": bytes([0x57, 0x01, 0x01]), + "off": bytes([0x57, 0x01, 0x02]), + "press": bytes([0x57, 0x01, 0x00]), +} +_RESP_OK = 0x01 # first byte of the response notification on success + + +class SwitchBotError(RuntimeError): + """BLE I/O failed, timed out, or the Bot reported a non-OK response.""" + + +async def _send_command(address: str, command: bytes, timeout: float) -> bytes: + """Connect, send one command, await the response notification, disconnect. + + Returns the raw response bytes; raises SwitchBotError on timeout or non-OK. 
+ """ + from bleak import BleakClient # lazy import keeps module import cheap + + response: dict[str, bytes] = {} + got = asyncio.Event() + + def _on_notify(_char, data: bytearray) -> None: + response["data"] = bytes(data) + got.set() + + async with BleakClient(address, timeout=timeout) as client: + await client.start_notify(_NOTIFY_CHAR, _on_notify) + await client.write_gatt_char(_CTRL_CHAR, command, response=True) + try: + await asyncio.wait_for(got.wait(), timeout=timeout) + except asyncio.TimeoutError as exc: + raise SwitchBotError("no response notification from SwitchBot") from exc + finally: + try: + await client.stop_notify(_NOTIFY_CHAR) + except Exception: # disconnect cleanup is best-effort + pass + + data = response["data"] + if not data or data[0] != _RESP_OK: + raise SwitchBotError(f"SwitchBot returned non-OK response: {data.hex()}") + return data + + +class SwitchBot: + """Bluesky-protocol device for a SwitchBot Bot button pusher. + + Parameters + ---------- + address : str + BLE MAC address, e.g. "EC:6F:04:06:5B:23". + name : str + Device name used as the key in plans and read() output. + timeout : float + Per-command BLE connect/response timeout in seconds. + + Valid states for ``set``: 'on', 'off', 'press'. + """ + + def __init__(self, address: str, name: str = "switchbot", *, timeout: float = 20.0): + self.address = address + self.name = name + self.timeout = timeout + self.parent = None # required for Bluesky bps.mv() + self._state = "unknown" # last commanded on/off state + self._lock = threading.Lock() # serialize BLE access (one radio, one bot) + + # -- Bluesky settable protocol ------------------------------------------- + def set(self, state: str): + """Send on/off/press. 
Returns an ophyd Status that finishes when done.""" + from ophyd.status import Status + + state = str(state).lower() + if state not in _COMMANDS: + raise ValueError(f"state {state!r} not in {list(_COMMANDS)}") + + status = Status(obj=self, timeout=self.timeout + 5) + + def worker(): + with self._lock: + try: + data = asyncio.run( + _send_command(self.address, _COMMANDS[state], self.timeout) + ) + except Exception as exc: + logger.warning("SwitchBot %s set(%s) failed: %s", self.name, state, exc) + status.set_exception(exc) + return + if state in ("on", "off"): + self._state = state + logger.info("SwitchBot %s -> %s (resp %s)", self.name, state, data.hex()) + status.set_finished() + + threading.Thread(target=worker, name=f"{self.name}-set", daemon=True).start() + return status + + # -- Bluesky readable protocol ------------------------------------------- + def read(self): + return OrderedDict({ + self.name: {"value": self._state, "timestamp": time.time()} + }) + + def describe(self): + return OrderedDict({ + self.name: {"source": f"switchbot:{self.address}", "dtype": "string", "shape": []} + }) + + def read_configuration(self): + return OrderedDict() + + def describe_configuration(self): + return OrderedDict() + + +if __name__ == "__main__": + # Standalone self-test, e.g.: python gently/hardware/switchbot.py AA:BB:.. 
on off + import sys + + address = "EC:6F:04:06:5B:23" + cmds = [] + for arg in sys.argv[1:]: + if ":" in arg and len(arg) >= 17: # looks like a MAC address + address = arg + else: + cmds.append(arg.lower()) + cmds = cmds or ["on", "off"] + + logging.basicConfig(level=logging.INFO, format="%(message)s") + bot = SwitchBot(address) + print(f"SwitchBot {address} — sequence: {cmds}\n") + for i, cmd in enumerate(cmds): + print(f"set({cmd!r}) ...") + st = bot.set(cmd) + st.wait(30) # blocks; raises if the command failed + print(f" done; read() -> {bot.read()[bot.name]['value']}") + if i != len(cmds) - 1: + time.sleep(1.5) + print("\nOK") diff --git a/requirements_device.txt b/requirements_device.txt index 71612cf6..bbcc5fdd 100644 --- a/requirements_device.txt +++ b/requirements_device.txt @@ -22,3 +22,6 @@ pyyaml>=6.0 # SAM embryo detection (optional, needs CUDA GPU) # torch>=2.0.0 # segment-anything + +# BLE accessory control (SwitchBot Bot — diSPIM room light) +bleak>=0.21.0 From d42ac6b7e1681f0163b16d73da7d21f770171e8f Mon Sep 17 00:00:00 2001 From: P S Kesavan Date: Thu, 28 May 2026 13:18:10 -0400 Subject: [PATCH 14/71] temperature: ACUITYnano thermal controller device (serial/MQTT/mock) Bluesky/ophyd-protocol device for the ACUITYnano Peltier controller. set(target) blocks until "[ SYSTEM LOCKED ]"; read() reports water temp, setpoint, state. Serial + MQTT transports plus a mock backend for hardware-free testing. Deps in requirements_device.txt. 
Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 5f7912e5ecda9dd154bda147e782b519ac814d56) --- gently/hardware/temperature.py | 242 +++++++++++++++++++++++++++++++++ requirements_device.txt | 4 + 2 files changed, 246 insertions(+) create mode 100644 gently/hardware/temperature.py diff --git a/gently/hardware/temperature.py b/gently/hardware/temperature.py new file mode 100644 index 00000000..526d576d --- /dev/null +++ b/gently/hardware/temperature.py @@ -0,0 +1,242 @@ +""" +ACUITYnano Precision Thermal Controller as a Bluesky/Ophyd-protocol device. + +Wraps the vendor SDK — a Peltier/TEC water-cooled controller, 0.0-99.9 C. Two +transports expose the same core API: + - USB serial : acuitynano_precision_thermalizer_serial (vendor-recommended + for closed-loop automation; zero-latency) + - MQTT : acuitynano_precision_thermalizer_api (multi-client; adds + get_peltier_temp()) + +The device follows the same duck-typed Bluesky protocol as the diSPIM devices +(see gently.hardware.dispim.devices.optical.DiSPIMLED). A temperature controller +is the textbook bluesky "settable that completes on stabilization": + + yield from bps.mv(temperature, 20.0) # blocks until the controller LOCKS + +set(target) commands the setpoint, enables the TEC, and returns an ophyd Status +that finishes only when the controller reports "[ SYSTEM LOCKED ]" (or raises on +timeout). read() reports the live water temperature (plus setpoint / state, and +peltier temp when the transport provides it). The blocking stabilization wait runs in a worker +thread so the Status integrates with the RunEngine. + +NOTE: the vendor `acuitynano_precision_thermalizer_*` packages are NOT on PyPI — +install them on the device-layer machine. Local logic can be exercised with the +built-in mock backend: `python gently/hardware/temperature.py --mock 20`.
+""" +from __future__ import annotations + +import logging +import threading +import time +from collections import OrderedDict + +logger = logging.getLogger(__name__) + +TEMP_MIN_C = 0.0 +TEMP_MAX_C = 99.9 + + +def _make_backend(cfg: dict): + """Construct the vendor SDK transport from a config mapping.""" + backend = str(cfg.get("backend", "serial")).lower() + if backend == "mock": + return _MockBackend() + if backend == "serial": + from acuitynano_precision_thermalizer_serial import AcuityNanoPrecisionThermalizerSerial + return AcuityNanoPrecisionThermalizerSerial( + cfg["com_port"], baud_rate=cfg.get("baud_rate", 115200) + ) + if backend == "mqtt": + from acuitynano_precision_thermalizer_api import AcuityNanoPrecisionThermalizerAPI + # The vendor package ships with an embedded HiveMQ Cloud broker + creds, + # so MQTT can run with no config. Pass only the keys actually provided, + # to override those embedded defaults (and keep secrets in config, not code). + kwargs = {k: cfg[k] for k in ("broker", "port", "user", "password") if k in cfg} + return AcuityNanoPrecisionThermalizerAPI(**kwargs) + raise ValueError(f"unknown temperature backend {backend!r} (use 'serial', 'mqtt', or 'mock')") + + +def create_temperature_controller(cfg: dict) -> "TemperatureController": + """Factory used by the device layer: build transport + wrap as a device.""" + backend = _make_backend(cfg) + if "feedback_peltier" in cfg and hasattr(backend, "set_feedback_sensor"): + backend.set_feedback_sensor(use_peltier=bool(cfg["feedback_peltier"])) + return TemperatureController( + backend, + name=cfg.get("name", "temperature"), + stabilize_timeout=cfg.get("stabilize_timeout", 600.0), + ) + + +class TemperatureController: + """Bluesky-protocol device for the ACUITYnano thermal controller. + + Parameters + ---------- + backend : object + Vendor SDK instance exposing set_temperature / get_water_temp / + get_system_state / enable_tec / wait_for_target. 
+ name : str + Device name; the registry key and primary read() field. + stabilize_timeout : float + Seconds to wait for "[ SYSTEM LOCKED ]" before set() fails. + """ + + def __init__(self, backend, name: str = "temperature", *, stabilize_timeout: float = 600.0): + self._dev = backend + self.name = name + self.stabilize_timeout = float(stabilize_timeout) + self.parent = None # required for Bluesky bps.mv() + self._setpoint = None # last commanded target + self._lock = threading.Lock() + + # -- Bluesky settable protocol ------------------------------------------- + def set(self, target_c): + """Command setpoint + enable TEC; Status finishes when the system locks.""" + from ophyd.status import Status + + target = float(target_c) + if not (TEMP_MIN_C <= target <= TEMP_MAX_C): + raise ValueError(f"target {target} C outside [{TEMP_MIN_C}, {TEMP_MAX_C}]") + + status = Status(obj=self, timeout=self.stabilize_timeout + 30) + + def worker(): + with self._lock: + try: + self._dev.set_temperature(target) # vendor also validates range + self._dev.enable_tec(True) + locked = self._dev.wait_for_target(timeout_seconds=self.stabilize_timeout) + except Exception as exc: + logger.warning("temperature %s set(%.2f) failed: %s", self.name, target, exc) + status.set_exception(exc) + return + self._setpoint = target + if locked: + logger.info("temperature %s locked at %.2f C", self.name, target) + status.set_finished() + else: + status.set_exception(TimeoutError( + f"{self.name} did not stabilize at {target} C within {self.stabilize_timeout}s" + )) + + threading.Thread(target=worker, name=f"{self.name}-set", daemon=True).start() + return status + + # -- Explicit controls (outside the bps.mv() path) ----------------------- + def enable(self, on: bool = True): + self._dev.enable_tec(bool(on)) + + def setpoint(self, target_c): + """Command the setpoint without blocking for stabilization.""" + self._dev.set_temperature(float(target_c)) + + # -- Bluesky readable protocol 
------------------------------------------- + def read(self): + now = time.time() + data = OrderedDict() + data[self.name] = {"value": self._safe(self._dev.get_water_temp), "timestamp": now} + data[f"{self.name}_setpoint"] = {"value": self._setpoint, "timestamp": now} + data[f"{self.name}_state"] = { + "value": self._safe(self._dev.get_system_state, default="unknown"), "timestamp": now + } + if self._has_peltier(): + data[f"{self.name}_peltier"] = {"value": self._safe(self._dev.get_peltier_temp), "timestamp": now} + return data + + def describe(self): + src = f"acuitynano:{self.name}" + d = OrderedDict() + d[self.name] = {"source": src, "dtype": "number", "shape": []} + d[f"{self.name}_setpoint"] = {"source": src, "dtype": "number", "shape": []} + d[f"{self.name}_state"] = {"source": src, "dtype": "string", "shape": []} + if self._has_peltier(): + d[f"{self.name}_peltier"] = {"source": src, "dtype": "number", "shape": []} + return d + + def read_configuration(self): + return OrderedDict() + + def describe_configuration(self): + return OrderedDict() + + def close(self): + """Release the transport (serial port / MQTT client) on shutdown.""" + for method in ("close", "disconnect"): + fn = getattr(self._dev, method, None) + if fn is not None: + try: + fn() + except Exception: + pass + return + + # -- helpers -------------------------------------------------------------- + def _has_peltier(self) -> bool: + return getattr(self._dev, "get_peltier_temp", None) is not None + + @staticmethod + def _safe(fn, default=None): + try: + return fn() + except Exception: + return default + + +class _MockBackend: + """In-memory fake mirroring the vendor API, for local testing without hardware.""" + + def __init__(self, *args, **kwargs): + self._target = 25.0 + self._enabled = False + + def set_temperature(self, t): + if not (TEMP_MIN_C <= float(t) <= TEMP_MAX_C): + raise ValueError("Target must be between 0.0 and 99.9 C") + self._target = float(t) + + def enable_tec(self, on): + 
self._enabled = bool(on) + + def set_feedback_sensor(self, use_peltier=False): + pass + + def wait_for_target(self, timeout_seconds=300): + time.sleep(0.5) # pretend to ramp + settle + return True + + def get_water_temp(self): + return self._target + + def get_peltier_temp(self): + return self._target - 1.0 + + def get_system_state(self): + return "[ SYSTEM LOCKED ]" if self._enabled else "[ IDLE ]" + + def close(self): + pass + + +if __name__ == "__main__": + import sys + + logging.basicConfig(level=logging.INFO, format="%(message)s") + if "--mock" in sys.argv: + target = 20.0 + for arg in sys.argv[1:]: + try: + target = float(arg) + break + except ValueError: + continue + dev = TemperatureController(_MockBackend(), name="temperature", stabilize_timeout=10) + print(f"[mock] set({target}) — blocks until locked ...") + st = dev.set(target) + st.wait(15) + print("[mock] read ->", {k: v["value"] for k, v in dev.read().items()}) + print("OK") + else: + print("Real-hardware self-test needs the vendor SDK + a controller. " + "Run with --mock to exercise the device logic locally.") diff --git a/requirements_device.txt b/requirements_device.txt index bbcc5fdd..26b10dab 100644 --- a/requirements_device.txt +++ b/requirements_device.txt @@ -25,3 +25,7 @@ pyyaml>=6.0 # BLE accessory control (SwitchBot Bot — diSPIM room light) bleak>=0.21.0 + +# ACUITYnano thermal controller transports +pyserial>=3.5 +paho-mqtt>=1.6.0 From 32a8ad28be00c765f2b5ea4bcfa3df7b9c667f99 Mon Sep 17 00:00:00 2001 From: P S Kesavan Date: Thu, 28 May 2026 13:18:10 -0400 Subject: [PATCH 15/71] device_layer: register switchbot + temperature; expose temperature to the agent Config-gated registration of the SwitchBot and ACUITYnano devices alongside the MMCore devices. Adds /api/temperature/{set,status} REST endpoints + client methods, and set_temperature/get_temperature agent tools so the agent can hold or shift sample temperature (C. elegans development rate). 
Co-Authored-By: Claude Opus 4.7 (cherry picked from commit 249ae4ac0752ea419e9dd430423ab7db936efeed) --- gently/app/tools/__init__.py | 1 + gently/app/tools/temperature_tools.py | 75 ++++++++++++++++++ gently/hardware/dispim/client.py | 9 +++ gently/hardware/dispim/device_layer.py | 103 +++++++++++++++++++++++++ 4 files changed, 188 insertions(+) create mode 100644 gently/app/tools/temperature_tools.py diff --git a/gently/app/tools/__init__.py b/gently/app/tools/__init__.py index 765b3d33..4dd1b776 100644 --- a/gently/app/tools/__init__.py +++ b/gently/app/tools/__init__.py @@ -10,6 +10,7 @@ from . import stage_tools from . import led_tools from . import light_source_tools +from . import temperature_tools from . import calibration_tools from . import acquisition_tools from . import volume_tools diff --git a/gently/app/tools/temperature_tools.py b/gently/app/tools/temperature_tools.py new file mode 100644 index 00000000..43ff8ab0 --- /dev/null +++ b/gently/app/tools/temperature_tools.py @@ -0,0 +1,75 @@ +""" +Temperature Control Tools + +Agent tools for the ACUITYnano thermal controller. Temperature drives C. elegans +development rate, so these let the agent hold or shift the sample temperature as +part of closed-loop experiments. +""" +from typing import Dict + +from gently.harness.tools.registry import tool, ToolCategory, ToolExample + + +@tool( + name="set_temperature", + description=( + "Set the sample temperature setpoint in Celsius (0.0-99.9). The thermal " + "controller ramps toward the target and this returns immediately — poll " + "get_temperature until the state reads '[ SYSTEM LOCKED ]' before imaging. " + "Temperature controls C. elegans development rate (~15 C slow, 20 C standard, " + "25 C fast)." 
+ ), + category=ToolCategory.HARDWARE, + requires_microscope=True, + examples=[ + ToolExample("Hold the sample at 20 degrees", {"target_c": 20.0}), + ToolExample("Warm the embryos to 25 C to speed development", {"target_c": 25.0}), + ], +) +async def set_temperature(target_c: float, context: Dict) -> str: + """Command the thermal controller to a target temperature. + + Parameters + ---------- + target_c : float + Target temperature in degrees Celsius (0.0-99.9). + """ + client = context.get('client') + try: + result = await client.set_temperature(target_c) + if result.get('success'): + return (f"Commanded {target_c} C. Currently {result.get('temperature_c')} C, " + f"state {result.get('state')!r}. Ramping — call get_temperature to confirm lock.") + return f"Error setting temperature: {result.get('error', 'unknown error')}" + except Exception as e: + return f"Error setting temperature: {e}" + + +@tool( + name="get_temperature", + description=( + "Read the current sample temperature, target setpoint, and lock state from the " + "thermal controller. Use to confirm the sample has stabilized at the setpoint " + "('[ SYSTEM LOCKED ]') before acquiring." 
+ ), + category=ToolCategory.HARDWARE, + requires_microscope=True, + examples=[ + ToolExample("What's the current temperature?"), + ToolExample("Has the sample reached temperature yet?"), + ], +) +async def get_temperature(context: Dict) -> str: + """Read current temperature, setpoint, and lock state.""" + client = context.get('client') + try: + r = await client.get_temperature() + if r.get('success'): + msg = (f"Temperature {r.get('temperature_c')} C " + f"(setpoint {r.get('setpoint_c')} C, state {r.get('state')!r}") + if r.get('peltier_c') is not None: + msg += f", peltier {r.get('peltier_c')} C" + return msg + ")" + return f"Error reading temperature: {r.get('error', 'unknown error')}" + except Exception as e: + return f"Error reading temperature: {e}" diff --git a/gently/hardware/dispim/client.py b/gently/hardware/dispim/client.py index 6f097670..03ff8009 100644 --- a/gently/hardware/dispim/client.py +++ b/gently/hardware/dispim/client.py @@ -802,6 +802,15 @@ async def get_led_status(self) -> Dict: """Get current LED status.""" return await self._api_get('/api/led/status') + async def set_temperature(self, target_c: float) -> Dict: + """Command the thermal-controller setpoint (Celsius). 
Non-blocking — the + controller ramps; poll get_temperature() for the lock state.""" + return await self._api_post('/api/temperature/set', {'target_c': target_c}) + + async def get_temperature(self) -> Dict: + """Get current temperature, setpoint, and lock state.""" + return await self._api_get('/api/temperature/status') + # ------------------------------------------------------------------ # Live device-state readout (streamed from the device layer poller) # ------------------------------------------------------------------ diff --git a/gently/hardware/dispim/device_layer.py b/gently/hardware/dispim/device_layer.py index 48e19746..a1af5e4a 100644 --- a/gently/hardware/dispim/device_layer.py +++ b/gently/hardware/dispim/device_layer.py @@ -237,6 +237,39 @@ async def initialize(self): for name in self.devices: logger.debug(" - %s", name) + # Optional BLE accessory: SwitchBot Bot. It's a Bluetooth device, not a + # Micro-Manager adapter, so it's created here (independently of MMCore) + # and added to the same registry. Plans address it by name, e.g. + # bps.mv(switchbot, 'on'). Config-gated: no `switchbot:` section => no-op. + sb_cfg = self.config.get('switchbot') + if sb_cfg: + try: + from gently.hardware.switchbot import SwitchBot + sb_name = sb_cfg.get('name', 'switchbot') + self.devices[sb_name] = SwitchBot( + address=sb_cfg['address'], + name=sb_name, + timeout=sb_cfg.get('timeout', 20.0), + ) + logger.info("Created SwitchBot '%s' at %s", sb_name, sb_cfg['address']) + except Exception as exc: + logger.warning("Could not create SwitchBot: %s", exc) + + # Optional temperature controller (ACUITYnano). Like the SwitchBot it's + # not an MMCore adapter — created here from config and added to the same + # registry. Plans block on it via bps.mv(temperature, 20.0) until the + # controller reports SYSTEM LOCKED. Config-gated: no `temperature:` => no-op. 
+ temp_cfg = self.config.get('temperature') + if temp_cfg: + try: + from gently.hardware.temperature import create_temperature_controller + tc = create_temperature_controller(temp_cfg) + self.devices[tc.name] = tc + logger.info("Created temperature controller '%s' (backend=%s)", + tc.name, temp_cfg.get('backend', 'serial')) + except Exception as exc: + logger.warning("Could not create temperature controller: %s", exc) + # Push XY safety bounds down to the ASI Tiger firmware so the joystick # can't drive past Layer-1 software limits. The XY_STAGE_*_UM constants # in devices/stage.py are the single source of truth — both the @@ -1294,6 +1327,74 @@ async def handle_set_led(self, request): 'traceback': traceback.format_exc() }, status=500) + async def handle_get_temperature_status(self, request): + """GET /api/temperature/status - current temperature, setpoint, lock state.""" + try: + temp = self.devices.get('temperature') + if temp is None: + return web.json_response({'success': False, 'error': 'temperature device not found'}) + r = temp.read() + return web.json_response({ + 'success': True, + 'temperature_c': r.get(temp.name, {}).get('value'), + 'setpoint_c': r.get(f'{temp.name}_setpoint', {}).get('value'), + 'state': r.get(f'{temp.name}_state', {}).get('value'), + 'peltier_c': r.get(f'{temp.name}_peltier', {}).get('value'), + }) + except Exception as e: + import traceback + return web.json_response({'success': False, 'error': str(e), + 'traceback': traceback.format_exc()}, status=500) + + async def handle_set_temperature(self, request): + """POST /api/temperature/set - command setpoint. Body: {target_c, wait?}. + + Non-blocking by default (controller ramps; poll status). wait=true blocks + until SYSTEM LOCKED or the device's stabilize timeout. 
+ """ + try: + data = await request.json() + target = float(data.get('target_c')) + wait = bool(data.get('wait', False)) + if not (0.0 <= target <= 99.9): + return web.json_response({'success': False, + 'error': f'target {target} outside [0.0, 99.9]'}) + temp = self.devices.get('temperature') + if temp is None: + return web.json_response({'success': False, 'error': 'temperature device not found'}) + + if not wait: + temp.enable(True) + temp.setpoint(target) + r = temp.read() + return web.json_response({ + 'success': True, 'target_c': target, 'waited': False, + 'message': f'commanded {target} C (ramping)', + 'temperature_c': r.get(temp.name, {}).get('value'), + 'state': r.get(f'{temp.name}_state', {}).get('value'), + }) + + import time + status = temp.set(target) + timeout = float(getattr(temp, 'stabilize_timeout', 600.0)) + 10 + start = time.time() + while not status.done and (time.time() - start) < timeout: + await asyncio.sleep(0.5) + r = temp.read() + if status.done and status.success: + return web.json_response({ + 'success': True, 'target_c': target, 'waited': True, + 'message': f'locked at {target} C', + 'temperature_c': r.get(temp.name, {}).get('value'), + 'state': r.get(f'{temp.name}_state', {}).get('value'), + }) + return web.json_response({'success': False, 'target_c': target, + 'error': f'did not stabilize at {target} C within {timeout:.0f}s'}) + except Exception as e: + import traceback + return web.json_response({'success': False, 'error': str(e), + 'traceback': traceback.format_exc()}, status=500) + async def handle_set_camera_led_mode(self, request): """POST /api/camera/led_mode - Enable/disable automatic LED for bottom camera""" try: @@ -2236,6 +2337,8 @@ async def on_start(self): self._app.router.add_get('/api/plans', self.handle_get_plans) self._app.router.add_get('/api/led/status', self.handle_get_led_status) self._app.router.add_post('/api/led/set', self.handle_set_led) + self._app.router.add_get('/api/temperature/status', 
self.handle_get_temperature_status) + self._app.router.add_post('/api/temperature/set', self.handle_set_temperature) self._app.router.add_post('/api/camera/led_mode', self.handle_set_camera_led_mode) self._app.router.add_post('/api/camera/exposure', self.handle_set_camera_exposure) self._app.router.add_get('/api/camera/exposure', self.handle_get_camera_exposure) From 3c7bfc79a8d2f51baa86607fcbd001342c74ddfc Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 14:28:59 -0400 Subject: [PATCH 16/71] =?UTF-8?q?events:=20EMBRYO=5FTERMINATED=20=E2=80=94?= =?UTF-8?q?=20orchestrator=20emits,=20UI=20tracker=20handles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New EventType.EMBRYO_TERMINATED fires whenever an embryo's imaging stops for any reason (no_object terminal, configured stop condition, errors, user removal). The orchestrator emits it from both the no_object terminal path and the per-condition stop check. TimelapseStateTracker handles it by marking the embryo complete and carrying the completion_reason through for the UI. Single source of truth for "an embryo has stopped" — downstream listeners (filmstrip terminated badge, summary stats) now have one event to subscribe to instead of polling embryo state. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gently/app/orchestration/timelapse.py | 10 ++++++++++ gently/core/event_bus.py | 1 + gently/ui/web/timelapse_tracker.py | 12 ++++++++++++ 3 files changed, 23 insertions(+) diff --git a/gently/app/orchestration/timelapse.py b/gently/app/orchestration/timelapse.py index 28ae86b8..d35768f1 100644 --- a/gently/app/orchestration/timelapse.py +++ b/gently/app/orchestration/timelapse.py @@ -900,6 +900,11 @@ async def _check_stop_condition(self, embryo_state: EmbryoState): f"likely hatched / out of FOV)" ) logger.info(f"Embryo {embryo_state.id} stopped: {embryo_state.completion_reason}") + self._emit_event(EventType.EMBRYO_TERMINATED, { + "embryo_id": embryo_state.id, + "completion_reason": embryo_state.completion_reason, + "timepoints_acquired": embryo_state.timepoints_acquired, + }) return # Check all conditions (primary + additional) with OR logic @@ -909,6 +914,11 @@ async def _check_stop_condition(self, embryo_state: EmbryoState): embryo_state.is_complete = True embryo_state.completion_reason = reason logger.info(f"Embryo {embryo_state.id} stopped: {reason}") + self._emit_event(EventType.EMBRYO_TERMINATED, { + "embryo_id": embryo_state.id, + "completion_reason": reason, + "timepoints_acquired": embryo_state.timepoints_acquired, + }) return # Stop on first matching condition def _evaluate_single_condition( diff --git a/gently/core/event_bus.py b/gently/core/event_bus.py index d5495976..da10b19e 100644 --- a/gently/core/event_bus.py +++ b/gently/core/event_bus.py @@ -44,6 +44,7 @@ class EventType(Enum): EMBRYO_CENTERED = auto() EMBRYO_CALIBRATED = auto() EMBRYO_SKIPPED = auto() + EMBRYO_TERMINATED = auto() # {embryo_id, completion_reason} - emitted when an embryo's imaging stops (any reason: no_object terminal, stop condition met, errors, user removal) # Analysis events ANALYSIS_STARTED = auto() diff --git a/gently/ui/web/timelapse_tracker.py b/gently/ui/web/timelapse_tracker.py index b57086f9..665ef1b7 100644 --- 
a/gently/ui/web/timelapse_tracker.py +++ b/gently/ui/web/timelapse_tracker.py @@ -128,6 +128,18 @@ def handle_event(self, event_type: str, data: dict): self.status = "STOPPED" # Don't mark embryos as complete - they were stopped, not finished + elif event_type == "EMBRYO_TERMINATED": + # A single embryo's imaging was halted by the orchestrator + # (no_object terminal, configured stop condition, errors, etc). + # Carry the completion_reason through so the UI can show why. + eid = data.get("embryo_id") + if eid and eid in self.embryos: + self.embryos[eid]["is_complete"] = True + self.embryos[eid]["completion_reason"] = data.get("completion_reason") + self.embryos[eid].setdefault( + "completed_at", datetime.now().isoformat() + ) + elif event_type == "DETECTOR_EVALUATED": # All detector/perception evaluations (with reasoning) - populates reasoning panel eid = data.get("embryo_id") From a7250edaa5d0f0337c455b05d50cd663f262f234 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 14:29:50 -0400 Subject: [PATCH 17/71] Calibration: SPIM thumb expands into a floating popout for a larger live view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compact SPIM thumb in the metrics strip is now a button. Click opens a draggable, fixed-position popout (~560×480) that mirrors the same live frame stream with bigger imagery, the embryo label, and a close affordance. Hover/focus on the thumb shows a ⤢ chip hinting the interaction; the chip is hidden until a frame actually arrives. The popout reuses SpimLivePreview's apply-on-render plumbing so no new stream is opened — same data path, second render target. Keeps the calibration profile compact by default while letting the operator pull a properly-sized view when they need to read fine structure. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gently/ui/web/static/css/main.css | 194 ++++++++++++++++++++- gently/ui/web/static/js/gallery.js | 269 +++++++++++++++++++++++++++-- 2 files changed, 445 insertions(+), 18 deletions(-) diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index c0a6c539..4772dad7 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -1482,12 +1482,200 @@ a.tab-link.active { background: #000; border-radius: 3px; border: 1px solid var(--border); - display: none; /* shown when frame arrives via .has-frame */ - opacity: 1; + display: inline-block; + opacity: 0.35; /* dim until a real frame arrives (.has-frame) */ } .cal-spim-thumb.has-frame { - display: inline-block; + opacity: 1; +} + +/* Thumb wrapped in a button so click pops out a larger live view. + Sized to match the thumb so it remains clickable even before the + first frame arrives. */ +.cal-spim-thumb-btn { + position: relative; + padding: 0; + background: none; + border: 0; + cursor: pointer; + display: inline-flex; + align-items: center; + line-height: 0; + color: inherit; + width: 96px; + height: 72px; +} + +.cal-spim-thumb-btn:focus-visible { + outline: 2px solid var(--accent, #4f8cff); + outline-offset: 2px; + border-radius: 4px; +} + +.cal-spim-expand-icon { + position: absolute; + top: 2px; + right: 2px; + background: rgba(0, 0, 0, 0.55); + color: #fff; + font-size: 11px; + line-height: 1; + padding: 2px 4px; + border-radius: 3px; + opacity: 0; + transition: opacity 0.12s ease; + pointer-events: none; +} + +.cal-spim-thumb-btn:hover .cal-spim-expand-icon, +.cal-spim-thumb-btn:focus-visible .cal-spim-expand-icon { + opacity: 1; +} + +/* Hide the expand chip when the thumb has no frame yet — nothing to expand. 
*/ +.cal-spim-thumb-btn:has(.cal-spim-thumb:not(.has-frame)) .cal-spim-expand-icon { + display: none; +} + +/* ---------- Floating SPIM popout ---------- */ +.cal-spim-popout { + position: fixed; + top: 80px; + right: 24px; + width: 560px; + height: 480px; + min-width: 320px; + min-height: 260px; + z-index: 9000; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 10px; + box-shadow: 0 16px 40px rgba(0, 0, 0, 0.45), + 0 2px 8px rgba(0, 0, 0, 0.25); + display: flex; + flex-direction: column; + overflow: hidden; + resize: both; +} + +.cal-spim-popout[hidden] { + display: none; +} + +.cal-spim-popout.dragging { + user-select: none; + cursor: grabbing; +} + +.cal-spim-popout-header { + flex: 0 0 auto; + display: flex; + align-items: center; + gap: 8px; + padding: 8px 12px; + background: var(--bg-elevated, var(--bg-card)); + border-bottom: 1px solid var(--border); + cursor: grab; + touch-action: none; +} + +.cal-spim-popout.dragging .cal-spim-popout-header { + cursor: grabbing; +} + +.cal-spim-popout-led { + width: 8px; + height: 8px; + border-radius: 50%; + background: #4ade80; + box-shadow: 0 0 6px rgba(74, 222, 128, 0.7); + animation: cal-spim-led-blink 1.6s ease-in-out infinite; +} + +.cal-spim-popout-led.idle { + background: var(--text-muted, #666); + box-shadow: none; + animation: none; +} + +.cal-spim-popout-title { + font-size: 11px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.6px; + color: var(--text); +} + +.cal-spim-popout-embryo { + font-family: 'JetBrains Mono', ui-monospace, monospace; + font-size: 11px; + color: var(--text-muted); +} + +.cal-spim-popout-spacer { + flex: 1; +} + +.cal-spim-popout-close { + background: transparent; + border: 0; + color: var(--text-muted); + font-size: 20px; + line-height: 1; + padding: 0 6px; + cursor: pointer; + border-radius: 4px; +} + +.cal-spim-popout-close:hover { + color: var(--text); + background: var(--border); +} + +.cal-spim-popout-body { + flex: 1; + 
min-height: 0; + display: flex; + align-items: center; + justify-content: center; + background: #000; + overflow: hidden; + padding: 4px; +} + +.cal-spim-popout-img { + max-width: 100%; + max-height: 100%; + object-fit: contain; + display: none; +} + +.cal-spim-popout-img.has-frame { + display: block; +} + +.cal-spim-popout-placeholder { + color: var(--text-muted); + font-size: 12px; + letter-spacing: 0.4px; +} + +.cal-spim-popout-placeholder[hidden] { + display: none; +} + +.cal-spim-popout-footer { + flex: 0 0 auto; + padding: 6px 12px; + border-top: 1px solid var(--border); + background: var(--bg-elevated, var(--bg-card)); + font-family: 'JetBrains Mono', ui-monospace, monospace; + font-size: 11px; + color: var(--text-muted); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; } /* When a live frame is active, let the SPIM cell breathe a bit so the diff --git a/gently/ui/web/static/js/gallery.js b/gently/ui/web/static/js/gallery.js index 51ee4207..42159d18 100644 --- a/gently/ui/web/static/js/gallery.js +++ b/gently/ui/web/static/js/gallery.js @@ -422,13 +422,19 @@ const CalibrationProfileView = { /** Compact SPIM live indicator used inside the metrics strip. * Carries the same IDs as the old big preview so SpimLivePreview's - * apply-on-render logic continues to work unchanged. */ + * apply-on-render logic continues to work unchanged. The thumb is a + * button — click to open the floating popout for a larger view. */ _renderSpimIndicator() { return `
SPIM - +
@@ -1329,21 +1335,33 @@ const SpimLivePreview = { const placeholder = document.getElementById('cal-spim-placeholder'); const metaEl = document.getElementById('cal-spim-meta'); const led = document.getElementById('cal-spim-led'); - if (!img) return; // not in profile view const latest = embryoId ? this._latestByEmbryo[embryoId] : null; - if (latest) { - img.src = `data:image/png;base64,${latest.base64_png}`; - img.classList.add('has-frame'); - if (placeholder) placeholder.hidden = true; - if (metaEl) metaEl.textContent = this._formatMeta(latest); - if (led) led.classList.remove('idle'); - } else { - img.removeAttribute('src'); - img.classList.remove('has-frame'); - if (placeholder) placeholder.hidden = false; - if (metaEl) metaEl.textContent = '—'; - if (led) led.classList.add('idle'); + + if (img) { + if (latest) { + img.src = `data:image/png;base64,${latest.base64_png}`; + img.classList.add('has-frame'); + if (placeholder) placeholder.hidden = true; + if (metaEl) metaEl.textContent = this._formatMeta(latest); + if (led) led.classList.remove('idle'); + } else { + img.removeAttribute('src'); + img.classList.remove('has-frame'); + if (placeholder) placeholder.hidden = false; + if (metaEl) metaEl.textContent = '—'; + if (led) led.classList.add('idle'); + } + } + + // Mirror into popout if it's open — the popout lives outside the + // calibration panel's innerHTML reset, so we paint it independently. + if (typeof SpimPopout !== 'undefined') { + SpimPopout.paint(latest ? { + base64_png: latest.base64_png, + meta: this._formatMeta(latest), + embryoId, + } : null); } }, @@ -1375,6 +1393,227 @@ const SpimLivePreview = { document.addEventListener('DOMContentLoaded', () => SpimLivePreview.init()); +// ========================================== +// SPIM live popout (floating draggable window) +// ========================================== +// Lazy-built floating window that mirrors SpimLivePreview at a larger +// size. 
Draggable via the header bar, resizable from the bottom-right +// corner. Position and size persist in localStorage so the window +// re-opens where the operator last left it. Closes on Escape. +const SpimPopout = { + _STORAGE_KEY: 'gently.spimPopout.v1', + _root: null, + _isOpen: false, + + _ensureBuilt() { + if (this._root) return this._root; + + const el = document.createElement('div'); + el.className = 'cal-spim-popout'; + el.id = 'cal-spim-popout'; + el.hidden = true; + el.innerHTML = ` +
+ + SPIM Live + + + +
+
+ +
+ Awaiting SPIM frame… +
+
+ + `; + document.body.appendChild(el); + this._root = el; + + // Restore persisted geometry + const saved = this._loadGeometry(); + if (saved) { + el.style.left = `${saved.left}px`; + el.style.top = `${saved.top}px`; + el.style.width = `${saved.width}px`; + el.style.height = `${saved.height}px`; + } + + el.querySelector('#cal-spim-popout-close').addEventListener('click', () => this.close()); + this._wireDrag(el); + this._wireResizeObserver(el); + + return el; + }, + + open() { + const el = this._ensureBuilt(); + if (this._isOpen) return; + el.hidden = false; + this._isOpen = true; + + // Clamp into viewport in case window was resized while popout was hidden + this._clampIntoViewport(el); + + // Paint current frame for the selected embryo + const selected = (typeof CalibrationManager !== 'undefined') + ? CalibrationManager.selectedEmbryoId : null; + if (selected && typeof SpimLivePreview !== 'undefined') { + const latest = SpimLivePreview._latestByEmbryo[selected]; + this.paint(latest ? { + base64_png: latest.base64_png, + meta: SpimLivePreview._formatMeta(latest), + embryoId: selected, + } : null); + } else { + this.paint(null); + } + + document.addEventListener('keydown', this._onKey); + }, + + close() { + if (!this._root || !this._isOpen) return; + this._root.hidden = true; + this._isOpen = false; + document.removeEventListener('keydown', this._onKey); + }, + + toggle() { + this._isOpen ? this.close() : this.open(); + }, + + /** Called by SpimLivePreview whenever the current embryo's latest + * frame changes. Frame is {base64_png, meta, embryoId} or null. 
*/ + paint(frame) { + if (!this._root || !this._isOpen) return; + const img = this._root.querySelector('#cal-spim-popout-img'); + const placeholder = this._root.querySelector('#cal-spim-popout-placeholder'); + const meta = this._root.querySelector('#cal-spim-popout-meta'); + const embryoEl = this._root.querySelector('#cal-spim-popout-embryo'); + const led = this._root.querySelector('#cal-spim-popout-led'); + + if (frame) { + img.src = `data:image/png;base64,${frame.base64_png}`; + img.classList.add('has-frame'); + placeholder.hidden = true; + meta.textContent = frame.meta || '—'; + embryoEl.textContent = frame.embryoId || ''; + led.classList.remove('idle'); + } else { + img.removeAttribute('src'); + img.classList.remove('has-frame'); + placeholder.hidden = false; + meta.textContent = '—'; + embryoEl.textContent = ''; + led.classList.add('idle'); + } + }, + + _onKey: (e) => { + if (e.key === 'Escape') SpimPopout.close(); + }, + + _wireDrag(el) { + const header = el.querySelector('#cal-spim-popout-header'); + let dragging = false; + let startX = 0, startY = 0, startLeft = 0, startTop = 0; + + header.addEventListener('pointerdown', (e) => { + // Don't start drag on the close button + if (e.target.closest('.cal-spim-popout-close')) return; + dragging = true; + const rect = el.getBoundingClientRect(); + startX = e.clientX; + startY = e.clientY; + startLeft = rect.left; + startTop = rect.top; + // Switch to absolute positioning if currently default + el.style.left = `${startLeft}px`; + el.style.top = `${startTop}px`; + el.style.right = 'auto'; + el.style.bottom = 'auto'; + header.setPointerCapture(e.pointerId); + el.classList.add('dragging'); + }); + + header.addEventListener('pointermove', (e) => { + if (!dragging) return; + const dx = e.clientX - startX; + const dy = e.clientY - startY; + let nextLeft = startLeft + dx; + let nextTop = startTop + dy; + // Keep at least 40px of header on-screen + const w = el.offsetWidth; + const h = el.offsetHeight; + nextLeft = 
Math.max(-(w - 80), Math.min(window.innerWidth - 80, nextLeft)); + nextTop = Math.max(0, Math.min(window.innerHeight - 40, nextTop)); + el.style.left = `${nextLeft}px`; + el.style.top = `${nextTop}px`; + }); + + const endDrag = (e) => { + if (!dragging) return; + dragging = false; + el.classList.remove('dragging'); + try { header.releasePointerCapture(e.pointerId); } catch (_) {} + this._saveGeometry(el); + }; + header.addEventListener('pointerup', endDrag); + header.addEventListener('pointercancel', endDrag); + }, + + _wireResizeObserver(el) { + if (typeof ResizeObserver === 'undefined') return; + let saveTimer = null; + const ro = new ResizeObserver(() => { + if (!this._isOpen) return; + clearTimeout(saveTimer); + saveTimer = setTimeout(() => this._saveGeometry(el), 250); + }); + ro.observe(el); + }, + + _clampIntoViewport(el) { + const rect = el.getBoundingClientRect(); + if (rect.left + 80 > window.innerWidth || rect.top + 40 > window.innerHeight + || rect.left < -(rect.width - 80) || rect.top < 0) { + // Recenter + const w = Math.min(rect.width || 520, window.innerWidth - 40); + const h = Math.min(rect.height || 440, window.innerHeight - 40); + el.style.width = `${w}px`; + el.style.height = `${h}px`; + el.style.left = `${Math.max(20, (window.innerWidth - w) / 2)}px`; + el.style.top = `${Math.max(20, (window.innerHeight - h) / 2)}px`; + } + }, + + _saveGeometry(el) { + const rect = el.getBoundingClientRect(); + const data = { + left: Math.round(rect.left), + top: Math.round(rect.top), + width: Math.round(rect.width), + height: Math.round(rect.height), + }; + try { localStorage.setItem(this._STORAGE_KEY, JSON.stringify(data)); } catch (_) {} + }, + + _loadGeometry() { + try { + const raw = localStorage.getItem(this._STORAGE_KEY); + if (!raw) return null; + const data = JSON.parse(raw); + if (typeof data.left !== 'number') return null; + return data; + } catch (_) { return null; } + }, +}; + // Legacy wrappers kept for backward compatibility function 
renderCalibrationGallery() { CalibrationManager.render(); } From 7f241e1b1ececf56d8ded2332579789ae4f7315e Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 14:30:04 -0400 Subject: [PATCH 18/71] Embryos > Board: clock / stereo / pace columns replace confidence + rate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The board's metric columns are restructured around developmental time, the actual question this view answers ("is this embryo on pace?"): - 'clock' — elapsed wall-clock time in the current stage - 'stereo' — stereotypic developmental position at 20 °C reference - 'pace' — clock / stereo ratio; 1.0× means on reference pace These replace 'confidence' (never populated meaningfully) and 'rate' (misleading for slow embryos). 'eta' is now hatch-time, pace-corrected. Migration: dashboardConfig loaded from localStorage runs an idempotent filter that drops 'confidence' / 'rate' from the saved column list and inserts the three new columns in the right slots. Existing user configs upgrade silently on next load. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gently/ui/web/static/css/main.css | 18 ++- gently/ui/web/static/js/embryos.js | 209 ++++++++++++++++++++++------- 2 files changed, 176 insertions(+), 51 deletions(-) diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index 4772dad7..ea8dd142 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -3607,12 +3607,24 @@ kbd { .board-col { padding: 0 0.5rem; } .board-col-embryo { width: 100px; flex-shrink: 0; } .board-col-stage { width: 130px; flex-shrink: 0; } -.board-col-conf { width: 60px; flex-shrink: 0; text-align: center; } -.board-col-rate { width: 70px; flex-shrink: 0; text-align: center; } -.board-col-eta { width: 70px; flex-shrink: 0; text-align: center; } +.board-col-clock { width: 72px; flex-shrink: 0; text-align: right; font-variant-numeric: tabular-nums; } +.board-col-stereo { width: 140px; flex-shrink: 0; font-variant-numeric: tabular-nums; } +.board-col-pace { width: 90px; flex-shrink: 0; text-align: center; font-variant-numeric: tabular-nums; } +.board-col-eta { width: 70px; flex-shrink: 0; text-align: right; font-variant-numeric: tabular-nums; } .board-col-spark { flex: 1; min-width: 100px; } .board-col-alert { width: 110px; flex-shrink: 0; text-align: right; } +/* Pace cell coloring — green when on reference, orange when slow, + red when seriously slow. Class names mirror _formatPace(). */ +.board-col-pace.pace-unknown { color: var(--text-muted); } +.board-col-pace.pace-normal { color: var(--accent-green, #4ade80); } +.board-col-pace.pace-slow { color: #fb923c; } +.board-col-pace.pace-slow-bad { color: #f87171; font-weight: 600; } + +/* Subtle overdue mark in the stereo cell when clock has run past the + expected stage duration. 
*/ +.stereo-overdue { color: #fb923c; margin-left: 4px; } + .board-rows { flex: 1; } .board-row { diff --git a/gently/ui/web/static/js/embryos.js b/gently/ui/web/static/js/embryos.js index f8b7cea3..29340cf5 100644 --- a/gently/ui/web/static/js/embryos.js +++ b/gently/ui/web/static/js/embryos.js @@ -59,7 +59,7 @@ const EmbryosManager = { dashboardConfig: { defaultView: 'default', board: { - columns: ['stage', 'confidence', 'rate', 'eta', 'sparkline', 'alert'], + columns: ['stage', 'clock', 'stereo', 'pace', 'eta', 'sparkline', 'alert'], sparklineLength: 20, warnOvertimeRatio: 1.5, criticalOvertimeRatio: 2.5 @@ -266,6 +266,23 @@ const EmbryosManager = { // Deep merge with defaults this.dashboardConfig = this._deepMerge(this.dashboardConfig, parsed); } + // Migrate legacy board columns: drop the never-populated + // 'confidence' column and the misleading 'rate' column in + // favour of clock/stereo/pace. Idempotent — runs on every load. + const cols = this.dashboardConfig.board?.columns; + if (Array.isArray(cols)) { + const filtered = cols.filter(c => c !== 'confidence' && c !== 'rate'); + const ensure = (key, after) => { + if (filtered.includes(key)) return; + const idx = filtered.indexOf(after); + if (idx === -1) filtered.push(key); + else filtered.splice(idx + 1, 0, key); + }; + ensure('clock', 'stage'); + ensure('stereo', 'clock'); + ensure('pace', 'stereo'); + this.dashboardConfig.board.columns = filtered; + } } catch (e) { console.warn('Failed to load dashboard config:', e); } @@ -370,9 +387,10 @@ const EmbryosManager = {
Embryo ${cols.includes('stage') ? 'Stage' : ''} - ${cols.includes('confidence') ? 'Conf' : ''} - ${cols.includes('rate') ? 'Rate' : ''} - ${cols.includes('eta') ? 'ETA' : ''} + ${cols.includes('clock') ? 'Clock' : ''} + ${cols.includes('stereo') ? 'Stereo' : ''} + ${cols.includes('pace') ? 'Pace' : ''} + ${cols.includes('eta') ? 'ETA' : ''} ${cols.includes('sparkline') ? 'Progression' : ''} ${cols.includes('alert') ? 'Alert' : ''}
@@ -412,54 +430,27 @@ const EmbryosManager = { const latest = reasoning.length > 0 ? reasoning[reasoning.length - 1] : null; const cols = this.dashboardConfig.board.columns; - // Stage const stage = latest?.stage || embryo.current_stage || '—'; const stageIcon = this.getStageIcon(stage); const stageName = this.formatStageName(stage); - // Confidence - const conf = latest ? this.normalizeConfidence(latest.confidence) : 'unknown'; - const confDots = conf === 'high' ? '●●●' : conf === 'medium' ? '●●○' : conf === 'low' ? '●○○' : '○○○'; - const confClass = conf === 'high' ? 'conf-high' : conf === 'medium' ? 'conf-med' : 'conf-low'; + const align = this._computeAlignment(latest); + const overtime = align?.overtime; - // Rate - const overtime = latest?.temporal_analysis?.overtime_ratio; - let rateText = '—'; - let rateClass = ''; - if (overtime != null) { - const rate = (1 / overtime).toFixed(1); - rateText = overtime < 0.9 ? `${rate}x↑` : overtime > 1.1 ? `${rate}x↓` : `${rate}x→`; - rateClass = overtime < 0.9 ? 'rate-fast' : overtime > 1.5 ? 'rate-slow' : 'rate-normal'; - } + const clockText = align ? this._formatMinutes(align.inStageClockMin) : '—'; + const stereoText = align ? this._formatStereoLabel(align) : '—'; + const pace = align ? this._formatPace(align) : { text: '—', className: '' }; + const eta = align ? this._formatEta(align) : '—'; - // ETA - let eta = '—'; - if (stage && this.STAGE_TIMING[stage] != null) { - const stageMinutes = this.STAGE_TIMING[stage]; - const hatchMinutes = this.STAGE_TIMING['hatched'] || 570; - const remaining = hatchMinutes - stageMinutes; - if (remaining > 0) { - const hours = (remaining / 60).toFixed(1); - eta = `~${hours}h`; - } else { - eta = 'done'; - } - } - - // Sparkline const sparklineSvg = cols.includes('sparkline') ? 
this._renderBoardSparkline(reasoning) : ''; - // Alert const arrested = latest?.temporal_analysis?.is_potentially_arrested; const slow = overtime && overtime > (this.dashboardConfig.board.warnOvertimeRatio || 1.5); - const lowConf = conf === 'low'; let alertHtml = ''; if (arrested) { alertHtml = '⚠ arrested'; } else if (slow) { - alertHtml = `⚠ slow ${overtime.toFixed(1)}x`; - } else if (lowConf) { - alertHtml = '⚠ low conf'; + alertHtml = `⚠ slow ${overtime.toFixed(1)}×`; } const status = embryo.isComplete ? 'complete' : embryo.lastError ? 'error' : 'running'; @@ -472,8 +463,9 @@ const EmbryosManager = { ${embryo.embryoId.replace(/embryo_?/i, 'E')} ${cols.includes('stage') ? `${stageIcon} ${stageName}` : ''} - ${cols.includes('confidence') ? `${confDots}` : ''} - ${cols.includes('rate') ? `${rateText}` : ''} + ${cols.includes('clock') ? `${clockText}` : ''} + ${cols.includes('stereo') ? `${stereoText}` : ''} + ${cols.includes('pace') ? `${pace.text}` : ''} ${cols.includes('eta') ? `${eta}` : ''} ${cols.includes('sparkline') ? `${sparklineSvg}` : ''} ${cols.includes('alert') ? `${alertHtml}` : ''} @@ -481,6 +473,99 @@ const EmbryosManager = { `; }, + /** Compute clock↔stereotypic alignment from perception temporal_analysis. + * + * Definitions: + * inStageClockMin — wall-clock minutes elapsed in current stage + * inStageStereoMin — stereotypic minutes "used" within the stage, + * capped at the stage's expected duration. An + * overdue embryo is stuck at the stage end in + * stereo time while clock keeps ticking. + * overtime — ratio inStageClockMin / expected_duration. + * >1 means the embryo has spent more clock time + * in the stage than the reference 20°C textbook + * duration. <1 just means "still within stage" — + * no slow/fast signal yet. + * stereoAgeMin — total stereotypic age, anchored at the start + * minute of the current stage in the reference + * table plus the (capped) in-stage stereo offset. 
+ */ + _computeAlignment(latest) { + const ta = latest?.temporal_analysis; + if (!ta || !ta.current_stage) return null; + const stage = ta.current_stage; + const stageStart = this.STAGE_TIMING[stage]; + if (stageStart == null) return null; + + const expDur = Number(ta.expected_duration_min) || 0; + const inClock = Number(ta.time_in_stage_min) || 0; + const overtime = Number(ta.overtime_ratio) || 0; + + const inStereo = expDur > 0 ? Math.min(inClock, expDur) : inClock; + const stereoAge = stageStart + inStereo; + + return { + stage, + stageStart, + expDur, + inStageClockMin: inClock, + inStageStereoMin: inStereo, + stereoAgeMin: stereoAge, + overtime, + }; + }, + + /** Render the stereo cell: "≈early", "≈bean +12m", or "≈comma +88m ⚠" + * when overdue (stereo capped at stage end while clock keeps running). */ + _formatStereoLabel(align) { + const stageName = this.formatStageName(align.stage); + const offsetMin = Math.round(align.inStageStereoMin); + const overdue = align.expDur > 0 && align.inStageClockMin > align.expDur + 1; + const offsetStr = offsetMin > 0 ? ` +${offsetMin}m` : ''; + const overdueMark = overdue ? ' ' : ''; + return `≈${stageName}${offsetStr}${overdueMark}`; + }, + + _formatPace(align) { + // Only emit a pace signal once we have meaningful clock data. + // Within the first few minutes the ratio is tiny and noisy — show + // a dashed placeholder so the column doesn't lie about precision. 
+ const NORMAL_BAND = 1.05; + const SLOW_BAND = 1.5; + if (align.inStageClockMin < 1 || align.expDur <= 0) { + return { text: '—', className: 'pace-unknown' }; + } + const r = align.overtime; + if (r <= NORMAL_BAND) { + return { text: '1.0×', className: 'pace-normal' }; + } + if (r <= SLOW_BAND) { + return { text: `${r.toFixed(1)}× slow`, className: 'pace-slow' }; + } + return { text: `⚠ ${r.toFixed(1)}×`, className: 'pace-slow-bad' }; + }, + + /** ETA in hours from current stereotypic position to hatched, scaled + * by observed pace when the embryo is demonstrably slow. */ + _formatEta(align) { + const hatchStereo = this.STAGE_TIMING['hatched'] || 570; + const remainStereo = hatchStereo - align.stereoAgeMin; + if (remainStereo <= 0) return 'done'; + const paceFactor = align.overtime > 1.05 ? align.overtime : 1.0; + const remainClockMin = remainStereo * paceFactor; + return `~${(remainClockMin / 60).toFixed(1)}h`; + }, + + /** Compact minute formatter: "45s" / "10m" / "1h 22m" / "3h". */ + _formatMinutes(min) { + if (min == null || !isFinite(min)) return '—'; + if (min < 1) return `${Math.round(min * 60)}s`; + if (min < 60) return `${Math.round(min)}m`; + const h = Math.floor(min / 60); + const m = Math.round(min - h * 60); + return m > 0 ? `${h}h ${m}m` : `${h}h`; + }, + _renderBoardSparkline(reasoning) { if (!reasoning.length) return ''; const sorted = [...reasoning].sort((a, b) => (a.timepoint ?? 0) - (b.timepoint ?? 0)); @@ -565,12 +650,24 @@ const EmbryosManager = { const shortName = embryo.embryoId.replace(/embryo_?/i, 'E'); const latestStage = sorted.length > 0 ? this.formatStageName(sorted[sorted.length - 1].stage) : '—'; + const isTerminated = !!embryo.isComplete; + const termReason = embryo.completionReason || ''; + // Short label for the badge — humanise the no_object terminal + // reason, otherwise keep the first clause of whatever the + // backend sent so the user still gets a hint. + const termBadge = isTerminated + ? 
(termReason.includes('no_object') ? 'HATCHED?' : 'STOPPED') + : ''; + const termTooltip = isTerminated + ? `Terminated — ${termReason || 'no reason given'}` + : ''; - html += `
`; + html += `
`; html += `
${shortName} ${latestStage} ${reasoning.length} eval + ${isTerminated ? `${termBadge}` : ''}
`; html += `
`; @@ -1000,7 +1097,7 @@ const EmbryosManager = { intervalSeconds: embryoData.interval_seconds || this.state.baseInterval, timepoints: embryoData.timepoints || 0, isComplete: embryoData.is_complete || false, - completionReason: null, + completionReason: embryoData.completion_reason || null, firstAcquired: embryoData.first_acquired ? new Date(embryoData.first_acquired) : null, lastAcquired: embryoData.last_acquired ? new Date(embryoData.last_acquired) : null, detections: embryoData.detections || {}, @@ -2726,10 +2823,17 @@ const EmbryosManager = { `; } - // Format confidence display - const confDisplay = typeof item.confidence === 'number' - ? `${Math.round(item.confidence * 100)}%` - : (item.confidence || 'Unknown'); + // Format confidence display. Hide entirely when the detector + // doesn't emit a probabilistic confidence (e.g. dopaminergic_signal + // returns structured intensity/structure findings instead) — the + // string "Unknown confidence" was actively confusing. + const hasNumericConf = typeof item.confidence === 'number'; + const hasTextConf = typeof item.confidence === 'string' && item.confidence.trim() !== ''; + const confHtml = hasNumericConf + ? `${Math.round(item.confidence * 100)}% confidence` + : hasTextConf + ? `${item.confidence}` + : ''; return `
@@ -2748,7 +2852,7 @@ const EmbryosManager = {
${item.stage ? this.formatStageName(item.stage) : (item.detected ? 'DETECTED' : 'Not detected')} - ${confDisplay} confidence + ${confHtml} ${transitionalHtml}
${detectorFindingsHtml} @@ -2979,13 +3083,22 @@ const EmbryosManager = { container.classList.remove('visible'); container.innerHTML = ''; } + // Filmstrip side panel — clearing innerHTML lets the :empty CSS + // rule collapse the panel and let the rows reclaim full width. + const filmstripDetail = document.getElementById('filmstrip-detail'); + if (filmstripDetail) { + filmstripDetail.innerHTML = ''; + } this.detailPanelVisible = false; this.currentDetailItem = null; - // Clear eval dot highlight + // Clear eval dot + filmstrip cell highlight document.querySelectorAll('.eval-dot.active').forEach(dot => { dot.classList.remove('active'); }); + document.querySelectorAll('.filmstrip-cell.active').forEach(cell => { + cell.classList.remove('active'); + }); }, // Navigate to previous/next item in detail panel From cbf41404846297a2b208155320ae7745b5c49e04 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 14:30:28 -0400 Subject: [PATCH 19/71] Bump version to 0.21.0.dev0 Opens the v0.21 development cycle on this branch. Targets per the KANBAN roadmap: cross-session resume, sacrificial vocab alias, campaign template loader (Path B), LDM Phase 1 MVP. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gently/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gently/__init__.py b/gently/__init__.py index be9985bb..37aee420 100644 --- a/gently/__init__.py +++ b/gently/__init__.py @@ -76,7 +76,7 @@ except ImportError: _VISUALIZATION_AVAILABLE = False -__version__ = "0.20.0" +__version__ = "0.21.0.dev0" __all__ = [ # Main entry point "Gently", diff --git a/pyproject.toml b/pyproject.toml index d07b6c42..2395f90e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "gently" -version = "0.20.0" +version = "0.21.0.dev0" description = "Agentic harnessing for microscopy" readme = "docs/README.md" license = {text = "MIT"} From 63c5be0ee6eb2a8c52732f709db507ed5190e179 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 18:16:30 -0400 Subject: [PATCH 20/71] switchbot: accept newer 0x05 status-frame response as success MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modern SwitchBot Bot firmware (≥ v4.x) replies to press/on/off with a 3-byte status frame: 0x05 + battery% + flag bits. Older firmware returned the bare 0x01 success byte. The strict 0x01-only check raised SwitchBotError for any current-production Bot even though the press had landed (visible on the controlled load). Widen _RESP_OK to accept either prefix. Both indicate the command reached the actuator. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- gently/hardware/switchbot.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gently/hardware/switchbot.py b/gently/hardware/switchbot.py index 131c50ef..713a785f 100644 --- a/gently/hardware/switchbot.py +++ b/gently/hardware/switchbot.py @@ -40,7 +40,10 @@ "off": bytes([0x57, 0x01, 0x02]), "press": bytes([0x57, 0x01, 0x00]), } -_RESP_OK = 0x01 # first byte of the response notification on success +# First byte of the success response. Older Bot firmware returns 0x01 alone; +# modern firmware (≥ Bot v4.x) returns 0x05 followed by a status frame — +# byte 1 = battery %, byte 2 = flag bits. Both mean "press landed." +_RESP_OK = (0x01, 0x05) class SwitchBotError(RuntimeError): @@ -75,7 +78,7 @@ def _on_notify(_char, data: bytearray) -> None: pass data = response["data"] - if not data or data[0] != _RESP_OK: + if not data or data[0] not in _RESP_OK: raise SwitchBotError(f"SwitchBot returned non-OK response: {data.hex()}") return data From b6bd49ce21b4c9757576ce637615b6f08346be27 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 18:16:31 -0400 Subject: [PATCH 21/71] config: register room_light SwitchBot for the diSPIM rig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the BLE-attached SwitchBot Bot that toggles the room light into the device-layer config so DeviceLayerServer registers it on boot. Plans address it via `bps.mv(room_light, 'on')`. MAC is the bot already mounted on the rig. Reached over BLE via the TP-Link UB500 dongle on this desktop — RSSI -70 dBm, well within reliable range. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- config/config.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/config/config.yml b/config/config.yml index e59e9f6d..086de3f1 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,4 +1,13 @@ organism: "celegans" hardware: "dispim" mmconfig: "MMConfig_tracking_screening.cfg" -mmdirectory: "C:/Program Files/Micro-Manager-1.4" \ No newline at end of file +mmdirectory: "C:/Program Files/Micro-Manager-1.4" + +# SwitchBot Bot — physical button-pusher mounted on the diSPIM room light +# switch. Talks BLE direct (no SwitchBot Hub / cloud). Plans address it by +# name, e.g. `bps.mv(room_light, 'on')`. Remove this block to skip +# registration; the device layer is tolerant of either state. +switchbot: + name: room_light + address: "EC:6F:04:06:5B:23" + timeout: 20.0 \ No newline at end of file From 0158004e94d2b2e2e75cf182d13a6db3e498bf91 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 18:25:12 -0400 Subject: [PATCH 22/71] =?UTF-8?q?switchbot:=20read=5Fstatus()=20=E2=80=94?= =?UTF-8?q?=20battery=20/=20firmware=20via=200x57=200x02=20query?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a dedicated status query (BLE 0x57 0x02) that returns battery percentage and firmware version without touching the actuator. Result is cached on the device instance and surfaced through read() / describe() as `_battery_pct` and `_firmware`, so the device-state stream picks them up automatically once polled. Verified on a Bot v4.2 over the TP-Link UB500: response `01 64 42 64 00 00 00 66 00 10 00 00 00` parses as battery 100%, firmware 0x42 (v4.2). Importantly, action-command responses are NOT used as a battery source — their byte-1 field looks like battery (an empirically 0x48-shaped value) but isn't: the dedicated query on the same bot reads 100%, so byte 1 of an action response is some other firmware-internal counter. 
Documented inline so the next reverse-engineer doesn't fall into the same trap. Periodic polling cadence is left to the caller; hourly is plenty for a battery that moves over months. Co-Authored-By: Claude Opus 4.7 (1M context) --- gently/hardware/switchbot.py | 72 +++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 5 deletions(-) diff --git a/gently/hardware/switchbot.py b/gently/hardware/switchbot.py index 713a785f..6a165054 100644 --- a/gently/hardware/switchbot.py +++ b/gently/hardware/switchbot.py @@ -40,9 +40,22 @@ "off": bytes([0x57, 0x01, 0x02]), "press": bytes([0x57, 0x01, 0x00]), } +# Dedicated status query: returns battery %, firmware version, mode flags. +# This is the only reliable source of battery — action-command responses +# also include status bytes but in a different format (byte 1 there isn't +# battery despite what's documented for older firmware). +_QUERY_STATUS = bytes([0x57, 0x02]) +# Status-query response format (firmware ≥ 6.x): +# byte 0 = 0x01 success +# byte 1 = battery % +# byte 2 = firmware version (BCD-ish: high nibble.low nibble — 0x42 = v4.2) +# bytes 3+ = mode flags / timer count / counters (firmware-dependent) +_STATUS_BATTERY_IDX = 1 +_STATUS_FIRMWARE_IDX = 2 # First byte of the success response. Older Bot firmware returns 0x01 alone; -# modern firmware (≥ Bot v4.x) returns 0x05 followed by a status frame — -# byte 1 = battery %, byte 2 = flag bits. Both mean "press landed." +# modern firmware (≥ Bot v4.x) returns 0x05 for action commands followed by +# action-status bytes. Both are "command landed" — the action payload format +# differs from the status-query payload format, so don't reuse parsers. _RESP_OK = (0x01, 0x05) @@ -104,6 +117,12 @@ def __init__(self, address: str, name: str = "switchbot", *, timeout: float = 20 self.timeout = timeout self.parent = None # required for Bluesky bps.mv() self._state = "unknown" # last commanded on/off state + # Status fields populated only by read_status(). 
Left as None until + # first contact — action commands deliberately don't update these, + # see note on _STATUS_BATTERY_IDX above. + self._battery_pct: int | None = None + self._firmware: int | None = None + self._status_ts: float | None = None self._lock = threading.Lock() # serialize BLE access (one radio, one bot) # -- Bluesky settable protocol ------------------------------------------- @@ -135,15 +154,58 @@ def worker(): threading.Thread(target=worker, name=f"{self.name}-set", daemon=True).start() return status + # -- Dedicated status query (no actuation) ------------------------------- + def read_status(self) -> dict: + """Query battery / firmware / mode without touching the switch arm. + + Synchronous: runs its own BLE connect → query → disconnect on the + caller's thread. Updates the cached status fields on success so + read() surfaces fresh values to the device-state stream. Use this + for periodic polls (~hourly is fine; battery doesn't move quickly). + + Returns a dict ``{battery_pct, firmware, raw_hex}``; raises + SwitchBotError on BLE / protocol failure. 
+ """ + with self._lock: + data = asyncio.run( + _send_command(self.address, _QUERY_STATUS, self.timeout) + ) + info = { + "raw_hex": data.hex(), + "battery_pct": data[_STATUS_BATTERY_IDX] if len(data) > _STATUS_BATTERY_IDX else None, + "firmware": data[_STATUS_FIRMWARE_IDX] if len(data) > _STATUS_FIRMWARE_IDX else None, + } + if info["battery_pct"] is not None: + self._battery_pct = info["battery_pct"] + if info["firmware"] is not None: + self._firmware = info["firmware"] + self._status_ts = time.time() + logger.info("SwitchBot %s status: %s", self.name, info) + return info + # -- Bluesky readable protocol ------------------------------------------- def read(self): - return OrderedDict({ - self.name: {"value": self._state, "timestamp": time.time()} + ts = time.time() + out = OrderedDict({ + self.name: {"value": self._state, "timestamp": ts} }) + if self._battery_pct is not None: + out[f"{self.name}_battery_pct"] = { + "value": self._battery_pct, + "timestamp": self._status_ts or ts, + } + if self._firmware is not None: + out[f"{self.name}_firmware"] = { + "value": self._firmware, + "timestamp": self._status_ts or ts, + } + return out def describe(self): return OrderedDict({ - self.name: {"source": f"switchbot:{self.address}", "dtype": "string", "shape": []} + self.name: {"source": f"switchbot:{self.address}", "dtype": "string", "shape": []}, + f"{self.name}_battery_pct": {"source": f"switchbot:{self.address}", "dtype": "integer", "shape": []}, + f"{self.name}_firmware": {"source": f"switchbot:{self.address}", "dtype": "integer", "shape": []}, }) def read_configuration(self): From a2a82bca0bf92df1082310e8c3624c136252fbc5 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 23:32:34 -0400 Subject: [PATCH 23/71] feat(web): add agent chat window + control lock; retire TUI launch Begin the TUI->web convergence: - Floating agent-chat window in the web UI (agent-chat.js/.css, wired into index.html) connecting to /ws/agent: streaming text/thinking/tool calls, 
choice pickers, applied-spec cards, slash-command routing. All untrusted text is escaped before insertion. - Single-driver control lock in agent_ws.py: only the holder may drive the agent; other clients are observers with a "Take control" banner. Fixes the latent shared-conversation corruption when >1 client connects. - launch_gently.py no longer spawns the Node TUI. It starts the agent + viz server, prints a launch banner (URL, device status, storage, Ctrl-C), auto-opens the browser (--no-browser to suppress), and serves until interrupted. Removes the Node/dist requirement; --resume resolves to latest (interactive picking deferred to the browser). TUI source kept in-tree (reversible). Auth not yet added: the browser is now the only control path and is unauthenticated on the LAN. Bind to 127.0.0.1 or trust the LAN until self-managed accounts land. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/agent_ws.py | 96 ++++++- gently/ui/web/static/css/agent-chat.css | 233 ++++++++++++++++ gently/ui/web/static/js/agent-chat.js | 351 ++++++++++++++++++++++++ gently/ui/web/templates/index.html | 18 ++ launch_gently.py | 112 +++++--- 5 files changed, 763 insertions(+), 47 deletions(-) create mode 100644 gently/ui/web/static/css/agent-chat.css create mode 100644 gently/ui/web/static/js/agent-chat.js diff --git a/gently/ui/web/routes/agent_ws.py b/gently/ui/web/routes/agent_ws.py index df59aaeb..d9b8d5cb 100644 --- a/gently/ui/web/routes/agent_ws.py +++ b/gently/ui/web/routes/agent_ws.py @@ -10,7 +10,7 @@ import json import logging from datetime import datetime -from typing import Dict, Optional +from typing import Callable, Dict, Optional from fastapi import APIRouter, WebSocket, WebSocketDisconnect @@ -31,6 +31,32 @@ def create_router(server) -> APIRouter: # Pending choice futures keyed by request_id _choice_futures: Dict[str, asyncio.Future] = {} + # ── Single-driver control arbitration ───────────────────── + # Shared across all /ws/agent clients (the 
router is created once). + # Only the control holder may drive the agent (chat/command/cancel); + # everyone else is an observer until they take control. This is the + # seed of the multi-user control lock and also prevents the shared + # agent conversation from being corrupted when >1 client connects. + _control: Dict[str, Optional[str]] = {"holder": None} + _clients: Dict[str, Callable] = {} + _client_labels: Dict[str, str] = {} + _client_counter = {"n": 0} + + async def _broadcast_control_status(): + """Tell every connected agent client who currently holds control.""" + holder = _control["holder"] + holder_label = _client_labels.get(holder) if holder else None + for cid, fn in list(_clients.items()): + try: + await fn({ + "type": "control_status", + "holder": holder, + "holder_label": holder_label, + "you_have_control": (cid == holder), + }) + except Exception: + pass + async def _run_wizard(wizard, websocket, send_fn, _choice_futures, bridge=None, log_transcript=None): """Run the wizard's interactive loop. @@ -136,6 +162,13 @@ async def agent_websocket(websocket: WebSocket): await websocket.close() return + # Assign a stable id for control arbitration. The first client to + # connect (in practice the TUI, spawned at launch) is labelled the + # terminal; later connections are browser windows. + _client_counter["n"] += 1 + client_id = f"agent_client_{_client_counter['n']}" + client_label = "the terminal" if _client_counter["n"] == 1 else "a browser window" + # Send connection metadata (version, tokens, embryo count, commands) meta = bridge.get_connect_metadata() _connected_msg = { @@ -409,6 +442,13 @@ def choice_future_factory(choice_data: dict) -> asyncio.Future: _choice_futures[request_id] = future return future + # Register this client for control arbitration; grant control if free. 
+ _clients[client_id] = send_fn + _client_labels[client_id] = client_label + if _control["holder"] is None: + _control["holder"] = client_id + await _broadcast_control_status() + try: # ── Wizard phase ────────────────────────────────────── # Run startup wizard (if needed) before entering the REPL. @@ -497,6 +537,38 @@ async def _run_resolution_bootstrap(): _log_transcript("in", data) msg_type = data.get("type") + # ── Control arbitration ─────────────────────────── + # A client requesting the wheel. + if msg_type == "take_control": + prev = _control["holder"] + _control["holder"] = client_id + if prev and prev != client_id and prev in _clients: + try: + await _clients[prev]({ + "type": "notification", + "level": "warning", + "title": f"Control taken by {client_label}", + "body": "You are now viewing.", + }) + except Exception: + pass + await _broadcast_control_status() + continue + + # Only the holder may drive the agent. Observers are told + # to take control rather than silently corrupting the + # single shared conversation. + if msg_type in ("chat", "command", "cancel") and client_id != _control["holder"]: + holder_label = _client_labels.get(_control["holder"]) or "another client" + await send_fn({ + "type": "notification", + "level": "info", + "title": f"Viewing only — control is held by {holder_label}", + "body": "Take control to drive the microscope.", + }) + await _broadcast_control_status() + continue + if msg_type == "chat": text = data.get("text", "").strip() if not text: @@ -615,11 +687,23 @@ async def _run_resolution_bootstrap(): active_task.cancel() if bootstrap_task is not None and not bootstrap_task.done(): bootstrap_task.cancel() - # Clean up pending futures - for future in _choice_futures.values(): - if not future.done(): - future.cancel() - _choice_futures.clear() + # Release control arbitration for this client; hand the wheel + # to any remaining client (or free it) and resync everyone. 
+ _clients.pop(client_id, None) + _client_labels.pop(client_id, None) + if _control["holder"] == client_id: + _control["holder"] = next(iter(_clients), None) + try: + await _broadcast_control_status() + except Exception: + pass + # Clean up pending futures only when the last client leaves — + # otherwise we'd cancel another connected client's pending choices. + if not _clients: + for future in _choice_futures.values(): + if not future.done(): + future.cancel() + _choice_futures.clear() return router diff --git a/gently/ui/web/static/css/agent-chat.css b/gently/ui/web/static/css/agent-chat.css new file mode 100644 index 00000000..99dbcf33 --- /dev/null +++ b/gently/ui/web/static/css/agent-chat.css @@ -0,0 +1,233 @@ +/* Floating agent-chat window — the web-side control surface. */ + +.agent-fab { + position: fixed; + right: 22px; + bottom: 22px; + width: 56px; + height: 56px; + border-radius: 50%; + border: none; + cursor: pointer; + z-index: 1200; + font-size: 24px; + color: #fff; + background: var(--gradient-primary); + box-shadow: var(--shadow-glow-strong); + transition: transform 0.15s ease, box-shadow 0.15s ease; +} +.agent-fab:hover { transform: translateY(-2px) scale(1.04); } +.agent-fab.ac-fab-active { transform: rotate(45deg); } + +.agent-chat { + position: fixed; + right: 22px; + bottom: 90px; + width: 400px; + max-width: calc(100vw - 44px); + height: 560px; + max-height: calc(100vh - 130px); + display: flex; + flex-direction: column; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 14px; + box-shadow: 0 12px 48px rgba(0, 0, 0, 0.45); + z-index: 1199; + overflow: hidden; +} +.agent-chat.hidden, .agent-control-banner.hidden { display: none; } + +.agent-chat-header { + display: flex; + align-items: center; + gap: 8px; + padding: 10px 14px; + border-bottom: 1px solid var(--border); + background: var(--bg-hover); +} +.agent-chat-title { font-weight: 600; color: var(--text); flex: 0 0 auto; } +.agent-chat-conn { font-size: 11px; 
margin-left: 4px; color: var(--text-muted); } +.agent-chat-conn.ac-conn-ok { color: var(--accent-green); } +.agent-chat-conn.ac-conn-bad { color: var(--color-danger, #f87171); } +.agent-chat-close { + margin-left: auto; + background: none; + border: none; + color: var(--text-muted); + font-size: 22px; + line-height: 1; + cursor: pointer; +} +.agent-chat-close:hover { color: var(--text); } + +.agent-control-banner { + display: flex; + align-items: center; + gap: 10px; + padding: 8px 14px; + background: rgba(251, 146, 60, 0.12); + border-bottom: 1px solid var(--border); + color: var(--accent-orange, #fb923c); + font-size: 13px; +} +.ac-take-control { + margin-left: auto; + padding: 4px 12px; + border-radius: 8px; + border: 1px solid var(--accent); + background: var(--accent); + color: #fff; + cursor: pointer; + font-size: 12px; + font-weight: 600; +} +.ac-take-control:hover { background: var(--accent-hover); } + +.agent-chat-log { + flex: 1 1 auto; + overflow-y: auto; + padding: 14px; + display: flex; + flex-direction: column; + gap: 8px; + font-size: 14px; + line-height: 1.5; +} + +.ac-msg { + padding: 8px 12px; + border-radius: 12px; + max-width: 90%; + word-wrap: break-word; + white-space: normal; +} +.ac-user { + align-self: flex-end; + background: var(--accent); + color: #fff; + border-bottom-right-radius: 4px; +} +.ac-agent { + align-self: flex-start; + background: var(--bg-hover); + color: var(--text); + border-bottom-left-radius: 4px; +} +.ac-agent code, .ac-agent strong { color: var(--accent-cyan); } +.ac-agent code { + background: rgba(0, 0, 0, 0.25); + padding: 1px 5px; + border-radius: 4px; + font-size: 12.5px; +} + +.ac-tool { + align-self: flex-start; + font-size: 12.5px; + color: var(--text-muted); + padding: 3px 10px; + border-radius: 8px; + background: rgba(255, 255, 255, 0.03); + border: 1px solid var(--border); +} +.ac-tool-done .ac-tool-check { color: var(--accent-green); } +.ac-tool-spin { display: inline-block; animation: ac-spin 1.4s linear 
infinite; } +@keyframes ac-spin { to { transform: rotate(360deg); } } + +.ac-thinking { + align-self: flex-start; + font-size: 12.5px; + color: var(--text-muted); + font-style: italic; + opacity: 0.8; +} + +.ac-system { + align-self: center; + font-size: 12px; + color: var(--text-muted); + text-align: center; + padding: 2px 8px; +} +.ac-level-error { color: var(--color-danger, #f87171); } +.ac-level-warning { color: var(--accent-orange, #fb923c); } +.ac-level-success { color: var(--accent-green); } + +.ac-choice { + align-self: stretch; + display: flex; + flex-direction: column; + gap: 6px; + padding: 10px; + border: 1px solid var(--border); + border-radius: 12px; + background: rgba(96, 165, 250, 0.06); +} +.ac-choice-q { color: var(--text); font-weight: 500; margin-bottom: 2px; } +.ac-choice-opt { + text-align: left; + padding: 8px 12px; + border-radius: 8px; + border: 1px solid var(--border); + background: var(--bg-card); + color: var(--text); + cursor: pointer; + display: flex; + flex-direction: column; + gap: 2px; +} +.ac-choice-opt:hover:not(:disabled) { border-color: var(--accent); background: var(--bg-hover); } +.ac-choice-opt:disabled { opacity: 0.5; cursor: default; } +.ac-choice-label { font-weight: 600; } +.ac-choice-desc { font-size: 12px; color: var(--text-muted); } +.ac-choice-picked { border-color: var(--accent-green); } + +.ac-spec { + align-self: flex-start; + border: 1px solid var(--border); + border-radius: 10px; + padding: 8px 12px; + background: var(--bg-hover); + font-size: 13px; +} +.ac-spec-title { font-weight: 600; color: var(--accent-purple); margin-bottom: 4px; } +.ac-spec-row { display: flex; justify-content: space-between; gap: 16px; color: var(--text-muted); } +.ac-spec-row span:last-child { color: var(--text); } + +.agent-chat-input { + display: flex; + gap: 8px; + padding: 10px; + border-top: 1px solid var(--border); + background: var(--bg-hover); +} +.agent-chat-input textarea { + flex: 1 1 auto; + resize: none; + border: 1px solid 
var(--border); + border-radius: 10px; + background: var(--bg-card); + color: var(--text); + padding: 8px 10px; + font-family: inherit; + font-size: 14px; + line-height: 1.4; + max-height: 140px; +} +.agent-chat-input textarea:focus { outline: none; border-color: var(--accent); } +.agent-chat-input textarea:disabled { opacity: 0.6; } +.agent-chat-send { + flex: 0 0 auto; + align-self: flex-end; + padding: 8px 16px; + border-radius: 10px; + border: none; + background: var(--accent); + color: #fff; + font-weight: 600; + cursor: pointer; +} +.agent-chat-send:hover:not(:disabled) { background: var(--accent-hover); } +.agent-chat-send:disabled { opacity: 0.5; cursor: default; } +.agent-chat-send.ac-busy { background: var(--color-danger, #f87171); } diff --git a/gently/ui/web/static/js/agent-chat.js b/gently/ui/web/static/js/agent-chat.js new file mode 100644 index 00000000..0b83f7a5 --- /dev/null +++ b/gently/ui/web/static/js/agent-chat.js @@ -0,0 +1,351 @@ +/** + * Floating agent-chat window — the web-side control surface. + * + * Connects to the same /ws/agent bridge the TUI uses, streams the agent's + * responses, and renders interactive choice pickers. A single-driver control + * lock on the server arbitrates who may drive the microscope; this client + * shows a banner and offers "Take control" when another client holds it. + * + * Self-contained IIFE (no build step). All untrusted text is escaped before + * insertion — never assign agent/user/tool strings to innerHTML directly. 
+ */ +const AgentChat = (() => { + let ws = null; + let reconnectDelay = 1000; + const MAX_DELAY = 30000; + + let panelOpen = false; + let hasControl = true; // optimistic until the server says otherwise + let holderLabel = null; + let streaming = false; + let currentAgentEl = null; // the agent bubble currently being streamed into + let thinkingEl = null; + + // DOM refs (resolved in init) + let fab, panel, log, input, sendBtn, connDot, banner, closeBtn; + + // ── Safe rendering ──────────────────────────────────────── + function escapeHtml(s) { + const d = document.createElement('div'); + d.textContent = String(s == null ? '' : s); + return d.innerHTML; + } + + /** Minimal, safe markdown: escape first, then a few inline transforms. */ + function mdToHtml(text) { + let html = escapeHtml(text); + html = html.replace(/`([^`]+)`/g, '$1'); + html = html.replace(/\*\*([^*]+)\*\*/g, '$1'); + html = html.replace(/\*([^*]+)\*/g, '$1'); + html = html.replace(/\n/g, '
'); + return html; + } + + // ── Message helpers ─────────────────────────────────────── + function scrollToBottom() { + log.scrollTop = log.scrollHeight; + } + + function addBubble(role, htmlOrText, { html = false } = {}) { + const el = document.createElement('div'); + el.className = `ac-msg ac-${role}`; + if (html) el.innerHTML = htmlOrText; + else el.textContent = htmlOrText; + log.appendChild(el); + scrollToBottom(); + return el; + } + + function addSystemLine(text, level = 'info') { + const el = document.createElement('div'); + el.className = `ac-system ac-level-${level}`; + el.textContent = text; + log.appendChild(el); + scrollToBottom(); + return el; + } + + function clearThinking() { + if (thinkingEl && thinkingEl.parentNode) thinkingEl.parentNode.removeChild(thinkingEl); + thinkingEl = null; + } + + // ── Protocol handlers ───────────────────────────────────── + function handle(msg) { + switch (msg.type) { + case 'connected': + reconnectDelay = 1000; + setConn(true); + // version / session in the header tooltip + if (msg.version) connDot.title = `connected · v${msg.version}`; + break; + + case 'control_status': + hasControl = !!msg.you_have_control; + holderLabel = msg.holder_label || null; + renderControl(); + break; + + case 'stream_start': + streaming = true; + clearThinking(); + currentAgentEl = null; // created lazily on first text + setBusy(true); + break; + + case 'thinking': + if (!thinkingEl) { + thinkingEl = document.createElement('div'); + thinkingEl.className = 'ac-thinking'; + thinkingEl.textContent = 'thinking…'; + log.appendChild(thinkingEl); + scrollToBottom(); + } + break; + + case 'text': { + clearThinking(); + if (!currentAgentEl) { + currentAgentEl = addBubble('agent', '', { html: true }); + currentAgentEl._raw = ''; + } + currentAgentEl._raw += (msg.text || ''); + currentAgentEl.innerHTML = mdToHtml(currentAgentEl._raw); + scrollToBottom(); + break; + } + + case 'tool_start': { + clearThinking(); + const label = msg.tool_label || 
msg.tool_name || 'tool'; + const el = document.createElement('div'); + el.className = 'ac-tool ac-tool-running'; + el.dataset.tool = msg.tool_name || ''; + el.innerHTML = ` ${escapeHtml(label)}…`; + log.appendChild(el); + scrollToBottom(); + break; + } + + case 'tool_call': { + // Mark the most recent running entry for this tool as done. + const running = [...log.querySelectorAll('.ac-tool-running')] + .filter(e => e.dataset.tool === (msg.tool_name || '')); + const el = running[running.length - 1]; + const label = msg.tool_name || 'tool'; + const dur = msg.duration ? ` · ${msg.duration.toFixed ? msg.duration.toFixed(1) : msg.duration}s` : ''; + const summary = msg.result_summary ? ` — ${escapeHtml(msg.result_summary)}` : ''; + if (el) { + el.className = 'ac-tool ac-tool-done'; + el.innerHTML = ` ${escapeHtml(label)}${dur}${summary}`; + } else { + addBubble('tool', `✓ ${escapeHtml(label)}${dur}${summary}`, { html: true }); + } + scrollToBottom(); + break; + } + + case 'choice_request': + renderChoice(msg); + break; + + case 'applied_spec': + renderSpec(msg.spec || {}); + break; + + case 'stream_end': + streaming = false; + clearThinking(); + currentAgentEl = null; + setBusy(false); + break; + + case 'command_result': + if (msg.error) addSystemLine(`${msg.command}: ${msg.error}`, 'error'); + else if (msg.content) addSystemLine(`${msg.command} ✓`, 'info'); + break; + + case 'notification': + addSystemLine(msg.body ? 
`${msg.title} — ${msg.body}` : msg.title, msg.level || 'info'); + break; + + case 'error': + streaming = false; + setBusy(false); + addSystemLine(msg.error || 'Unknown error', 'error'); + break; + + case 'ping': + send({ type: 'pong' }); + break; + + case 'pong': + case 'state_update': + case 'browse_result': + break; // not surfaced in the chat window (yet) + + default: + break; // unknown types ignored (forward-compatible) + } + } + + function renderChoice(msg) { + clearThinking(); + const data = msg.choice_data || {}; + const reqId = msg.request_id || data.request_id || ''; + const wrap = document.createElement('div'); + wrap.className = 'ac-choice'; + const q = document.createElement('div'); + q.className = 'ac-choice-q'; + q.innerHTML = mdToHtml(data.question || 'Choose:'); + wrap.appendChild(q); + + (data.options || []).forEach(opt => { + const btn = document.createElement('button'); + btn.className = 'ac-choice-opt'; + btn.disabled = !!opt.disabled; + const desc = opt.description ? `${escapeHtml(opt.description)}` : ''; + btn.innerHTML = `${escapeHtml(opt.label)}${desc}`; + btn.addEventListener('click', () => { + send({ type: 'choice_response', request_id: reqId, selected: opt.id }); + // lock the picker and show the pick + [...wrap.querySelectorAll('button')].forEach(b => b.disabled = true); + wrap.classList.add('ac-choice-answered'); + btn.classList.add('ac-choice-picked'); + }); + wrap.appendChild(btn); + }); + log.appendChild(wrap); + scrollToBottom(); + } + + function renderSpec(spec) { + const rows = []; + const add = (k, v) => { if (v !== undefined && v !== null && v !== '') rows.push([k, v]); }; + add('strain', spec.strain); + add('temp °C', spec.temperature_c); + add('slices', spec.num_slices); + add('exposure ms', spec.exposure_ms); + add('interval s', spec.interval_s); + add('stop at', spec.stop_condition); + if (!rows.length) return; + const html = '
<div class="ac-spec-title">Imaging spec applied</div>
' + + rows.map(([k, v]) => `
<div class="ac-spec-row"><span>${escapeHtml(k)}</span><span>${escapeHtml(v)}</span></div>
`).join(''); + addBubble('spec', html, { html: true }); + } + + // ── Control / UI state ──────────────────────────────────── + function renderControl() { + if (hasControl) { + banner.classList.add('hidden'); + banner.innerHTML = ''; + input.disabled = false; + sendBtn.disabled = false; + input.placeholder = 'Message the agent…'; + } else { + banner.classList.remove('hidden'); + const who = holderLabel || 'another client'; + banner.innerHTML = `🔒 ${escapeHtml(who)} is driving`; + const btn = document.createElement('button'); + btn.className = 'ac-take-control'; + btn.textContent = 'Take control'; + btn.addEventListener('click', () => send({ type: 'take_control' })); + banner.appendChild(btn); + input.disabled = true; + sendBtn.disabled = true; + input.placeholder = 'Viewing only — take control to drive…'; + } + } + + function setBusy(busy) { + sendBtn.textContent = busy ? 'Stop' : 'Send'; + sendBtn.classList.toggle('ac-busy', busy); + } + + function setConn(ok) { + connDot.classList.toggle('ac-conn-ok', ok); + connDot.classList.toggle('ac-conn-bad', !ok); + if (!ok) connDot.title = 'reconnecting…'; + } + + // ── Transport ───────────────────────────────────────────── + function send(obj) { + if (ws && ws.readyState === WebSocket.OPEN) ws.send(JSON.stringify(obj)); + } + + function connect() { + const proto = location.protocol === 'https:' ? 
'wss:' : 'ws:'; + ws = new WebSocket(`${proto}//${location.host}/ws/agent`); + ws.onopen = () => { reconnectDelay = 1000; setConn(true); }; + ws.onclose = () => { + setConn(false); + setBusy(false); + streaming = false; + setTimeout(connect, reconnectDelay); + reconnectDelay = Math.min(reconnectDelay * 2, MAX_DELAY); + }; + ws.onerror = () => {}; + ws.onmessage = (e) => { + let msg; + try { msg = JSON.parse(e.data); } catch { return; } + handle(msg); + }; + } + + // ── Input handling ──────────────────────────────────────── + function submit() { + if (streaming) { send({ type: 'cancel' }); return; } // Send doubles as Stop + const text = input.value.trim(); + if (!text) return; + if (!hasControl) { renderControl(); return; } + addBubble('user', text); + if (text.startsWith('/')) { + send({ type: 'command', command: text }); // slash commands (e.g. /status) + } else { + send({ type: 'chat', text }); + } + input.value = ''; + autosize(); + } + + function autosize() { + input.style.height = 'auto'; + input.style.height = Math.min(input.scrollHeight, 140) + 'px'; + } + + function togglePanel(open) { + panelOpen = (open === undefined) ? 
!panelOpen : open; + panel.classList.toggle('hidden', !panelOpen); + fab.classList.toggle('ac-fab-active', panelOpen); + if (panelOpen) { + if (!ws) connect(); + setTimeout(() => input.focus(), 50); + } + } + + // ── Init ────────────────────────────────────────────────── + function init() { + fab = document.getElementById('agent-fab'); + panel = document.getElementById('agent-chat'); + log = document.getElementById('agent-chat-log'); + input = document.getElementById('agent-chat-text'); + sendBtn = document.getElementById('agent-chat-send'); + connDot = document.getElementById('agent-chat-conn'); + banner = document.getElementById('agent-control-banner'); + closeBtn = document.getElementById('agent-chat-close'); + if (!fab || !panel) return; // markup not present + + fab.addEventListener('click', () => togglePanel()); + closeBtn.addEventListener('click', () => togglePanel(false)); + sendBtn.addEventListener('click', submit); + input.addEventListener('input', autosize); + input.addEventListener('keydown', (e) => { + if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); submit(); } + if (e.key === 'Escape' && streaming) { e.preventDefault(); send({ type: 'cancel' }); } + }); + } + + document.addEventListener('DOMContentLoaded', init); + + return { togglePanel }; +})(); diff --git a/gently/ui/web/templates/index.html b/gently/ui/web/templates/index.html index dd3bbfb0..6314d7a9 100644 --- a/gently/ui/web/templates/index.html +++ b/gently/ui/web/templates/index.html @@ -14,6 +14,7 @@ + {% include '_header.html' %} @@ -499,6 +500,22 @@

Properties

+ + + + @@ -519,5 +536,6 @@

Properties

+ diff --git a/launch_gently.py b/launch_gently.py index 1727dd2a..44e1e4bb 100644 --- a/launch_gently.py +++ b/launch_gently.py @@ -4,11 +4,16 @@ Conversational AI agent for diSPIM microscope control. +Starts the agent + web visualization server, then opens the browser UI. +The web UI is the control surface (the legacy Ink TUI is retired — its +source is kept in the tree but no longer launched). + Usage: - python launch_gently.py # Ink TUI (default) - python launch_gently.py --offline + python launch_gently.py # Start server + open browser + python launch_gently.py --no-browser # Start server, don't open a browser + python launch_gently.py --offline # Run without the device layer python launch_gently.py --sessions # List sessions and exit - python launch_gently.py --resume # Interactive session picker + python launch_gently.py --resume # Resume most recent session python launch_gently.py --resume latest # Resume most recent session python launch_gently.py --resume # Resume specific session python launch_gently.py -v # Verbose (INFO) logging @@ -89,10 +94,40 @@ def list_sessions(store: FileStore): print("Use: python launch_gently.py --resume ") +def _print_banner(viz_url, device_connected, offline, storage_dir, log_file, resumed): + """Print a human-readable launch banner to the terminal. + + This is the "what you see when you open it" surface now that the + server (not a TUI) is the long-running process. 
+ """ + line = "─" * 56 + if offline: + dev = "○ offline (--offline)" + elif device_connected: + dev = "● connected" + else: + dev = "○ offline — run: python start_device_layer.py" + url = viz_url or "(viz server failed to start — check the log)" + tag = " [resumed session]" if resumed else "" + print() + print(f" ✦ Gently is running.{tag}") + print(f" {line}") + print(f" Open: {url}") + print(f" Device: {dev}") + print(f" Storage: {storage_dir}") + print(f" Logs: {log_file}") + print(f" Stop: Ctrl-C") + print(f" {line}") + print() + + def run_ink_picker(tui_dist: Path, sessions_json: str) -> str | None: """ Spawn the Ink TUI in session-picker mode and capture the selection. + Retired: kept for reference / potential reuse by a future web session + picker. No longer called by the launcher. + Returns the selected session ID, or None for a new session. """ proc = subprocess.run( @@ -116,7 +151,7 @@ def run_ink_picker(tui_dist: Path, sessions_json: str) -> str | None: return None -async def main(offline: bool = False, resume_session: str = None, show_sessions: bool = False, pick_session: bool = False, log_level: str = "WARNING"): +async def main(offline: bool = False, resume_session: str = None, show_sessions: bool = False, pick_session: bool = False, log_level: str = "WARNING", no_browser: bool = False): # Set up log file in storage directory storage_base = Path(os.environ.get("GENTLY_STORAGE", "D:/Gently3")) log_dir = storage_base / "logs" @@ -151,30 +186,20 @@ async def main(offline: bool = False, resume_session: str = None, show_sessions: store.close() return - # Ensure TUI is available - tui_dist = Path(__file__).parent / "gently" / "tui" / "dist" / "index.js" - if not tui_dist.exists() or not shutil.which("node"): - print("Error: TUI not available.") - if not tui_dist.exists(): - print(" Run: cd gently/tui && npm install && npm run build") - if not shutil.which("node"): - print(" Node.js not found in PATH") - store.close() - return + # Web-only: the TUI is 
retired. The browser is the control surface and + # the launcher just starts the server — no Node/dist requirement. - # Handle --resume (interactive picker, "latest", or specific session) + # Handle --resume. Interactive session picking has moved to the browser; + # without an explicit ID ("latest" or bare --resume) we resume the most + # recent session. session_to_resume = None - if pick_session: - # Two-phase launch: spawn Ink picker to select a session - items = _build_session_items(store) - if not items: - print("No saved sessions found. Starting new session.") - else: - session_to_resume = run_ink_picker(tui_dist, json.dumps(items)) - elif resume_session == "latest": + if pick_session or resume_session == "latest": sessions = store.list_sessions() if sessions: session_to_resume = sessions[0].get("session_id") + if pick_session: + print(f"Resuming most recent session: {session_to_resume} " + "(interactive session picking is moving into the browser)") else: print("No sessions found - starting fresh") elif resume_session: @@ -391,29 +416,32 @@ def _status_provider(): agent.viz_server.agent_bridge = bridge agent.viz_server.set_context_store(context_store) - ws_url = f"ws://localhost:{settings.network.viz_port}/ws/agent" - - # Spawn the Node.js TUI — it inherits stdin/stdout/stderr so Ink - # takes over the terminal. - tui_proc = subprocess.Popen( - ["node", str(tui_dist), "--ws-url", ws_url], - stdin=sys.stdin, - stdout=sys.stdout, - stderr=sys.stderr, + # ── Banner + serve ────────────────────────────────────────────── + # The viz server runs in-process (uvicorn in a background task). With + # the TUI retired, the launcher's job is to keep that server alive and + # point the operator at the browser. 
+ _print_banner( + viz_url=viz_url, + device_connected=bool(client and client.is_connected), + offline=offline, + storage_dir=storage_dir, + log_file=log_file, + resumed=session_to_resume is not None, ) - try: - # Wait for TUI to exit (blocks the event loop in a thread so - # the asyncio loop stays responsive for the viz server). - exit_code = await asyncio.get_event_loop().run_in_executor( - None, tui_proc.wait - ) - except (KeyboardInterrupt, asyncio.CancelledError): - tui_proc.terminate() + if viz_url and not no_browser: try: - tui_proc.wait(timeout=5) + import webbrowser + webbrowser.open(viz_url) except Exception: pass + + try: + # Serve until interrupted (Ctrl-C). Keep the event loop alive so + # the in-process viz server keeps handling browser clients. + await asyncio.Event().wait() + except (KeyboardInterrupt, asyncio.CancelledError): + pass finally: # Suppress noisy CancelledError / overlapped IO errors from # uvicorn during shutdown on Windows. @@ -448,6 +476,7 @@ def cli_main(): help="Resume a session. Without ID: shows picker. 
With ID: resumes that session.") parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose (INFO) logging") parser.add_argument("--debug", action="store_true", help="Enable debug logging (most verbose)") + parser.add_argument("--no-browser", action="store_true", help="Do not auto-open the web UI in a browser") args = parser.parse_args() log_level = "WARNING" @@ -466,6 +495,7 @@ def cli_main(): resume_session=resume_id, pick_session=pick_session, log_level=log_level, + no_browser=args.no_browser, )) except (KeyboardInterrupt, RuntimeError, SystemExit): pass From 9460b785fc9641b27d200aa82ea38f9c5e5a2d54 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Thu, 28 May 2026 23:32:39 -0400 Subject: [PATCH 24/71] docs: add biologist-readiness plan Consolidated plan from the codebase audit: robustness gaps, biologist-UX gaps, complexity audit (legitimate vs refactorable + ~4000 lines of dead duplicate code), frontend audit, startup/topology, multi-user auth + single-driver control arbitration, a 5-day plan, the web-only convergence roadmap (milestones A-F), and progress to date. Co-Authored-By: Claude Opus 4.8 (1M context) --- notes/biologist-readiness-plan.md | 336 ++++++++++++++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100644 notes/biologist-readiness-plan.md diff --git a/notes/biologist-readiness-plan.md b/notes/biologist-readiness-plan.md new file mode 100644 index 00000000..00aae259 --- /dev/null +++ b/notes/biologist-readiness-plan.md @@ -0,0 +1,336 @@ +# Gently — Biologist-Readiness Plan + +> Engineering plan to make Gently more robust, easier for a non-programmer biologist to operate, +> and to evolve it into a multi-user, web-first microscope control system. +> Compiled from a codebase audit (architecture map, complexity audit of all >200-line files in `gently/`, +> robustness + UX review, frontend audit, startup/topology trace, and auth/multi-user ground-truth). 
+ +**Author:** engineering analysis · **Date:** 2026-05-28 · **Horizon:** 1 focused week + a multi-sprint convergence arc + +--- + +## 0. Strategic decisions (already made) + +These are settled and shape everything below: + +1. **Frontend → converge on web-only.** The browser becomes the single surface (a floating agent chat window + the existing rich visuals). The Ink TUI becomes **legacy / maintenance-only** and is retired once the web reaches control parity. → *Do not invest in TUI refactors.* +2. **Processes → keep the two-process split, improve feedback.** The device layer (`start_device_layer.py`) stays a separate process from the agent (`launch_gently.py`) — this isolation is a safety feature, not an accident. Fix the *visibility* of its state, not the topology. +3. **Multi-user → LAN deployment, pluggable auth (no IT dependency to start).** Auth is a thin pluggable layer. Start with **Gently-managed accounts** (or shared/role tokens as an MVP) — needs nothing from institute IT. **Institute SSO (e.g. Janelia/HHMI login via a reverse proxy) is an optional later upgrade** that slots into the same layer if/when IT provides an endpoint. Gently owns the **control arbitration + roles + audit**, regardless of which login backend is used. +4. **Roles → viewers vs operators.** Anyone authenticated can **watch** (today's read-only experience, unchanged). Only **operators** can take control and drive the microscope. **Admins** can force-release and manage roles. +5. **Permission model → an explicit observable-vs-inputable classification.** Every endpoint/WS-message is tagged `observable` (read-only) or `inputable` (control). One registry drives all gating: viewer = observable set; operator-with-lock = observable + inputable. Adding a new action forces a classification; the audit log falls out of the `inputable` tag. +6. **Plan shape → balanced.** Interleave robustness/UX hardening with safe, high-value refactors. 
Bold-but-safe: refactor where features *won't* break; add tests *before* touching anything that might. + +--- + +## 1. Executive summary + +Gently is in **good architectural shape**. The hard parts (async acquisition state machine, hardware-safety code, the LLM loop) are well-factored. The problems that matter are **not "too complex"** — they are a handful of **silent, high-consequence failure modes**, an **opt-in/jargon UX that assumes a programmer**, and the **operational friction** of starting and using a multi-process, dual-frontend system. The web-only + multi-user direction resolves much of the friction *by construction* (e.g. it dissolves the embryo-marking hand-off and removes the Node dependency). + +**Top priorities, in order:** + +1. **Fix the verified, provable bugs** (status-tool KeyError, non-atomic writes, the silent device-down, the env-var split). Low risk, immediate value. +2. **Wire crash/restart auto-resume** — the single biggest data-loss risk; the code already exists but is never called. +3. **Harden transient-failure handling** (device hiccups, perception/Claude outages) so a brief blip doesn't silently end a run or image a dead embryo. +4. **Make state visible** — live device heartbeat, connection banner, liveness line, acquisition-settings panel, armed-rules display. +5. **Begin the web-only + multi-user arc** — browser agent chat, then the auth + single-driver control lock (the control lock must land *with* browser control, not after). + +--- + +## 2. State of the codebase — legitimate vs. accidental complexity + +Most large files are **legitimately large** (broad-but-cohesive domain modules), not tangled. Accidental complexity is concentrated and well-localized. + +### Leave alone — legitimate complexity (high feature-break risk) +- `harness/state.py` (979L) — shared mutable `EmbryoState`/`ExperimentState`. Splitting *creates* the duplication the design avoids. 
**Riskiest refactor target in the repo.** +- `harness/conversation.py` (774L) — core LLM loop (asend-recursion, observed-failure guards). +- `hardware/dispim/devices/*` (stage/optical/scanner/acquisition/camera/piezo) — laser/stage safety constants + MMCore vocab. +- `hardware/dispim/plans/calibration.py` (958L) — irreducible multi-phase calibration state machine. +- `core/imaging.py`, `event_bus.py`, `service.py`; `app/device_state_monitor.py`; `organisms/celegans/stages.py`. + +### Top refactor targets — accidental complexity worth fixing + +| File | Verdict | Risk | Effort | The fix | +|---|---|---|---|---| +| `app/tools/timelapse_tools.py` (815L) | REFACTORABLE | low | ~4h | Contains the confirmed KeyError bug. `@timelapse_tool` decorator kills the 6-line preamble in 17 tools; stop reaching into `orchestrator._embryo_states`. | +| `app/tools/calibration_tools.py` (1504L) | REFACTORABLE | low | ~2h | Delete ~450 lines of **dead code** (`fast_calibrate_embryo`, `hybrid_focus_selection`, `binary_edge_search`, `_fine_focus_sweep`) — unregistered, uncalled, reference nonexistent agent attrs. | +| `harness/bridge.py` (2215L) | REFACTORABLE | med | ~10h | God-object: 720-line `handle_command` if/elif ladder + case-folding bug (lowercases session/embryo IDs). Dispatch table off `CommandRegistry`. **High value for web convergence** — the browser control surface leans on this. | +| `harness/detection/verifier.py` (1158L) | REFACTORABLE | med | ~6h | `verify()`/`verify_with_context()` + two `_evaluate_consensus*` are superset/subset dupes; 5 `_run_*` + 4 `_parse_*` copy-paste. ~250 lines. **Capture consensus truth-table fixtures first.** | +| `mesh/peer_client.py` (393L) | REFACTORABLE | low | ~4h | 11 near-identical authed methods → one `_authed_json` helper (~270→~80 lines). | +| `hardware/dispim/claude_client.py` (631L) | REFACTORABLE | low | ~3h | 4 vision methods copy-paste → one `_vision_call`. 
| +| `harness/memory/file_store.py` (2552L) | MIXED | med | ~10h | Mixin split + shared serde. Lower priority than deleting the SQLite twin. | + +### The dominant *reduction* opportunity — ~4000 lines of dead duplicate code +The **legacy SQLite store stack** is a complete duplicate of the live file stores (CLAUDE.md says "No SQLite databases"): +- `core/store.py` (1064L) twins `core/file_store.py` +- `harness/memory/{store,_intentions,_plans,_understanding,_ml_pipelines}.py` (~2960L) twin `harness/memory/file_store.py` + +Dead in production, pinned only by ~41 tests. Delete **after** migrating tests to the `file_context_store` fixture → ~4000 lines gone, zero runtime change. **Friday work** (gated on test migration). + +--- + +## 3. Verified bugs (confirmed in source, not just inferred) + +| # | Bug | Location | Impact | +|---|---|---|---| +| V1 | `get_timelapse_status` reads `next_embryo`/`next_acquisition_in_seconds` that `to_dict()` never emits → **KeyError every call**. Same dead keys in `detection_tools.py`. | `app/tools/timelapse_tools.py:145-146,154` | Biologist's primary "is it working?" tool is broken. | +| V2 | `load_state()` fully implemented, `save_state()` runs every acquisition — but `load_state()` has **zero callers**. | `app/orchestration/timelapse.py:1643` | **No crash/restart auto-resume.** Overnight crash = whole night lost. | +| V3 | `_write_yaml` does `unlink()` then `rename()`; `save_state()` writes with no temp file. | `core/file_store.py:123-125` | **Non-atomic on Windows** — a power blip corrupts the files `/resume` needs. | +| V4 | Launcher reads `GENTLY_STORAGE`; everything else uses `GENTLY_STORAGE_PATH`. | `launch_gently.py:121` | Logs and data silently split to different paths. | +| V5 | Device-layer-down is a `logger.debug` (invisible at default log level). | `launch_gently.py:209-212` | Biologist starts with scope off, gets a normal-looking startup, discovers it mid-conversation. 
| +| V6 | **XSS / HTML injection** — event key/value (perception prose, paths, agent text) assigned via `innerHTML` with no escaping. | `ui/web/static/js/events.js:69-77, 130-151, 237` | Real injection surface in the events table. `escapeHtml` exists and is used elsewhere. | +| V7 | `/ws/agent` has **no connection guard/lock**; conversation state is a single shared object. | `routes/agent_ws.py:128`, `bridge.py:565`, `agent.py:759` | Latent today (TUI is sole client); **becomes live corruption the moment a browser drives the agent.** Fixed by the control lock (§9). | +| V8 | `bridge.handle_command` does `command.strip().lower()` then branches on it. | `harness/bridge.py:647,696` | Case-sensitive args (session IDs, hostnames, embryo IDs) silently corrupted. | +| V9 | Embryo marking blocks forever; `wait_for_marking(timeout=None)`; TUI never shows the viz URL or signals a browser is needed. | `ui/web/embryo_marker.py:79`, `server.py:481`, `detection_tools.py` | **Worst operational friction** — hangs if no browser is open. Dissolved by web-only convergence. | +| V10 | Marking is global shared state broadcast to all `/ws` clients; any client's `marking_done` clobbers. | `server.py:459-472`, `websocket.py:164-188` | Two browsers marking simultaneously clobber each other. Fixed by driver-only gating (§9). | + +--- + +## 4. Robustness gaps (ranked, for unattended multi-hour sessions) + +1. **[CRITICAL] No crash/restart auto-resume** (V2). `_resume_session` (`manager.py:40-117`) restores embryos+conversation but never the orchestrator or runtime fields (stop_condition, cadence_phase, next_due_at, error_count). +2. **[CRITICAL] Device hiccup permanently drops embryos.** `_acquire_embryo` (`timelapse.py:712`) treats network/timeout as terminal; 3 strikes → `complete: errors`. No auto-reconnect in `client.py`. +3. **[CRITICAL] Silent perception/detector outage.** `_run_perception`/`_run_detector` are log-only, no retry, no event. 
A Claude outage silently freezes stage/hatching detection **while the laser keeps firing.** +4. **[HIGH] Non-atomic writes** (V3). +5. **[HIGH] No abort path for a hung device-layer plan** — one RunEngine, no abort endpoint; one stuck acquisition freezes the wheel each round. +6. **[HIGH] Disk-full silently stops persistence** — `save_state` failures are `logger.debug`. +7. **[HIGH] Orphaned volume TIFFs** — swallowed `register_volume` failure + 300s `cleanup_incoming` race deletes valid volumes. +8. **[HIGH] Unbounded fatal exception kills the whole session** — `_run_loop` top-level except → FAILED, no per-iteration recovery. +9. **[MEDIUM]** Perception task leak / no per-call timeout (`timelapse.py:2706, 2483`). +10. **[MEDIUM]** Startup picker / `wait_for_marking` block forever. +11. **[MEDIUM]** Advisory `session.lock`, no PID check. + +--- + +## 5. Biologist usability gaps (ranked) + +1. **[CRITICAL] "Microscope not connected" is silent** (V5). → persistent banner worded as consequence+fix; live heartbeat dot; `/reconnect`. +2. **[CRITICAL] Phototoxicity protection is opt-in, silent, expert-only** — only arms if Claude is passed `monitoring_mode='expression_monitoring'`. → make it **default** for reporter/hatching experiments; agent states plainly what it armed; show armed rules in plain English. +3. **[CRITICAL] No LLM-independent emergency stop** — pause/stop are only LLM tools. → `/stop` `/pause` that call the orchestrator directly (no API round-trip). +4. **[HIGH] Silent auto-complete / auto-pause** — biologist must inspect `completion_reason`. → push plain-language notice; distinguish hardware-error (offer retry) from biological endpoint. +5. **[HIGH] No liveness reassurance.** → "last volume 0:47 ago · next in 1:13" line, yellow/red when stalled. +6. **[HIGH] Marking blocks with no browser cue** (V9). +7. **[HIGH] Cryptic launch hard-stops** (`ANTHROPIC_API_KEY not set`, "TUI not available", Node/npm). +8. 
**[HIGH] First-run setup landmines** — stale model IDs (`settings.py:55-58`), env-var split, raw `ModuleNotFoundError` on bad organism, README version drift (v0.11.0 vs 0.20.0). → `--doctor` preflight. +9. **[MEDIUM]** Jargon mismatch (campaign/role=test/burst/SAM/photodose). → relabel human-facing strings. +10. **[MEDIUM]** Stop-condition vocabulary mismatch ("pretzel"/"2fold" shown but rejected as targets); casing drift. +11. **[MEDIUM]** Generic error strings (raw `str(e)`/tracebacks reach the biologist). + +--- + +## 6. Frontend audit + +### Web UI (`gently/ui/web`) — the future single surface +- **Stack:** vanilla JS, no build step, FastAPI + Jinja2, Three.js for 3D. ~15k JS / 21 ` + + diff --git a/launch_gently.py b/launch_gently.py index 9a38e815..34cdff8e 100644 --- a/launch_gently.py +++ b/launch_gently.py @@ -185,6 +185,20 @@ async def main(offline: bool = False, resume_session: str = None, show_sessions: write_manifest(storage_dir) store = FileStore(storage_dir) + # ── Accounts / auth ─────────────────────────────────────────── + # Self-managed user accounts gate microscope control on the LAN. On first + # run we bootstrap an admin and print its one-time password in the banner. + # Set GENTLY_NO_AUTH=1 to disable accounts (legacy localhost-control mode). 
+ admin_creds = None + if os.environ.get("GENTLY_NO_AUTH", "").strip().lower() not in ("1", "true", "yes"): + try: + from gently.ui.web.accounts import AccountStore, set_account_store + account_store = AccountStore(storage_dir / "auth") + set_account_store(account_store) + admin_creds = account_store.bootstrap_admin_if_empty() + except Exception as e: + logger.error("Account store init failed (continuing without auth): %s", e) + # Handle --sessions (just list and exit) if show_sessions: list_sessions(store) @@ -434,6 +448,13 @@ def _status_provider(): resumed=session_to_resume is not None, ) + if admin_creds: + _u, _p = admin_creds + print(" First-run admin account created — sign in at the URL above:") + print(f" username: {_u}") + print(f" password: {_p}") + print(" (Save this now. Add users via the admin API; GENTLY_NO_AUTH=1 disables auth.)\n") + if viz_url and not no_browser: try: import webbrowser From 6917a4282da3af2a198a8feb861c6b0dfd17d1bb Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:11:29 -0400 Subject: [PATCH 26/71] fix(launcher): make Ctrl-C stop the server on Windows The serve loop blocked on a bare asyncio.Event().wait(), which the Windows Proactor loop won't interrupt on Ctrl-C, leaving the server unstoppable. Install SIGINT/SIGTERM handlers (loop.add_signal_handler, falling back to signal.signal + call_soon_threadsafe on Windows) and poll a stop Event on a short interval so the interrupt surfaces and shutdown runs cleanly. Co-Authored-By: Claude Opus 4.8 (1M context) --- launch_gently.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/launch_gently.py b/launch_gently.py index 34cdff8e..37d4913c 100644 --- a/launch_gently.py +++ b/launch_gently.py @@ -462,10 +462,30 @@ def _status_provider(): except Exception: pass + # Keep the event loop alive so the in-process viz server keeps serving. 
+ # On Windows the Proactor loop won't surface Ctrl-C while blocked on a + # bare Event().wait(), so install signal handlers and poll on a short + # interval (which also lets a pending KeyboardInterrupt surface). + import signal as _signal + _loop = asyncio.get_running_loop() + _stop = asyncio.Event() try: - # Serve until interrupted (Ctrl-C). Keep the event loop alive so - # the in-process viz server keeps handling browser clients. - await asyncio.Event().wait() + _loop.add_signal_handler(_signal.SIGINT, _stop.set) + _loop.add_signal_handler(_signal.SIGTERM, _stop.set) + except (NotImplementedError, AttributeError, RuntimeError, ValueError): + # Windows Proactor: add_signal_handler is unsupported — fall back to + # signal.signal, waking the loop via call_soon_threadsafe. + def _sig(*_a): + _loop.call_soon_threadsafe(_stop.set) + try: + _signal.signal(_signal.SIGINT, _sig) + _signal.signal(_signal.SIGTERM, _sig) + except (ValueError, OSError): + pass + + try: + while not _stop.is_set(): + await asyncio.sleep(0.3) except (KeyboardInterrupt, asyncio.CancelledError): pass finally: From e075dcd1f42ce45899952ca610d9efed270bc8de Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:16:16 -0400 Subject: [PATCH 27/71] fix(web): viewing is open; login elevates to control (not a page gate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opening the dashboard no longer redirects to /login — viewing is open to everyone (the "watch like it is now" model). Login is an elevation to the control role, not a gate on the app. - pages.py: drop the /login redirect on the main page. - agent_ws.py: anonymous clients may connect to /ws/agent and *watch*; only authenticated operators/admins can hold/take the control lock (drive actions stay gated). No more close-on-unauthenticated. 
- agent-chat.js: distinguish anonymous ("Viewing — sign in to control", with a Sign in button) from a viewer-role account ("view-only"); header button is Sign in / Sign out accordingly. API model: observable (read) endpoints + watching the agent need no auth; only inputable (control) actions are gated via require_control / the control lock — auth is not attached to every endpoint. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/agent_ws.py | 15 ++++++-------- gently/ui/web/routes/pages.py | 12 +++++------ gently/ui/web/static/js/agent-chat.js | 29 ++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/gently/ui/web/routes/agent_ws.py b/gently/ui/web/routes/agent_ws.py index c29dfbce..ba49cb76 100644 --- a/gently/ui/web/routes/agent_ws.py +++ b/gently/ui/web/routes/agent_ws.py @@ -171,18 +171,15 @@ async def agent_websocket(websocket: WebSocket): from gently.ui.web.auth import SESSION_COOKIE _acct = get_account_store() username = None - can_control = True # legacy default + can_control = True # legacy default when no accounts are configured if _acct is not None and _acct.has_users(): + # Viewing is open: anonymous clients may connect and *watch* the + # conversation. Only authenticated operators/admins can hold or + # take the control lock (enforced on the drive actions below). _token = websocket.cookies.get(SESSION_COOKIE) username = _acct.verify_session(_token) if _token else None - if username is None: - await websocket.send_json({ - "type": "error", - "error": "Authentication required — please sign in.", - }) - await websocket.close() - return - can_control = _acct.get_role(username) in CONTROL_ROLES + role = _acct.get_role(username) if username else None + can_control = role in CONTROL_ROLES # Assign a stable id for control arbitration. The label shown to other # clients is the username when authenticated, else a generic window id. 
diff --git a/gently/ui/web/routes/pages.py b/gently/ui/web/routes/pages.py index 9c48912a..0cff7d82 100644 --- a/gently/ui/web/routes/pages.py +++ b/gently/ui/web/routes/pages.py @@ -9,12 +9,12 @@ def create_router(server) -> APIRouter: @router.get("/", response_class=HTMLResponse) async def index(request: Request): - """Serve the main SPA page (redirect to login if accounts require it).""" - from gently.ui.web.accounts import get_account_store - from gently.ui.web.auth import current_username - store = get_account_store() - if store is not None and store.has_users() and not current_username(request): - return RedirectResponse("/login", status_code=302) + """Serve the main SPA page. + + Viewing is open to everyone — the dashboard loads in view mode with no + login. Signing in is an *elevation* to control (handled in-app via the + chat window's "Sign in" affordance), not a gate on the page itself. + """ return server.templates.TemplateResponse( "index.html", {"request": request, "active_section": "embryos", "is_live": True} diff --git a/gently/ui/web/static/js/agent-chat.js b/gently/ui/web/static/js/agent-chat.js index cb42c409..8a60f3a0 100644 --- a/gently/ui/web/static/js/agent-chat.js +++ b/gently/ui/web/static/js/agent-chat.js @@ -262,8 +262,17 @@ const AgentChat = (() => { const who = holderLabel || 'another session'; input.disabled = true; sendBtn.disabled = true; - if (me && me.authenticated && me.can_control === false) { - // Viewer role — watching is all this account can do. + if (me && me.accounts && !me.authenticated) { + // Anonymous — viewing is open; sign in to control. 
+ banner.innerHTML = `Viewing — sign in to control.`; + const btn = document.createElement('button'); + btn.className = 'ac-take-control'; + btn.textContent = 'Sign in'; + btn.addEventListener('click', () => { window.location.href = '/login'; }); + banner.appendChild(btn); + input.placeholder = 'Viewing — sign in to control…'; + } else if (me && me.authenticated && me.can_control === false) { + // Viewer-role account — watching is all this account can do. banner.innerHTML = `View-only access — you can watch but not control.`; input.placeholder = 'View-only access'; } else { @@ -356,8 +365,18 @@ const AgentChat = (() => { if (m && m.authenticated) { userEl.textContent = m.username; userEl.title = `Signed in as ${m.username} (${m.role})`; + signoutBtn.textContent = 'Sign out'; + signoutBtn.dataset.action = 'logout'; + signoutBtn.style.display = ''; + } else if (m && m.accounts) { + // Anonymous — viewing is open; sign in to gain control. + userEl.textContent = 'viewing'; + userEl.title = 'Not signed in — view-only'; + signoutBtn.textContent = 'Sign in'; + signoutBtn.dataset.action = 'login'; signoutBtn.style.display = ''; } else { + // No accounts configured (legacy mode). 
userEl.textContent = ''; signoutBtn.style.display = 'none'; } @@ -382,8 +401,12 @@ const AgentChat = (() => { fab.addEventListener('click', () => togglePanel()); closeBtn.addEventListener('click', () => togglePanel(false)); signoutBtn.addEventListener('click', async () => { + if (signoutBtn.dataset.action === 'login') { + window.location.href = '/login'; + return; + } try { await fetch('/api/auth/logout', { method: 'POST' }); } catch (_) {} - window.location.href = '/login'; + window.location.reload(); }); fetchMe(); sendBtn.addEventListener('click', submit); From 645aacef15a4fe8a85739767c4cd80fd16c2bacc Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:24:29 -0400 Subject: [PATCH 28/71] feat(web): gate person-driven control actions by role MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete the inputable-action gating beyond the REST routes already covered in data.py: - chat.py: the per-timepoint VLM follow-up (POST /api/perception/chat/...) now requires the control role — it spends API budget and writes traces, so anonymous viewers can't trigger it. - websocket.py (/ws): marking actions (embryo_marked / marking_update / marking_done / marking_redetect) are gated to control-role clients via the session cookie; pure read/presence messages stay open so anyone can watch. Deliberately NOT gated here: device-layer ingest (POST images/volumes — a machine trust domain, would break under account mode where localhost is no longer auto-control) and campaign mutations (their own mesh scope auth). These need a separate machine-token pass. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/chat.py | 7 +++++-- gently/ui/web/routes/websocket.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/gently/ui/web/routes/chat.py b/gently/ui/web/routes/chat.py index 12833b13..1cdd15f7 100644 --- a/gently/ui/web/routes/chat.py +++ b/gently/ui/web/routes/chat.py @@ -16,10 +16,12 @@ from pathlib import Path from typing import Optional -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException from fastapi.responses import StreamingResponse from pydantic import BaseModel +from gently.ui.web.auth import require_control + logger = logging.getLogger(__name__) CHAT_MODEL = "claude-opus-4-7" @@ -106,7 +108,8 @@ async def get_chat(sid: str, eid: str, tp: int): return {"turns": _load_history(path)} @router.post("/api/perception/chat/{sid}/{eid}/{tp}") - async def post_chat(sid: str, eid: str, tp: int, body: ChatRequest): + async def post_chat(sid: str, eid: str, tp: int, body: ChatRequest, + _control=Depends(require_control)): """Append a user message and stream the assistant reply as SSE. Each SSE event is JSON: ``{"type": "delta", "text": "..."}`` for diff --git a/gently/ui/web/routes/websocket.py b/gently/ui/web/routes/websocket.py index b49518e0..3dce3320 100644 --- a/gently/ui/web/routes/websocket.py +++ b/gently/ui/web/routes/websocket.py @@ -11,6 +11,30 @@ logger = logging.getLogger(__name__) +# /ws message types that mutate experiment state (define what gets imaged). +# These are control actions and are gated by role; pure read/presence +# messages stay open so anyone can watch. +_MARKING_TYPES = frozenset({ + "embryo_marked", "marking_update", "marking_done", "marking_redetect", +}) + + +def _ws_can_control(websocket: WebSocket) -> bool: + """Whether this /ws client may perform control actions (marking). + + Account mode: operators/admins (by session cookie) only. 
Legacy mode + (no accounts configured): open, preserving prior behavior. + """ + from gently.ui.web.accounts import get_account_store, CONTROL_ROLES + from gently.ui.web.auth import SESSION_COOKIE + store = get_account_store() + if store is None or not store.has_users(): + return True + token = websocket.cookies.get(SESSION_COOKIE) + user = store.verify_session(token) if token else None + role = store.get_role(user) if user else None + return role in CONTROL_ROLES + def create_router(server) -> APIRouter: router = APIRouter() @@ -77,6 +101,11 @@ async def _handle_ws_message(server, websocket: WebSocket, message: str): msg_type = data.get("type") embryo_id = data.get("embryo_id") + # Gate control actions (marking) by role; viewing/presence stays open. + if msg_type in _MARKING_TYPES and not _ws_can_control(websocket): + logger.warning("Ignored %s from a view-only /ws client", msg_type) + return + if msg_type == "get_calibration": images = server.store.get_all_calibration(embryo_id) await websocket.send_json({ From 0fc43c6e1aa05897e63091d49ad0bc224af41be7 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:28:30 -0400 Subject: [PATCH 29/71] feat(web): uniform session transcript across all clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversation is now the same for every client of a session — operators and viewers, live and on reconnect/refresh. - Broadcast: user messages and the agent's streamed reply (text/thinking/ tool calls/choice requests) go to ALL connected /ws/agent clients via a raw-websocket registry, not just the driver. Observers watch live. - History: a display transcript is accumulated server-side and persisted to /chat_display.json (user/agent/tool turns, capped to 500). On connect each client is sent a "history" message and rebuilds the transcript, so refreshes and late joiners see the full conversation. 
- Choice pickers are interactive only for the control holder; observers see them read-only and only the holder's choice_response resolves. - Client: handles "history" (rebuild) and "user_message" (live echo with author); stops double-echoing the sender's own chat (it now arrives via the broadcast); slash commands still echo locally (not broadcast). Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/agent_ws.py | 98 ++++++++++++++++++++++++++- gently/ui/web/static/js/agent-chat.js | 59 ++++++++++++++-- 2 files changed, 150 insertions(+), 7 deletions(-) diff --git a/gently/ui/web/routes/agent_ws.py b/gently/ui/web/routes/agent_ws.py index ba49cb76..570c112a 100644 --- a/gently/ui/web/routes/agent_ws.py +++ b/gently/ui/web/routes/agent_ws.py @@ -41,6 +41,14 @@ def create_router(server) -> APIRouter: _clients: Dict[str, Callable] = {} _client_labels: Dict[str, str] = {} _client_counter = {"n": 0} + _raw_clients: Dict[str, WebSocket] = {} # client_id -> websocket (broadcast) + + # ── Uniform display transcript ──────────────────────────── + # A single conversation history shared by every client of this session. + # Persisted to /chat_display.json so it survives reconnects and + # restarts; broadcast live so all instances stay in sync. 
+ _history: list = [] + _history_state = {"loaded": False, "path": None, "agent_buf": None} async def _broadcast_control_status(): """Tell every connected agent client who currently holds control.""" @@ -57,6 +65,75 @@ async def _broadcast_control_status(): except Exception: pass + def _load_history_once(bridge): + if _history_state["loaded"]: + return + _history_state["loaded"] = True + try: + agent = bridge.agent + store = getattr(agent, "store", None) + sid = getattr(agent, "session_id", None) + if store and sid: + sdir = store._session_dir(sid) + if sdir: + p = sdir / "chat_display.json" + _history_state["path"] = p + if p.exists(): + loaded = json.loads(p.read_text(encoding="utf-8")) or [] + if isinstance(loaded, list): + _history.extend(loaded) + except Exception: + logger.debug("Could not load chat history", exc_info=True) + + def _save_history(): + p = _history_state["path"] + if not p: + return + try: + tmp = p.with_suffix(".json.tmp") + tmp.write_text(json.dumps(_history[-500:]), encoding="utf-8") + tmp.replace(p) + except Exception: + pass + + def _record(item): + _history.append(item) + if len(_history) > 500: + del _history[: len(_history) - 500] + _save_history() + + def _flush_agent_buf(): + buf = _history_state["agent_buf"] + if buf: + _record({"role": "agent", "text": buf}) + _history_state["agent_buf"] = None + + def _record_display(msg): + """Fold a streamed chunk into the persistent display history.""" + t = msg.get("type") + if t == "user_message": + _flush_agent_buf() + _record({"role": "user", "text": msg.get("text", ""), + "author": msg.get("author")}) + elif t == "text": + _history_state["agent_buf"] = (_history_state["agent_buf"] or "") + msg.get("text", "") + elif t == "tool_call": + _flush_agent_buf() + _record({"role": "tool", "name": msg.get("tool_name"), + "duration": msg.get("duration"), + "summary": msg.get("result_summary")}) + elif t == "stream_end": + _flush_agent_buf() + + async def _broadcast(msg): + """Record to history + send 
a display message to ALL clients.""" + _record_display(msg) + for cid, ws in list(_raw_clients.items()): + try: + await ws.send_json(msg) + except Exception: + pass + async def _run_wizard(wizard, websocket, send_fn, _choice_futures, bridge=None, log_transcript=None): """Run the wizard's interactive loop. @@ -464,10 +541,20 @@ def choice_future_factory(choice_data: dict) -> asyncio.Future: # (only to clients allowed to drive — viewers never auto-hold). _clients[client_id] = send_fn _client_labels[client_id] = client_label + _raw_clients[client_id] = websocket if _control["holder"] is None and can_control: _control["holder"] = client_id await _broadcast_control_status() + # Replay the uniform session transcript so every client (and every + # reconnect/refresh) shows the same conversation. + _load_history_once(bridge) + if _history: + try: + await websocket.send_json({"type": "history", "items": list(_history)}) + except Exception: + pass + try: # ── Wizard phase ────────────────────────────────────── # Run startup wizard (if needed) before entering the REPL. @@ -606,11 +693,19 @@ async def _run_resolution_bootstrap(): if active_task and not active_task.done(): active_task.cancel() + # Echo the user's message to ALL clients (so observers see + # what was asked), then stream the reply to everyone. + await _broadcast({"type": "user_message", "text": text, + "author": client_label}) active_task = asyncio.create_task( - bridge.stream_response(text, send_fn, choice_future_factory) + bridge.stream_response(text, _broadcast, choice_future_factory) ) elif msg_type == "choice_response": + # Only the control holder answers pickers (observers see + # them read-only). + if _control["holder"] != client_id: + continue request_id = data.get("request_id", "") selected = data.get("selected", "") # Check if bridge owns this choice (e.g. /import-embryos picker) @@ -719,6 +814,7 @@ async def _run_resolution_bootstrap(): # to any remaining client (or free it) and resync everyone. 
_clients.pop(client_id, None) _client_labels.pop(client_id, None) + _raw_clients.pop(client_id, None) if _control["holder"] == client_id: _control["holder"] = next(iter(_clients), None) try: diff --git a/gently/ui/web/static/js/agent-chat.js b/gently/ui/web/static/js/agent-chat.js index 8a60f3a0..577d18bd 100644 --- a/gently/ui/web/static/js/agent-chat.js +++ b/gently/ui/web/static/js/agent-chat.js @@ -81,9 +81,47 @@ const AgentChat = (() => { return content; } - function addUserMessage(text) { - const content = addTurn('user'); + function addUserMessage(text, author) { + const wrap = document.createElement('div'); + wrap.className = 'ac-turn ac-turn-user'; + if (author) { + const label = document.createElement('div'); + label.className = 'ac-role ac-role-user'; + label.textContent = author; + wrap.appendChild(label); + } + const content = document.createElement('div'); + content.className = 'ac-content'; content.textContent = text; + wrap.appendChild(content); + log.appendChild(wrap); + scrollToBottom(); + } + + /** Rebuild the transcript from a persisted/replayed history list. */ + function renderHistory(items) { + log.innerHTML = ''; + currentAgentEl = null; + activityEl = null; + (items || []).forEach(it => { + if (it.role === 'user') { + addUserMessage(it.text, it.author); + } else if (it.role === 'agent') { + const c = addTurn('agent'); + c._raw = it.text || ''; + c.innerHTML = mdToHtml(c._raw); + } else if (it.role === 'tool') { + const el = document.createElement('div'); + el.className = 'ac-tool ac-tool-done'; + const dur = it.duration ? ` · ${(it.duration.toFixed ? it.duration.toFixed(1) : it.duration)}s` : ''; + const summary = it.summary ? 
` — ${escapeHtml(it.summary)}` : ''; + el.innerHTML = `${escapeHtml(it.name || 'tool')}${dur}${summary}`; + log.appendChild(el); + } else if (it.role === 'system') { + addSystemLine(it.text, it.level || 'info'); + } + }); + scrollToBottom(); } function addSystemLine(text, level = 'info') { @@ -108,6 +146,15 @@ const AgentChat = (() => { renderControl(); break; + case 'history': + renderHistory(msg.items || []); + break; + + case 'user_message': + hideActivity(); + addUserMessage(msg.text, msg.author); + break; + case 'stream_start': streaming = true; currentAgentEl = null; // created lazily on first text @@ -215,7 +262,7 @@ const AgentChat = (() => { (data.options || []).forEach(opt => { const btn = document.createElement('button'); btn.className = 'ac-choice-opt'; - btn.disabled = !!opt.disabled; + btn.disabled = !!opt.disabled || !hasControl; // observers see it read-only const desc = opt.description ? `${escapeHtml(opt.description)}` : ''; btn.innerHTML = `${escapeHtml(opt.label)}${desc}`; btn.addEventListener('click', () => { @@ -330,11 +377,11 @@ const AgentChat = (() => { const text = input.value.trim(); if (!text) return; if (!hasControl) { renderControl(); return; } - addUserMessage(text); if (text.startsWith('/')) { - send({ type: 'command', command: text }); // slash commands (e.g. /status) + addUserMessage(text); // commands aren't broadcast; echo locally + send({ type: 'command', command: text }); // slash commands (e.g. /status) } else { - send({ type: 'chat', text }); + send({ type: 'chat', text }); // echoed to all via 'user_message' } // Instant feedback before the first chunk arrives. 
setBusy(true); From f6aad7452fd1ef4b9e49ed1311709faa2a04f313 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:30:53 -0400 Subject: [PATCH 30/71] fix(store): crash-safe atomic YAML/JSON writes (V3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _write_yaml and save_conversation wrote to a temp file, then unlink()'d the target and rename()'d — a non-atomic window where a crash/power-loss between unlink and rename leaves the file (session.yaml, embryo.yaml, conversation .json, timelapse state) missing or truncated. The very files /resume depends on were the ones at risk. Use os.replace(tmp, path) (atomic + overwrites on Windows) and fsync the temp before the swap, so the target always reflects either the old or new content, never a gap. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/core/file_store.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/gently/core/file_store.py b/gently/core/file_store.py index 1abbc971..0231c14c 100644 --- a/gently/core/file_store.py +++ b/gently/core/file_store.py @@ -156,10 +156,11 @@ def _write_yaml(path: Path, data: Any) -> None: with os.fdopen(fd, "w", encoding="utf-8") as f: yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True) - # On Windows, rename over an existing file requires removing it first. - if path.exists(): - path.unlink() - Path(tmp).rename(path) + f.flush() + os.fsync(f.fileno()) + # os.replace is atomic and overwrites on Windows — no unlink gap that + # a crash/power-loss could leave the target missing. 
+ os.replace(tmp, path) except BaseException: # Clean up temp file on failure try: @@ -410,9 +411,9 @@ def save_session_snapshot(self, session_id: str, snapshot: dict) -> None: try: with os.fdopen(fd, "w", encoding="utf-8") as f: json.dump(snapshot, f, indent=2, ensure_ascii=False, default=str) - if path.exists(): - path.unlink() - Path(tmp).rename(path) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp, path) except BaseException: try: os.unlink(tmp) From 7698decb4019bc7c924b6811a7a4d293a2c631f4 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:31:49 -0400 Subject: [PATCH 31/71] fix(tools): get_timelapse_status crashed with KeyError (V1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The status tool read status_dict['next_embryo'], ['next_acquisition_in_seconds'] and per-embryo ['interval_seconds'] — none of which TimelapseState.to_dict() emits (it has next_round_time / seconds_until_next_round, and embryo_details carries timepoints / is_complete / completion_reason). Every successful call raised KeyError, so the biologist's primary "is it working?" tool was broken. Use the real keys: report seconds_until_next_round for the next acquisition and drop the unavailable per-embryo interval from the line. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/app/tools/timelapse_tools.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gently/app/tools/timelapse_tools.py b/gently/app/tools/timelapse_tools.py index fad0ff95..2e01fa60 100644 --- a/gently/app/tools/timelapse_tools.py +++ b/gently/app/tools/timelapse_tools.py @@ -142,16 +142,15 @@ def get_timelapse_status(context: Dict = None) -> str: lines.append(f"Completed embryos: {status_dict['completed_embryos']}") lines.append("") - if status_dict['next_embryo']: - lines.append(f"Next acquisition: {status_dict['next_embryo']} in {status_dict['next_acquisition_in_seconds']:.0f}s") + if status_dict.get('seconds_until_next_round') is not None: + lines.append(f"Next acquisition in {status_dict['seconds_until_next_round']:.0f}s") lines.append("") if status_dict['embryo_details']: lines.append("Embryo Details:") for eid, details in status_dict['embryo_details'].items(): status_marker = "[done]" if details['is_complete'] else "[active]" - lines.append(f" {status_marker} {eid}: t={details['timepoints']} " - f"(interval={details['interval_seconds']}s)") + lines.append(f" {status_marker} {eid}: t={details['timepoints']}") if details['is_complete']: lines.append(f" Completed: {details['completion_reason']}") From 8e3531b14709d4b05999f6c60efe2554a95243e0 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:32:07 -0400 Subject: [PATCH 32/71] =?UTF-8?q?fix(launcher):=20unify=20storage=20env=20?= =?UTF-8?q?var=20=E2=80=94=20logs=20follow=20data=20(V4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The launcher read GENTLY_STORAGE for its log directory while the rest of the system uses GENTLY_STORAGE_PATH (via settings.storage.base_path). Setting only one silently split logs from data. Use settings.storage.base_path for the log dir too so both honor the same configured root. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- launch_gently.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/launch_gently.py b/launch_gently.py index 37d4913c..6caa3cba 100644 --- a/launch_gently.py +++ b/launch_gently.py @@ -154,7 +154,11 @@ def run_ink_picker(tui_dist: Path, sessions_json: str) -> str | None: async def main(offline: bool = False, resume_session: str = None, show_sessions: bool = False, pick_session: bool = False, log_level: str = "WARNING", no_browser: bool = False): # Set up log file in storage directory - storage_base = Path(os.environ.get("GENTLY_STORAGE", "D:/Gently3")) + # Unified with FileStore: logs live under the same root as data + # (settings.storage.base_path reads GENTLY_STORAGE_PATH). Previously this + # read a separate GENTLY_STORAGE env var, so setting only one split logs + # from data. + storage_base = settings.storage.base_path log_dir = storage_base / "logs" log_dir.mkdir(parents=True, exist_ok=True) log_file = str(log_dir / f"gently_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log") From 2f4e3e61f4288dfbcd4104d2c3f38630407ba0af Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:33:21 -0400 Subject: [PATCH 33/71] fix(web): escape event text to close events-table XSS (V6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit highlightSearchTerms returned arbitrary event data (perception reasoning, file paths, agent free-text) unescaped, and callers assigned it via innerHTML — so any '<' in event data was interpreted as markup (a real injection surface). Escape the input before inserting the search tags; this closes the hole at every call site (event key/value, log message, logger name) since they all route through this function. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/static/js/events.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gently/ui/web/static/js/events.js b/gently/ui/web/static/js/events.js index 3e37eb66..998431a1 100644 --- a/gently/ui/web/static/js/events.js +++ b/gently/ui/web/static/js/events.js @@ -101,12 +101,17 @@ function eventMatchesSearch(event) { } function highlightSearchTerms(text) { - if (!searchQuery || !text) return text; + // Escape first — event keys/values/messages are arbitrary text (perception + // prose, file paths, agent output) and are inserted via innerHTML by the + // callers. Escaping here closes the XSS hole at every call site; the + // injected tags are the only markup we add. + const safe = escapeHtml(text == null ? '' : String(text)); + if (!searchQuery) return safe; try { const regex = new RegExp(`(${escapeRegex(searchQuery)})`, 'gi'); - return String(text).replace(regex, '$1'); + return safe.replace(regex, '$1'); } catch (e) { - return text; + return safe; } } From 2b94ffe0275cb3dc5a63fb137eca9896b1b96a6d Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 00:33:54 -0400 Subject: [PATCH 34/71] =?UTF-8?q?docs:=20update=20progress=20=E2=80=94=20a?= =?UTF-8?q?uth,=20gating,=20transcript,=20verified=20bugs=20done?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- notes/biologist-readiness-plan.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/notes/biologist-readiness-plan.md b/notes/biologist-readiness-plan.md index 00aae259..701a4fc5 100644 --- a/notes/biologist-readiness-plan.md +++ b/notes/biologist-readiness-plan.md @@ -316,7 +316,11 @@ Incremental — the backend protocol for agent control already exists (`/ws/agen - **[done] Milestone A (start):** floating agent-chat window in the web UI (`static/js/agent-chat.js`, `static/css/agent-chat.css`, wired into `index.html`) 
connecting to `/ws/agent` with streaming + choice pickers + applied-spec cards + slash-command routing, XSS-safe. - **[done] Milestone B (seed):** single-driver control lock in `routes/agent_ws.py` (holder drives; observers get a "Take control" banner; control passes on disconnect). Fixes latent V7. *Not yet gated by auth — that's the next increment.* - **[done] Milestone E/F (TUI retired + launcher reshaped):** `launch_gently.py` no longer spawns the Node TUI — it starts the agent + viz server, prints a launch banner (URL · device status [fixes V5] · storage · Ctrl-C), auto-opens the browser (`--no-browser` to suppress), and serves until interrupted. Node/dist requirement removed; `--resume`/bare `--resume` resolves to most-recent (interactive picker deferred to the web). TUI source kept in-tree (reversible), `run_ink_picker` retained for reference. -- **[‼ NEXT] Auth gating:** with the TUI gone, the browser is the *only* control path and it is **unauthenticated on the LAN**. Bind viz to `127.0.0.1` or trust the LAN until self-managed accounts + viewer/operator roles land. This is now the top priority. +- **[done] Self-managed auth + roles:** `accounts.py` (PBKDF2 users, HMAC session cookies, first-run admin bootstrap), `auth.py` cookie-aware `resolve_role`, `/login` + `/api/auth/*`, `/ws/agent` authenticates and gates control by role. Viewing is **open** (anonymous watch); login elevates to control; admin/operator/viewer. `GENTLY_NO_AUTH=1` disables. +- **[done] Control-action gating pass:** perception-chat POST + `/ws` marking actions gated to control role (`require_control` / session cookie). Device-ingest + campaign-mesh routes deliberately left to their own machine/mesh auth (documented). +- **[done] Uniform session transcript:** user messages + agent stream broadcast to ALL `/ws/agent` clients (observers watch live); display history persisted to `/chat_display.json` and replayed on connect so refreshes/late-joiners see the full conversation. 
Choice pickers interactive only for the holder. +- **[done] Verified bugs:** V1 `get_timelapse_status` KeyError, V3 crash-safe atomic writes (`os.replace`+fsync), V4 storage env-var unify, V6 events-table XSS escape. Plus the Windows Ctrl-C launcher fix. +- **[remaining] Not yet done (need tests / live verification):** V2 acquisition auto-resume (add round-trip tests first), dead-calibration-code + SQLite-stack deletion (grep + test migration), device/perception transient-failure hardening, friendly organism/hardware config errors, machine-token auth for device-ingest endpoints. --- From 2038bf7846f1a9969c932cf3390ab73d15700e03 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 05:37:57 -0400 Subject: [PATCH 35/71] feat(web): resume a session from the Sessions tab The Sessions tab was empty: /api/sessions globbed sessions_dir/*.json, but Gently3's FileStore stores each session as a directory (session.yaml). And there was no way to switch the live agent's session from the browser after the TUI session picker was retired. - /api/sessions now lists from the live FileStore (via the agent), with an `active` flag for the current session. - New control-gated POST /api/sessions/{id}/resume calls agent.resume_session (the same machinery as CLI resume: saves current, loads target's embryos + conversation), then broadcasts `session_changed` so all browsers reload. - agent_ws transcript is now session-aware: _load_history_for_session (replacing _load_history_once) reloads the display history when the session id changes, so a resumed session shows its own conversation instead of the previous one. - Sessions tab: a "Resume in agent" button per session (active one badged); /ws clients reload on `session_changed`. Note: resume restores embryos + conversation; restarting a previously-running timelapse on resume is the separate V2 auto-resume item (not yet wired). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/agent_ws.py | 21 ++++-- gently/ui/web/routes/sessions.py | 97 ++++++++++++++++++++++------ gently/ui/web/static/css/review.css | 27 ++++++++ gently/ui/web/static/js/review.js | 22 ++++++- gently/ui/web/static/js/websocket.js | 4 ++ 5 files changed, 143 insertions(+), 28 deletions(-) diff --git a/gently/ui/web/routes/agent_ws.py b/gently/ui/web/routes/agent_ws.py index 570c112a..0ffd9d76 100644 --- a/gently/ui/web/routes/agent_ws.py +++ b/gently/ui/web/routes/agent_ws.py @@ -48,7 +48,7 @@ def create_router(server) -> APIRouter: # Persisted to /chat_display.json so it survives reconnects and # restarts; broadcast live so all instances stay in sync. _history: list = [] - _history_state = {"loaded": False, "path": None, "agent_buf": None} + _history_state = {"sid": None, "path": None, "agent_buf": None} async def _broadcast_control_status(): """Tell every connected agent client who currently holds control.""" @@ -65,14 +65,23 @@ async def _broadcast_control_status(): except Exception: pass - def _load_history_once(bridge): - if _history_state["loaded"]: - return - _history_state["loaded"] = True + def _load_history_for_session(bridge): + """Load the current session's display history, reloading if the + session changed (e.g. after a resume from the Sessions tab).""" try: agent = bridge.agent store = getattr(agent, "store", None) sid = getattr(agent, "session_id", None) + except Exception: + return + if sid == _history_state["sid"]: + return # already loaded for this session + # Session changed (or first load): reset and reload from disk. 
+ _history.clear() + _history_state["sid"] = sid + _history_state["path"] = None + _history_state["agent_buf"] = None + try: if store and sid: sdir = store._session_dir(sid) if sdir: @@ -548,7 +557,7 @@ def choice_future_factory(choice_data: dict) -> asyncio.Future: # Replay the uniform session transcript so every client (and every # reconnect/refresh) shows the same conversation. - _load_history_once(bridge) + _load_history_for_session(bridge) if _history: try: await websocket.send_json({"type": "history", "items": list(_history)}) diff --git a/gently/ui/web/routes/sessions.py b/gently/ui/web/routes/sessions.py index 69d70d47..9447738a 100644 --- a/gently/ui/web/routes/sessions.py +++ b/gently/ui/web/routes/sessions.py @@ -1,9 +1,11 @@ -"""Session routes - list and retrieve saved sessions.""" +"""Session routes - list, retrieve, and resume saved sessions.""" import json import logging -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException + +from gently.ui.web.auth import require_control logger = logging.getLogger(__name__) @@ -11,28 +13,83 @@ def create_router(server) -> APIRouter: router = APIRouter() + def _file_store(): + """The live FileStore (current Gently3 layout), via the agent.""" + bridge = getattr(server, "agent_bridge", None) + if bridge is not None and getattr(bridge, "agent", None) is not None: + st = getattr(bridge.agent, "store", None) + if st is not None: + return st + return getattr(server, "gently_store", None) + + def _active_session_id(): + bridge = getattr(server, "agent_bridge", None) + agent = bridge.agent if bridge is not None else None + return getattr(agent, "session_id", None) if agent is not None else None + @router.get("/api/sessions") async def list_sessions(): - """List available sessions with metadata""" + """List available sessions (from the live FileStore).""" + store = _file_store() + if store is None: + return {"sessions": []} + active_id = _active_session_id() sessions = [] - if 
server.sessions_dir.exists(): - for path in server.sessions_dir.glob("*.json"): + try: + for s in store.list_sessions(): + sid = s.get("session_id") try: - with open(path, encoding='utf-8') as f: - data = json.load(f) - sessions.append({ - 'session_id': data.get('session_id', path.stem), - 'name': data.get('name', path.stem), - 'created_at': data.get('created_at', ''), - 'last_active': data.get('last_active', ''), - 'embryo_count': len(data.get('embryo_states', {})), - 'description': data.get('description', '') - }) - except Exception as e: - logger.warning(f"Failed to read session {path}: {e}") - # Sort by created_at descending (newest first) - sessions.sort(key=lambda x: x.get('created_at', ''), reverse=True) - return {'sessions': sessions} + count = len(store.list_embryos(sid) or []) + except Exception: + count = 0 + sessions.append({ + "session_id": sid, + "name": s.get("name") or sid, + "created_at": s.get("created_at", ""), + "last_active": s.get("last_active", ""), + "embryo_count": count, + "description": s.get("description", ""), + "active": sid == active_id, + }) + except Exception as e: + logger.warning("Failed to list sessions from FileStore: %s", e) + return {"sessions": sessions} + + @router.post("/api/sessions/{session_id}/resume", + dependencies=[Depends(require_control)]) + async def resume_session(session_id: str): + """Switch the live agent to a different saved session. + + Reuses the same machinery as CLI resume (saves the current session, + loads the target's embryos + conversation). Then nudges all browser + clients to reload so they pick up the new session's state and + transcript. 
+ """ + bridge = getattr(server, "agent_bridge", None) + agent = bridge.agent if bridge is not None else None + if agent is None: + raise HTTPException(status_code=503, detail="Agent not ready") + store = getattr(agent, "store", None) + if store is None or store.get_session(session_id) is None: + raise HTTPException(status_code=404, detail="Session not found") + if session_id == getattr(agent, "session_id", None): + return {"ok": True, "session_id": session_id, "active": True, + "note": "already active"} + try: + ok = agent.resume_session(session_id) + except Exception as e: + logger.exception("Session resume failed") + raise HTTPException(status_code=500, detail=f"resume failed: {e}") + if not ok: + raise HTTPException(status_code=500, detail="resume returned false") + # Tell every connected browser to reload — they'll reconnect to the + # new session's state (embryos, transcript). + try: + await server.manager.broadcast({"type": "session_changed", + "session_id": session_id}) + except Exception: + pass + return {"ok": True, "session_id": session_id, "active": True} @router.get("/api/sessions/{session_id}") async def get_session(session_id: str): diff --git a/gently/ui/web/static/css/review.css b/gently/ui/web/static/css/review.css index de2bd66d..735d42a5 100644 --- a/gently/ui/web/static/css/review.css +++ b/gently/ui/web/static/css/review.css @@ -443,3 +443,30 @@ color: var(--text-muted); } + +/* Resume-in-agent action on session list items */ +.session-resume-btn { + margin-top: 8px; + padding: 5px 10px; + border-radius: 7px; + border: 1px solid var(--accent, #60a5fa); + background: transparent; + color: var(--accent, #60a5fa); + font-size: 12px; + font-weight: 600; + cursor: pointer; +} +.session-resume-btn:hover { background: var(--accent, #60a5fa); color: #fff; } +.session-active-badge { + font-size: 10px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--accent-green, #4ade80); + border: 1px solid var(--accent-green, 
#4ade80); + border-radius: 999px; + padding: 1px 7px; + margin-left: 6px; + vertical-align: middle; +} +.session-item.active-session { border-left: 2px solid var(--accent-green, #4ade80); } diff --git a/gently/ui/web/static/js/review.js b/gently/ui/web/static/js/review.js index bd2c75fa..07dcf49e 100644 --- a/gently/ui/web/static/js/review.js +++ b/gently/ui/web/static/js/review.js @@ -86,17 +86,35 @@ const ReviewApp = { } list.innerHTML = filtered.map(s => ` -
-
${this.escapeHtml(s.name || s.session_id)}
+
+
${this.escapeHtml(s.name || s.session_id)}${s.active ? ' active' : ''}
${this.formatDate(s.created_at)} ${s.embryo_count ? `${s.embryo_count} embryo${s.embryo_count !== 1 ? 's' : ''}` : ''}
${s.description ? `
${this.escapeHtml(s.description)}
` : ''} + ${s.active ? '' : ``}
`).join(''); }, + async resumeSession(sessionId) { + if (!confirm('Switch the live agent to this session?\nThe current session is saved first.')) return; + try { + const resp = await fetch(`/api/sessions/${sessionId}/resume`, { method: 'POST' }); + if (resp.ok) { + // Server broadcasts session_changed to reload all clients; we + // navigate home as well so the operator lands on the new session. + window.location.href = '/'; + } else { + const d = await resp.json().catch(() => ({})); + alert('Resume failed: ' + (d.detail || ('HTTP ' + resp.status))); + } + } catch (e) { + alert('Resume failed: ' + e); + } + }, + renderSessionContent() { const content = document.getElementById('session-content'); const s = this.currentSession; diff --git a/gently/ui/web/static/js/websocket.js b/gently/ui/web/static/js/websocket.js index 38a2794c..1e3a534c 100644 --- a/gently/ui/web/static/js/websocket.js +++ b/gently/ui/web/static/js/websocket.js @@ -127,6 +127,10 @@ function handleMessage(msg) { // Switch to embryos tab if not already there if (state.tab !== 'embryos') switchTab('embryos'); } + } else if (msg.type === 'session_changed') { + // The live agent switched sessions (resume from the Sessions tab) — + // reload so every client picks up the new session's state + transcript. + window.location.href = '/'; } else if (msg.type === 'ping') { state.ws.send(JSON.stringify({type: 'pong'})); } else if (msg.type === 'presence') { From a14372dfd8a16ec2d13b1b1f204dd80ffb03c2d4 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 05:39:42 -0400 Subject: [PATCH 36/71] fix(config): friendly errors for unknown organism/hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit load_organism / load_hardware raised a raw ModuleNotFoundError on a typo'd config value. Now they raise a clear ValueError listing the available plugins ("Unknown organism 'celgans'. Available: celegans. 
Set 'organism:' in config/config.yml.") — while re-raising if a dependency *inside* a valid plugin module is what's actually missing, so real import errors aren't masked. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/hardware/__init__.py | 25 +++++++++++++++++++++++-- gently/organisms/__init__.py | 26 ++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/gently/hardware/__init__.py b/gently/hardware/__init__.py index 2f07e9ce..c8886da7 100644 --- a/gently/hardware/__init__.py +++ b/gently/hardware/__init__.py @@ -14,14 +14,24 @@ import importlib import logging +import pkgutil from types import ModuleType -from typing import Optional +from typing import List, Optional logger = logging.getLogger(__name__) _active_hardware: Optional[ModuleType] = None +def available_hardware() -> List[str]: + """Names of the hardware plugins shipped under gently.hardware.""" + import gently.hardware as _pkg + return sorted( + m.name for m in pkgutil.iter_modules(_pkg.__path__) + if m.ispkg and not m.name.startswith("_") + ) + + def load_hardware(name: str) -> ModuleType: """ Load a hardware module by name and set it as active. @@ -43,7 +53,18 @@ def load_hardware(name: str) -> ModuleType: If the hardware module cannot be found. """ global _active_hardware - module = importlib.import_module(f"gently.hardware.{name}") + try: + module = importlib.import_module(f"gently.hardware.{name}") + except ModuleNotFoundError as e: + # Only a missing hardware *package* is a config error; re-raise if a + # dependency inside the module is what's missing. + if e.name in (f"gently.hardware.{name}", name): + avail = ", ".join(available_hardware()) or "(none found)" + raise ValueError( + f"Unknown hardware '{name}'. Available: {avail}. " + f"Set 'hardware:' in config/config.yml." 
+ ) from e + raise _active_hardware = module logger.info("Loaded hardware module: %s", name) return module diff --git a/gently/organisms/__init__.py b/gently/organisms/__init__.py index d388ab60..e4f9d522 100644 --- a/gently/organisms/__init__.py +++ b/gently/organisms/__init__.py @@ -14,14 +14,24 @@ import importlib import logging +import pkgutil from types import ModuleType -from typing import Optional +from typing import List, Optional logger = logging.getLogger(__name__) _active_organism: Optional[ModuleType] = None +def available_organisms() -> List[str]: + """Names of the organism plugins shipped under gently.organisms.""" + import gently.organisms as _pkg + return sorted( + m.name for m in pkgutil.iter_modules(_pkg.__path__) + if m.ispkg and not m.name.startswith("_") + ) + + def load_organism(name: str) -> ModuleType: """ Load an organism module by name and set it as active. @@ -43,7 +53,19 @@ def load_organism(name: str) -> ModuleType: If the organism module cannot be found. """ global _active_organism - module = importlib.import_module(f"gently.organisms.{name}") + try: + module = importlib.import_module(f"gently.organisms.{name}") + except ModuleNotFoundError as e: + # Only treat a missing organism *package* as a config error; if a + # dependency *inside* the organism module is missing, re-raise so the + # real ImportError isn't masked. + if e.name in (f"gently.organisms.{name}", name): + avail = ", ".join(available_organisms()) or "(none found)" + raise ValueError( + f"Unknown organism '{name}'. Available: {avail}. " + f"Set 'organism:' in config/config.yml." 
+ ) from e + raise _active_organism = module logger.info("Loaded organism module: %s (%s)", name, module.ORGANISM_DISPLAY_NAME) return module From 638f827ed3106dbb66ea4c42f3ffd3e24f1760dc Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 05:41:16 -0400 Subject: [PATCH 37/71] fix(web): serve session review data from FileStore GET /api/sessions/{id} still read sessions_dir/{id}.json (old layout), so clicking a session in the Sessions tab 404'd. Map the FileStore session snapshot onto the review view's shape (embryo_states from experiment_data, conversation from conversation_history). detection_history is left empty for now (per-timepoint predictions live in a separate store). Drops the now-unused json import. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/sessions.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/gently/ui/web/routes/sessions.py b/gently/ui/web/routes/sessions.py index 9447738a..d47a146b 100644 --- a/gently/ui/web/routes/sessions.py +++ b/gently/ui/web/routes/sessions.py @@ -1,6 +1,5 @@ """Session routes - list, retrieve, and resume saved sessions.""" -import json import logging from fastapi import APIRouter, Depends, HTTPException @@ -93,14 +92,29 @@ async def resume_session(session_id: str): @router.get("/api/sessions/{session_id}") async def get_session(session_id: str): - """Get full session state for review""" - path = server.sessions_dir / f"{session_id}.json" - if not path.exists(): + """Get session state for review, from the live FileStore. + + Maps the FileStore session snapshot onto the shape the Sessions review + view expects (embryo_states / conversation). detection_history isn't + reconstructed here (per-timepoint predictions live elsewhere). 
+ """ + store = _file_store() + if store is None: + raise HTTPException(status_code=503, detail="Store not available") + info = store.get_session(session_id) + if info is None: raise HTTPException(status_code=404, detail="Session not found") - try: - with open(path, encoding='utf-8') as f: - return json.load(f) - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to load session: {e}") + snapshot = store.load_session_snapshot(session_id) or {} + experiment = snapshot.get("experiment_data", {}) or {} + return { + "session_id": session_id, + "name": info.get("name") or session_id, + "description": info.get("description", ""), + "created_at": info.get("created_at", ""), + "last_active": info.get("last_active", ""), + "embryo_states": experiment.get("embryos", {}) or {}, + "conversation": snapshot.get("conversation_history", []) or [], + "detection_history": {}, + } return router From cf2c24e212cc6191120a39ffdc863fbb5f09e4c2 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 05:56:15 -0400 Subject: [PATCH 38/71] fix(web): resumed sessions show history + correct session id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes for the confusing "resume shows an empty session" experience (the data was always safe on disk; it just wasn't surfaced): - Chat history: when a session has no chat_display.json (created before the uniform-transcript feature, or resumed for the first time), derive a best-effort transcript from the saved Claude conversation (conversation_history) so the chat panel shows the prior conversation instead of nothing. - Header session id: the top-left session label is driven by the timelapse tracker's session_id, which goes stale after a resume with no active timelapse — so the header showed the launch session, not the resumed one. The /ws connect now overrides it with the live agent session_id. 
Still open (separate, larger): rehydrating the viz image/projection/trace stores from disk on resume so a resumed session's volumes/traces/calibration actually display, and the apparent embryo double-count on resume. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/agent_ws.py | 26 ++++++++++++++++++++++++++ gently/ui/web/routes/websocket.py | 11 +++++++++++ 2 files changed, 37 insertions(+) diff --git a/gently/ui/web/routes/agent_ws.py b/gently/ui/web/routes/agent_ws.py index 0ffd9d76..0d27a5d0 100644 --- a/gently/ui/web/routes/agent_ws.py +++ b/gently/ui/web/routes/agent_ws.py @@ -94,6 +94,32 @@ def _load_history_for_session(bridge): except Exception: logger.debug("Could not load chat history", exc_info=True) + # Fallback: sessions created before chat_display.json existed (or any + # session resumed for the first time) — derive a best-effort transcript + # from the saved Claude conversation so the chat still shows history. + if not _history and store and sid: + try: + snap = store.load_session_snapshot(sid) or {} + for m in (snap.get("conversation_history") or []): + role = m.get("role") + content = m.get("content") + if isinstance(content, list): + text = "".join( + b.get("text", "") for b in content + if isinstance(b, dict) and b.get("type") == "text" + ) + else: + text = content if isinstance(content, str) else "" + text = (text or "").strip() + if not text: + continue + if role == "user": + _history.append({"role": "user", "text": text}) + elif role == "assistant": + _history.append({"role": "agent", "text": text}) + except Exception: + logger.debug("Could not derive history from conversation", exc_info=True) + def _save_history(): p = _history_state["path"] if not p: diff --git a/gently/ui/web/routes/websocket.py b/gently/ui/web/routes/websocket.py index 3dce3320..3b35f3e1 100644 --- a/gently/ui/web/routes/websocket.py +++ b/gently/ui/web/routes/websocket.py @@ -56,6 +56,17 @@ async def websocket_endpoint(websocket: WebSocket): # Always 
send timelapse state on connect so client can reconcile # (if IDLE with no session_id, client will clear stale cached state) timelapse_state = server.timelapse_tracker.to_dict() + # The header's session id is driven by this payload; the tracker's + # session_id goes stale after a resume with no active timelapse, so + # override it with the live agent session (the source of truth). + try: + bridge = getattr(server, "agent_bridge", None) + if bridge is not None and getattr(bridge, "agent", None) is not None: + live_sid = bridge.agent.session_id + if live_sid: + timelapse_state["session_id"] = live_sid + except Exception: + pass await websocket.send_json({ "type": "timelapse_state", "data": timelapse_state From df3db33d5e0a50512e6e3efe37c3dfd3c415d89d Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 06:02:01 -0400 Subject: [PATCH 39/71] feat(web): rehydrate viz imagery from disk on session resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resuming a session restored embryo state but not the visual data: the viz image store is in-memory (filled by live acquisition events), so a resumed session's volumes/projections never appeared in the galleries/filmstrips even though they were safe on disk. - VisualizationServer.rehydrate_session(): enumerates the session's embryos x projection timepoints from the FileStore and adds lightweight ImageData entries (uid volume_{embryo}_t{NNNN}, no base64). JPEG pixels load lazily via /api/images/{uid}/png, which already falls back to the FileStore projection. Resets the store first so the previous session's images don't linger. - FileStore.list_projection_timepoints(): cheap glob-only timepoint listing (avoids list_projections' per-file PIL/meta cost — matters for sessions with thousands of projections). 
- _resolve_projection_path now uses the live agent session (via a new _current_session_id helper) instead of the stale timelapse-tracker session, so lazy projection serving targets the resumed session's folder. - Wired into the resume endpoint and CLI --resume startup. Verified against a real 256-projection session: all 256 rehydrate, filmstrip sequences resolve, and the on-disk JPEGs serve. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/core/file_store.py | 21 ++++++++++++ gently/ui/web/routes/sessions.py | 12 +++++-- gently/ui/web/server.py | 56 ++++++++++++++++++++++++++++++-- launch_gently.py | 7 ++++ 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/gently/core/file_store.py b/gently/core/file_store.py index 0231c14c..4b33dedd 100644 --- a/gently/core/file_store.py +++ b/gently/core/file_store.py @@ -792,6 +792,27 @@ def get_projection_path( return proj_path return None + def list_projection_timepoints( + self, session_id: str, embryo_id: str + ) -> List[int]: + """Cheaply list projection timepoints (glob only, no PIL/meta reads). + + Used to rehydrate the viz image store on resume without paying the + per-file cost of list_projections(). 
+ """ + sd = self._session_dir(session_id) + if sd is None: + return [] + proj_dir = sd / "embryos" / embryo_id / "projections" + if not proj_dir.exists(): + return [] + tps: List[int] = [] + for jpg in proj_dir.glob("t*.jpg"): + m = re.match(r"t(\d+)\.jpg$", jpg.name) + if m: + tps.append(int(m.group(1))) + return sorted(tps) + def get_projection_b64( self, session_id: str, embryo_id: str, timepoint: int ) -> Optional[str]: diff --git a/gently/ui/web/routes/sessions.py b/gently/ui/web/routes/sessions.py index d47a146b..58f3394d 100644 --- a/gently/ui/web/routes/sessions.py +++ b/gently/ui/web/routes/sessions.py @@ -81,14 +81,22 @@ async def resume_session(session_id: str): raise HTTPException(status_code=500, detail=f"resume failed: {e}") if not ok: raise HTTPException(status_code=500, detail="resume returned false") + # Rehydrate the viz image store from disk so the resumed session's + # projections/filmstrips show (pixels load lazily from the FileStore). + rehydrated = 0 + try: + rehydrated = server.rehydrate_session(session_id) + except Exception: + logger.exception("rehydrate_session failed") # Tell every connected browser to reload — they'll reconnect to the - # new session's state (embryos, transcript). + # new session's state (embryos, transcript, rehydrated imagery). 
try: await server.manager.broadcast({"type": "session_changed", "session_id": session_id}) except Exception: pass - return {"ok": True, "session_id": session_id, "active": True} + return {"ok": True, "session_id": session_id, "active": True, + "rehydrated_projections": rehydrated} @router.get("/api/sessions/{session_id}") async def get_session(session_id: str): diff --git a/gently/ui/web/server.py b/gently/ui/web/server.py index ff14f9bb..957e98fc 100644 --- a/gently/ui/web/server.py +++ b/gently/ui/web/server.py @@ -214,12 +214,24 @@ def _resolve_volume_path(self, embryo_id: str, timepoint: int) -> Optional[str]: return None + def _current_session_id(self) -> Optional[str]: + """The live agent session (source of truth), falling back to the + timelapse tracker. The tracker's session_id goes stale after a resume + with no active timelapse, so the live agent session is preferred.""" + bridge = getattr(self, "agent_bridge", None) + if bridge is not None and getattr(bridge, "agent", None) is not None: + sid = getattr(bridge.agent, "session_id", None) + if sid: + return sid + return self.timelapse_tracker.session_id + def _resolve_projection_path(self, embryo_id: str, timepoint: int) -> Optional[Path]: - """Resolve projection file path from FileStore.""" - if self.gently_store and self.timelapse_tracker.session_id: + """Resolve projection file path from FileStore (current session).""" + sid = self._current_session_id() + if self.gently_store and sid: try: proj_path = self.gently_store.get_projection_path( - self.timelapse_tracker.session_id, embryo_id, timepoint, + sid, embryo_id, timepoint, ) if proj_path and proj_path.exists(): return proj_path @@ -227,6 +239,44 @@ def _resolve_projection_path(self, embryo_id: str, timepoint: int) -> Optional[P logger.debug(f"FileStore projection path lookup failed: {e}") return None + def rehydrate_session(self, session_id: str) -> int: + """Repopulate the in-memory image store with the FileStore's persisted + projections for a 
(resumed) session, so galleries and filmstrips show + its historical data. + + Lightweight: only metadata-bearing ImageData entries are created (uid + ``volume_{embryo}_t{NNNN}``); the JPEG pixels load lazily on demand via + /api/images/{uid}/png (which falls back to the FileStore projection). + Resets the store first so the previous session's images don't linger. + Returns the number of projection entries added. + """ + if self.gently_store is None or not session_id: + return 0 + self.store = ImageStore() # drop the previous session's images + added = 0 + try: + embryos = self.gently_store.list_embryos(session_id) or [] + except Exception: + embryos = [] + for emb in embryos: + eid = emb.get("embryo_id") if isinstance(emb, dict) else getattr(emb, "embryo_id", None) + if not eid: + continue + try: + tps = self.gently_store.list_projection_timepoints(session_id, eid) + except Exception: + tps = [] + for tp in tps: + self.store.add_image(ImageData( + uid=f"volume_{eid}_t{tp:04d}", + data_type="volume_projection", + timestamp=f"{tp:06d}", # monotonic with timepoint for ordering + metadata={"embryo_id": eid, "timepoint": tp}, + )) + added += 1 + logger.info("Rehydrated %d projections for session %s", added, session_id) + return added + def _subscribe_to_events(self): """Subscribe to EventBus for automatic updates - broadcasts ALL events""" diff --git a/launch_gently.py b/launch_gently.py index 6caa3cba..b6523bb2 100644 --- a/launch_gently.py +++ b/launch_gently.py @@ -438,6 +438,13 @@ def _status_provider(): if agent.viz_server is not None: agent.viz_server.agent_bridge = bridge agent.viz_server.set_context_store(context_store) + # If launched into an existing session, rehydrate its persisted + # imagery so the galleries/filmstrips show data from the start. 
+ if session_to_resume: + try: + agent.viz_server.rehydrate_session(session_to_resume) + except Exception: + logger.debug("Startup rehydrate failed", exc_info=True) # ── Banner + serve ────────────────────────────────────────────── # The viz server runs in-process (uvicorn in a background task). With From c316399948af24448c58e2f620872aabc85dfba7 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 06:13:30 -0400 Subject: [PATCH 40/71] feat(web): rehydrate perception reasoning on resume (populates the views) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Projection rehydration made the imagery serve, but the Default/Film/reasoning views stayed empty — those are driven by the tracker's detection_reasoning (per-timepoint stage + reasoning + image uid), not the raw image store, and resume never repopulated it. Extend rehydrate_session to load predictions.jsonl per embryo and rebuild the timelapse tracker's detection_reasoning + projection_uids + per-embryo timepoints/current_stage. The reasoning items carry projection uids (volume_{embryo}_t{NNNN}) so filmstrip thumbnails resolve via the lazy FileStore projection path. to_dict() already emits detection_reasoning, so on the post-resume reload the client populates the filmstrip, status board, and reasoning panels. Verified against the real session: 246 predictions rehydrate across 5 embryos with stage progressions (early -> bean -> comma -> 1.5fold) and full reasoning. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/server.py | 51 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/gently/ui/web/server.py b/gently/ui/web/server.py index 957e98fc..fc584cbc 100644 --- a/gently/ui/web/server.py +++ b/gently/ui/web/server.py @@ -274,6 +274,57 @@ def rehydrate_session(self, session_id: str) -> int: metadata={"embryo_id": eid, "timepoint": tp}, )) added += 1 + + # Rehydrate the timelapse tracker's per-embryo perception state from + # predictions.jsonl so the Default / Film / reasoning views populate + # (those are driven by detection_reasoning, not the raw image store). + # Thumbnails resolve via the projection uids added above. + tracker = self.timelapse_tracker + try: + tracker.session_id = session_id + tracker.detection_reasoning = {} + tracker.projection_uids = {} + for emb in embryos: + eid = emb.get("embryo_id") if isinstance(emb, dict) else getattr(emb, "embryo_id", None) + if not eid: + continue + try: + preds = self.gently_store.get_predictions(session_id, eid) or [] + except Exception: + preds = [] + if not preds: + continue + items, puids, last_stage = [], {}, None + for p in preds: + tp = p.get("timepoint") + if tp is None: + continue + uid = f"volume_{eid}_t{tp:04d}" + puids[tp] = uid + stage = p.get("predicted_stage") + last_stage = stage or last_stage + items.append({ + "timepoint": tp, + "stage": stage, + "detected_stage": stage, + "reasoning": p.get("reasoning"), + "confidence": p.get("confidence"), + "projection_uid": uid, + "image_uid": uid, + "detector_name": "perception", + }) + tracker.detection_reasoning[eid] = items + tracker.projection_uids[eid] = puids + entry = tracker.embryos.setdefault(eid, { + "embryo_id": eid, "timepoints": 0, "is_complete": False, + "detections": {}, "current_stage": None, + }) + entry["timepoints"] = max((it["timepoint"] for it in items), default=0) + entry["current_stage"] = last_stage + tracker.total_timepoints = sum(len(v) for v in 
tracker.detection_reasoning.values()) + except Exception: + logger.exception("Tracker perception rehydration failed") + logger.info("Rehydrated %d projections for session %s", added, session_id) return added From 6d3cc12024a1db7a6af00c80d91ad388b41b0d4f Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:07:54 -0400 Subject: [PATCH 41/71] feat(web): filmstrip reasoning/detail panel on the side, not the bottom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Film view stacked the rows above the detail (reasoning) panel, so it sat at the bottom. Lay #view-filmstrip out as a flex row: scrollable rows on the left, the reasoning/detail panel pinned to the right (flex 0 0 360px, own scroll, border-left). The panel is hidden via :empty until a cell is clicked, so the rows use the full width when nothing is selected. Restores the side-panel layout (the weekend change appears to have been lost — no commit for it in history, likely uncommitted). CSS-only: a hard refresh picks it up. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/static/css/main.css | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index ea8dd142..fa584e30 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -3532,6 +3532,13 @@ kbd { padding: 0; } +/* Filmstrip: rows on the left, reasoning/detail panel pinned on the right. */ +.view-filmstrip { + display: flex; + flex-direction: row; + overflow: hidden; /* the two children scroll internally */ +} + /* ======================================== AMBIENT HEALTH PULSE ======================================== */ @@ -3714,8 +3721,11 @@ kbd { scrollbar shared by all rows. Labels pin to the left via position:sticky inside each row. 
*/ display: block; + flex: 1 1 auto; /* take the space left of the detail panel */ + min-width: 0; /* allow shrinking so the panel keeps its width */ + min-height: 0; overflow-x: auto; - overflow-y: hidden; + overflow-y: auto; scrollbar-width: thin; scrollbar-gutter: stable; position: relative; @@ -3871,17 +3881,17 @@ kbd { } .filmstrip-detail { + /* Side panel: pinned to the right of the rows, full height, own scroll. + Hidden until a cell is clicked (see :empty below) so the rows use the + full width when nothing is selected. */ + flex: 0 0 360px; + max-width: 42%; + border-left: 1px solid var(--border); background: var(--bg-card); - /* Cap the detail panel so it always leaves the rows visible AND - has a scrollable body of its own. Without this, when an item is - expanded the detail panel can grow past the viewport bottom and - the parent's scroll is unintuitive (mouse wheel over the rows - converts to horizontal). max-height keeps it bounded; overflow-y - lets long VLM summaries scroll on their own. */ - max-height: 60vh; overflow-y: auto; overscroll-behavior: contain; } +.filmstrip-detail:empty { display: none; } /* ======================================== VITALS VIEW From 8e3d26643df4452624b65db52c50b95807742f9d Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:18:13 -0400 Subject: [PATCH 42/71] feat(web): restore the original filmstrip side-panel CSS (from lost WIP) Replace my simpler side-panel layout with the polished version recovered from the orphaned WIP commit 0269e18d (preserved as branch recovered-filmstrip-wip): responsive width clamp(360px, 32vw, 520px), slide-in animation, :empty collapse, and the #filmstrip-detail .detail-split vertical stack. The JS detail rendering was already present in 0.21-dev, so this is CSS-only. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/static/css/main.css | 33 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index fa584e30..aeb46a01 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -3532,11 +3532,13 @@ kbd { padding: 0; } -/* Filmstrip: rows on the left, reasoning/detail panel pinned on the right. */ +/* Filmstrip: rows on the left, reasoning/detail panel pinned on the right. + (Recovered from the lost WIP commit 0269e18d.) */ .view-filmstrip { display: flex; flex-direction: row; - overflow: hidden; /* the two children scroll internally */ + align-items: stretch; + overflow: hidden; } /* ======================================== @@ -3721,9 +3723,8 @@ kbd { scrollbar shared by all rows. Labels pin to the left via position:sticky inside each row. */ display: block; - flex: 1 1 auto; /* take the space left of the detail panel */ - min-width: 0; /* allow shrinking so the panel keeps its width */ - min-height: 0; + flex: 1 1 0; /* flex-1 child: shrinks/grows as the panel opens */ + min-width: 0; overflow-x: auto; overflow-y: auto; scrollbar-width: thin; @@ -3881,17 +3882,27 @@ kbd { } .filmstrip-detail { - /* Side panel: pinned to the right of the rows, full height, own scroll. - Hidden until a cell is clicked (see :empty below) so the rows use the - full width when nothing is selected. */ - flex: 0 0 360px; - max-width: 42%; - border-left: 1px solid var(--border); background: var(--bg-card); + /* Right-side panel: fixed-ish width that shrinks gracefully on narrow + viewports. Scrolls vertically inside itself so long VLM summaries + don't push the layout. When empty (no frame selected) it collapses + entirely so the rows get full width. (Recovered from WIP 0269e18d.) 
*/ + flex: 0 0 auto; + width: clamp(360px, 32vw, 520px); + border-left: 1px solid var(--border); overflow-y: auto; overscroll-behavior: contain; + animation: filmstripDetailIn 0.18s ease-out; } .filmstrip-detail:empty { display: none; } +@keyframes filmstripDetailIn { + from { transform: translateX(8px); opacity: 0; } + to { transform: translateX(0); opacity: 1; } +} +/* In the narrow side panel, stack the image | VLM summary split vertically. */ +#filmstrip-detail .detail-split { + grid-template-columns: 1fr; +} /* ======================================== VITALS VIEW From 0c69e55cfb43c050f74aae9b369644fba5f7b5f9 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:26:35 -0400 Subject: [PATCH 43/71] fix(imaging): stored projection is the three-view of View A, not A|B MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit generate_jpeg_projection used extract_view_a_and_max_project, which only splits views for 4D input. Volumes are stored 3D (Z, Y, X) with the two diSPIM views concatenated along X (width = 2*height), so it max-projected BOTH views -> the A|B side-by-side image, as a flat XY max. Now: pick View A (index 0 of 4D, or the left half of a 3D dual-view), then build the three-orthogonal-view layout (projection_three_view: [XY|YZ] over [XZ]) — matching what the perceiver sees and what we intend to store. Also fixes near-black filmstrip thumbnails (a single cropped view fills the frame instead of a tiny embryo in a wide A|B canvas). 2D/other inputs fall back to a percentile-stretched max projection. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/core/imaging.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/gently/core/imaging.py b/gently/core/imaging.py index 6e830351..bc74ec26 100644 --- a/gently/core/imaging.py +++ b/gently/core/imaging.py @@ -282,9 +282,23 @@ def generate_jpeg_projection( return None try: - max_proj = extract_view_a_and_max_project(volume) - normalized = normalize_to_uint8(max_proj, method="percentile", - p_low=1, p_high=99.5) + # Reduce to a single 3D view, then build the three-orthogonal-view + # layout (the projection we actually want — matches what the + # perceiver sees). View A comes from index 0 of a 4D (Views,Z,Y,X) + # volume; a 3D dual-view volume stores the two views concatenated + # along X (width >= 2*height), so take the left half (View A) — + # otherwise the projection is an A|B side-by-side, not a clean view. + vol = np.squeeze(volume) + if vol.ndim == 4: + vol = vol[0] + if vol.ndim == 3: + z, h, w = vol.shape + if w >= 2 * h: + vol = vol[:, :, : w // 2] + normalized, _ = projection_three_view(vol) + else: + normalized = normalize_to_uint8(vol, method="percentile", + p_low=1, p_high=99.5) pil_image = Image.fromarray(normalized) From e6646fd3c9c8faa23c36a9da8a8e1745f50905ec Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:29:51 -0400 Subject: [PATCH 44/71] =?UTF-8?q?feat(scripts):=20regenerate=5Fprojections?= =?UTF-8?q?=20=E2=80=94=20refresh=20a=20session's=20projections=20from=20v?= =?UTF-8?q?olumes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One-off tool to rewrite a session's projections/t*.jpg from its stored volume TIFFs using the current generate_jpeg_projection (e.g. after the three-view fix, to refresh older sessions whose on-disk projections are the stale A|B dual-view). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/regenerate_projections.py | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 scripts/regenerate_projections.py diff --git a/scripts/regenerate_projections.py b/scripts/regenerate_projections.py new file mode 100644 index 00000000..54e07281 --- /dev/null +++ b/scripts/regenerate_projections.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +"""Regenerate JPEG projections for a session from its stored volumes. + +Use after the projection format changes (e.g. the three-view fix) to refresh +older sessions whose on-disk projections are stale (e.g. the old A|B +side-by-side max projection). Reads each volume TIFF and rewrites its +projections/t{NNNN}.jpg via the current generate_jpeg_projection. + +Usage: + python scripts/regenerate_projections.py + python scripts/regenerate_projections.py --embryo embryo_001 +""" + +import argparse +import re +import sys +from pathlib import Path + +from gently.settings import settings +from gently.core.imaging import load_volume, generate_jpeg_projection + + +def _session_dir(storage_base: Path, session_id: str) -> Path | None: + sessions = storage_base / "sessions" + if not sessions.exists(): + return None + # Folder names are {date}_{time}_{slug}_{id8}; match the id suffix. 
+ matches = [p for p in sessions.glob(f"*{session_id}*") if p.is_dir()] + return matches[0] if matches else None + + +def regenerate(session_id: str, only_embryo: str | None = None) -> int: + base = Path(settings.storage.base_path) + sdir = _session_dir(base, session_id) + if sdir is None: + print(f"Session {session_id} not found under {base / 'sessions'}") + return 0 + embryos_dir = sdir / "embryos" + if not embryos_dir.exists(): + print(f"No embryos dir in {sdir}") + return 0 + + total = 0 + for emb_dir in sorted(d for d in embryos_dir.iterdir() if d.is_dir()): + if only_embryo and emb_dir.name != only_embryo: + continue + vol_dir = emb_dir / "volumes" + proj_dir = emb_dir / "projections" + if not vol_dir.exists(): + continue + tifs = sorted(vol_dir.glob("t*.tif")) + n = 0 + for vf in tifs: + m = re.match(r"(t\d+)", vf.stem) + if not m: + continue + proj_path = proj_dir / f"{m.group(1)}.jpg" + try: + vol = load_volume(vf) + if generate_jpeg_projection(vol, proj_path) is not None: + n += 1 + except Exception as e: + print(f" ! 
{vf.name}: {e}") + print(f" {emb_dir.name}: regenerated {n}/{len(tifs)} projections") + total += n + print(f"Done: {total} projections regenerated for {session_id}") + return total + + +if __name__ == "__main__": + ap = argparse.ArgumentParser(description="Regenerate session projections from volumes") + ap.add_argument("session_id") + ap.add_argument("--embryo", default=None, help="Only this embryo id") + args = ap.parse_args() + regenerate(args.session_id, args.embryo) From eead6384baac76fdc70697d6b6d55541c39619a5 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:30:07 -0400 Subject: [PATCH 45/71] fix(scripts): add project root to sys.path in regenerate_projections Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/regenerate_projections.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/regenerate_projections.py b/scripts/regenerate_projections.py index 54e07281..68110dbd 100644 --- a/scripts/regenerate_projections.py +++ b/scripts/regenerate_projections.py @@ -16,6 +16,11 @@ import sys from pathlib import Path +# Ensure the project root is importable when run as scripts/regenerate_projections.py +_ROOT = Path(__file__).resolve().parent.parent +if str(_ROOT) not in sys.path: + sys.path.insert(0, str(_ROOT)) + from gently.settings import settings from gently.core.imaging import load_volume, generate_jpeg_projection From b2bf1f936b4e8947e1d8370f7440c2846d40ef3f Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:35:51 -0400 Subject: [PATCH 46/71] fix(web): content-aware cache for FileStore projections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On-disk projections can be regenerated (e.g. after the three-view fix), but /api/images/{uid}/png served them with Cache-Control: immutable + a uid ETag, so the browser kept the stale image forever. For the FileStore path, use an mtime+size ETag and a short max-age so a regenerated projection is picked up without a hard refresh. 
(In-memory base64 images keep immutable — they don't change.) Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/images.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/gently/ui/web/routes/images.py b/gently/ui/web/routes/images.py index 2ecc805a..5d505b6a 100644 --- a/gently/ui/web/routes/images.py +++ b/gently/ui/web/routes/images.py @@ -102,15 +102,24 @@ async def get_image_png(uid: str): except Exception as e: logger.warning(f"Failed to load image {uid} from DataStore: {e}") - # Fallback to FileStore JPEG projections (persistent on-disk) + # Fallback to FileStore JPEG projections (persistent on-disk). + # Unlike the in-memory base64 images, an on-disk projection CAN change + # (e.g. regenerated after a projection-format fix), so we must NOT mark + # it immutable with a content-independent (uid) ETag — that pins the + # browser to the stale image. Use a content-aware ETag (mtime+size) + # and a short max-age so a regeneration is picked up. if server.gently_store and parsed: embryo_id, timepoint = parsed proj_path = server._resolve_projection_path(embryo_id, timepoint) if proj_path: + st = proj_path.stat() return FileResponse( str(proj_path), media_type="image/jpeg", - headers=cache_headers, + headers={ + "Cache-Control": "public, max-age=3600", + "ETag": f'"{uid}-{int(st.st_mtime)}-{st.st_size}"', + }, ) raise HTTPException(status_code=404, detail=f"Image {uid} not found") From 92406310cf68a35f6ff4d708d7cc9ce1588f821e Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:48:13 -0400 Subject: [PATCH 47/71] =?UTF-8?q?fix(imaging):=20don't=20split=203D=20volu?= =?UTF-8?q?me=20by=20width=20=E2=80=94=20it=20halved=20centered=20embryos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix split a 3D volume to the "left half" when width >= 2*height, assuming dual-view A|B concatenation. 
But the embryo is typically centered and straddles the X midline in a 2:1 (e.g. 512x1024) field, so the crop sliced it in half — the "XY rendered halfway" bug. Project the full 3D volume into the three orthogonal views instead; only an explicit 4D (Views,Z,Y,X) volume uses View A. Verified: the XY view now shows the complete embryo. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/core/imaging.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/gently/core/imaging.py b/gently/core/imaging.py index bc74ec26..9f5d2a96 100644 --- a/gently/core/imaging.py +++ b/gently/core/imaging.py @@ -282,19 +282,16 @@ def generate_jpeg_projection( return None try: - # Reduce to a single 3D view, then build the three-orthogonal-view - # layout (the projection we actually want — matches what the - # perceiver sees). View A comes from index 0 of a 4D (Views,Z,Y,X) - # volume; a 3D dual-view volume stores the two views concatenated - # along X (width >= 2*height), so take the left half (View A) — - # otherwise the projection is an A|B side-by-side, not a clean view. + # Build the three-orthogonal-view layout (the projection we actually + # want — matches what the perceiver sees). For an explicit 4D + # (Views, Z, Y, X) volume, use View A. For a 3D volume, project the + # whole thing — do NOT try to split views by aspect ratio: the embryo + # is often centered and straddles the X midline, so a width-based + # "dual-view" guess slices it in half (the XY-rendered-halfway bug). 
vol = np.squeeze(volume) if vol.ndim == 4: vol = vol[0] if vol.ndim == 3: - z, h, w = vol.shape - if w >= 2 * h: - vol = vol[:, :, : w // 2] normalized, _ = projection_three_view(vol) else: normalized = normalize_to_uint8(vol, method="percentile", From 95224f714dce2aba96776ed83b63ed42b6aa0298 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 07:52:24 -0400 Subject: [PATCH 48/71] docs: record dual-view readiness as a design constraint Co-Authored-By: Claude Opus 4.8 (1M context) --- notes/biologist-readiness-plan.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/notes/biologist-readiness-plan.md b/notes/biologist-readiness-plan.md index 701a4fc5..03c25844 100644 --- a/notes/biologist-readiness-plan.md +++ b/notes/biologist-readiness-plan.md @@ -332,6 +332,8 @@ Incremental — the backend protocol for agent control already exists (`/ws/agen **Generalization:** per-sample **channels/wavelengths/per-line-power model** (+ objective/binning) into `EmbryoState`/volume metadata — the "looks like microscope software" deliverable *and* the stain generalizer; **parameterize `gently_perception`** for non-celegans (cross-repo); wire startup wizard → `load_organism()`; route `validation.py` through `get_organism()`; reconcile the two `DevelopmentalStage` enums. +**Dual-view readiness (design constraint).** All current data is single-view; dual-view is the intended general capability (single-view = `n_views == 1`). Today view B is dropped only at the explicit 4D `(Views,Z,Y,X)` → `vol[0]` path in `generate_jpeg_projection`. **Rule: view count/layout is declared by acquisition in volume metadata — never inferred from pixel shape** (an aspect-ratio "dual-view" guess sliced centered single-view embryos in half; fixed by removing it). 
To go dual-view later (additive): (1) device layer records `n_views`/layout in the volume sidecar; (2) uid scheme extends `volume_{embryo}_t{NNNN}` → optional `…_vA`/`…_vB` (bare form stays back-compat); (3) `generate_jpeg_projection` emits a three-view per present view; (4) filmstrip/detail panel + perception become view-aware (toggle / side-by-side / feed one or both). Nothing in the single-view path blocks this. + **Refactors (after their tests; medium risk):** `bridge.py` dispatch table + case-fold fix (high value for web control); `verifier.py` consensus dedup (capture truth-table fixtures first; preserve `ensemble_size=50` + conservative on-error defaults); `timelapse.py` rule `to_dict()/from_dict()` then `TimelapseStatePersister` + `RuleEngine`; `file_store.py` mixin split; `device_layer.py` `@json_endpoint`; `claude_client.py` `_vision_call`; consolidate pixel→stage math (duplicated 4×) into `gently.core.coordinates`; web `embryos.js` split (`copilot-chat.js`, `markdown.js`, `embryo-views.js`). --- From a356dfd43baeea43d553136cd6e16ec491e6d9dc Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 09:08:31 -0400 Subject: [PATCH 49/71] feat(launcher): open the web UI in Chrome (GENTLY_BROWSER override, default fallback) Co-Authored-By: Claude Opus 4.8 (1M context) --- launch_gently.py | 51 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/launch_gently.py b/launch_gently.py index b6523bb2..a2e590fa 100644 --- a/launch_gently.py +++ b/launch_gently.py @@ -122,6 +122,51 @@ def _print_banner(viz_url, device_connected, offline, storage_dir, log_file, res print() +def _open_browser(url: str) -> None: + """Open the web UI, preferring Google Chrome. + + Override with GENTLY_BROWSER (a webbrowser name like 'firefox', or a full + path to a browser executable). Falls back to the OS default browser if + Chrome can't be found, so this never blocks startup. 
+ """ + import webbrowser + + override = os.environ.get("GENTLY_BROWSER", "").strip() + + # 1) Registered browser names (override first, then Chrome aliases). + for name in ([override] if override else []) + ["chrome", "google-chrome", "chromium"]: + try: + webbrowser.get(name).open(url) + return + except Exception: + pass + + # 2) Explicit executables (an override path, then known Chrome locations). + candidates = [override] if override else [] + candidates += [ + shutil.which("chrome"), + r"C:\Program Files\Google\Chrome\Application\chrome.exe", + r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe", + ] + for exe in candidates: + try: + if exe and Path(exe).exists(): + webbrowser.register( + "gently-browser", None, + webbrowser.BackgroundBrowser(exe), preferred=True, + ) + webbrowser.get("gently-browser").open(url) + return + except Exception: + pass + + # 3) Fall back to the OS default. + try: + webbrowser.open(url) + except Exception: + pass + + def run_ink_picker(tui_dist: Path, sessions_json: str) -> str | None: """ Spawn the Ink TUI in session-picker mode and capture the selection. @@ -467,11 +512,7 @@ def _status_provider(): print(" (Save this now. Add users via the admin API; GENTLY_NO_AUTH=1 disables auth.)\n") if viz_url and not no_browser: - try: - import webbrowser - webbrowser.open(viz_url) - except Exception: - pass + _open_browser(viz_url) # Keep the event loop alive so the in-process viz server keeps serving. 
# On Windows the Proactor loop won't surface Ctrl-C while blocked on a From 76505f4444d9c65cfb30e833e911f4b5d74277e9 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 09:34:49 -0400 Subject: [PATCH 50/71] feat(scripts): --all to regenerate projections for every session Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/regenerate_projections.py | 85 +++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 28 deletions(-) diff --git a/scripts/regenerate_projections.py b/scripts/regenerate_projections.py index 68110dbd..7b6241ff 100644 --- a/scripts/regenerate_projections.py +++ b/scripts/regenerate_projections.py @@ -1,14 +1,15 @@ #!/usr/bin/env python3 -"""Regenerate JPEG projections for a session from its stored volumes. +"""Regenerate JPEG projections for a session (or all sessions) from volumes. Use after the projection format changes (e.g. the three-view fix) to refresh -older sessions whose on-disk projections are stale (e.g. the old A|B -side-by-side max projection). Reads each volume TIFF and rewrites its -projections/t{NNNN}.jpg via the current generate_jpeg_projection. +sessions whose on-disk projections are stale (e.g. the old flat-XY max +projection). Reads each volume TIFF and rewrites its projections/t{NNNN}.jpg +via the current generate_jpeg_projection. Usage: python scripts/regenerate_projections.py python scripts/regenerate_projections.py --embryo embryo_001 + python scripts/regenerate_projections.py --all """ import argparse @@ -25,26 +26,10 @@ from gently.core.imaging import load_volume, generate_jpeg_projection -def _session_dir(storage_base: Path, session_id: str) -> Path | None: - sessions = storage_base / "sessions" - if not sessions.exists(): - return None - # Folder names are {date}_{time}_{slug}_{id8}; match the id suffix. 
- matches = [p for p in sessions.glob(f"*{session_id}*") if p.is_dir()] - return matches[0] if matches else None - - -def regenerate(session_id: str, only_embryo: str | None = None) -> int: - base = Path(settings.storage.base_path) - sdir = _session_dir(base, session_id) - if sdir is None: - print(f"Session {session_id} not found under {base / 'sessions'}") - return 0 - embryos_dir = sdir / "embryos" +def _regen_folder(folder: Path, only_embryo: str | None = None) -> int: + embryos_dir = folder / "embryos" if not embryos_dir.exists(): - print(f"No embryos dir in {sdir}") return 0 - total = 0 for emb_dir in sorted(d for d in embryos_dir.iterdir() if d.is_dir()): if only_embryo and emb_dir.name != only_embryo: @@ -54,6 +39,8 @@ def regenerate(session_id: str, only_embryo: str | None = None) -> int: if not vol_dir.exists(): continue tifs = sorted(vol_dir.glob("t*.tif")) + if not tifs: + continue n = 0 for vf in tifs: m = re.match(r"(t\d+)", vf.stem) @@ -61,20 +48,62 @@ def regenerate(session_id: str, only_embryo: str | None = None) -> int: continue proj_path = proj_dir / f"{m.group(1)}.jpg" try: - vol = load_volume(vf) - if generate_jpeg_projection(vol, proj_path) is not None: + if generate_jpeg_projection(load_volume(vf), proj_path) is not None: n += 1 except Exception as e: - print(f" ! {vf.name}: {e}") - print(f" {emb_dir.name}: regenerated {n}/{len(tifs)} projections") + print(f" ! 
{vf.name}: {e}") + print(f" {emb_dir.name}: regenerated {n}/{len(tifs)} projections") total += n + return total + + +def _session_dir(storage_base: Path, session_id: str) -> Path | None: + sessions = storage_base / "sessions" + if not sessions.exists(): + return None + matches = [p for p in sessions.glob(f"*{session_id}*") if p.is_dir()] + return matches[0] if matches else None + + +def regenerate(session_id: str, only_embryo: str | None = None) -> int: + base = Path(settings.storage.base_path) + sdir = _session_dir(base, session_id) + if sdir is None: + print(f"Session {session_id} not found under {base / 'sessions'}") + return 0 + total = _regen_folder(sdir, only_embryo) print(f"Done: {total} projections regenerated for {session_id}") return total +def regenerate_all() -> int: + base = Path(settings.storage.base_path) + sessions = base / "sessions" + if not sessions.exists(): + print(f"No sessions dir at {sessions}") + return 0 + folders = sorted( + p for p in sessions.iterdir() + if p.is_dir() and any((p / "embryos").glob("*/volumes/t*.tif")) + ) + print(f"Regenerating projections for {len(folders)} session(s) with volumes...") + grand = 0 + for i, folder in enumerate(folders, 1): + print(f"[{i}/{len(folders)}] {folder.name}") + grand += _regen_folder(folder) + print(f"ALL DONE: {grand} projections regenerated across {len(folders)} sessions") + return grand + + if __name__ == "__main__": ap = argparse.ArgumentParser(description="Regenerate session projections from volumes") - ap.add_argument("session_id") + ap.add_argument("session_id", nargs="?", help="Session id (omit with --all)") + ap.add_argument("--all", action="store_true", help="Regenerate every session that has volumes") ap.add_argument("--embryo", default=None, help="Only this embryo id") args = ap.parse_args() - regenerate(args.session_id, args.embryo) + if args.all: + regenerate_all() + elif args.session_id: + regenerate(args.session_id, args.embryo) + else: + ap.error("provide a session_id or 
--all") From f87b3c9844d36efc1ee2b8dffda6250b865f2103 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 09:38:11 -0400 Subject: [PATCH 51/71] fix(web): filmstrip thumbnail crops to the XY embryo, not the black centre The stored projection is now a three-view ([XY|YZ] over [XZ]); object-fit:cover center-cropped the square thumb to the black XY|YZ divider, so thumbs looked empty. object-position: left center crops to the embryo-bearing XY/XZ column. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/static/css/main.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index aeb46a01..5fcab675 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -3859,6 +3859,10 @@ kbd { border-radius: 4px; border: 2px solid; object-fit: cover; + /* The stored projection is a three-view ([XY|YZ] over [XZ]); the embryo is + in the LEFT column (XY/XZ), and the centre is the black XY|YZ divider. + Crop to the left so the square thumbnail shows the embryo, not the gap. */ + object-position: left center; background: var(--bg-dark); } From 9d0790d8dc93b54d4f2f6f5d66e045110c17cc94 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 12:36:22 -0400 Subject: [PATCH 52/71] Add room-light toggle and device-layer terminal UI - Devices tab: SwitchBot room-light on/off button (web button + JS, gated route, device-layer handlers, and client methods). - Device layer: dependency-free curated console UI (console_ui.py) with starting/READY status panels and a plain-language hardware-failure diagnostic panel instead of a raw Python traceback when a device (e.g. a COM port) is powered off. ASCII-safe + color/Unicode capability detection for Windows consoles. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/hardware/console_ui.py | 187 +++++++++++++++++++++++++ gently/hardware/dispim/client.py | 14 ++ gently/hardware/dispim/device_layer.py | 151 ++++++++++++++++++++ gently/ui/web/routes/data.py | 41 ++++++ gently/ui/web/static/css/main.css | 42 ++++++ gently/ui/web/static/js/devices.js | 102 ++++++++++++++ gently/ui/web/templates/index.html | 12 ++ start_device_layer.py | 79 +++++++++-- 8 files changed, 617 insertions(+), 11 deletions(-) create mode 100644 gently/hardware/console_ui.py diff --git a/gently/hardware/console_ui.py b/gently/hardware/console_ui.py new file mode 100644 index 00000000..abc8ca64 --- /dev/null +++ b/gently/hardware/console_ui.py @@ -0,0 +1,187 @@ +"""Lightweight terminal styling for the device-layer console. + +Plain ``print`` to stdout, no third-party dependency. ``rich`` is deliberately +avoided here — it has caused Unicode/encoding issues on Windows consoles (see +the stdout-suppression note in ``dispim/device_layer.py``). + +The point of this module is to give the operator a readable, always-visible +picture of the device layer at the terminal — distinct from the file log. The +file log keeps the full INFO/DEBUG firehose; the console shows a curated set of +milestones and a status panel. + +Robust by construction, because the device layer runs on Windows consoles: + +* **Colour** (ANSI) is auto-disabled unless stdout is a TTY. On Windows we try + to enable virtual-terminal processing first; if that fails, colour is off so + raw escape codes never leak. ``NO_COLOR`` (https://no-color.org) and a + ``dumb`` ``TERM`` also disable it. +* **Box-drawing** glyphs are used only when stdout's encoding is UTF-based; + otherwise ASCII equivalents are used so a cp1252 console shows clean output. +* ``out()`` is defensive: any residual ``UnicodeEncodeError`` is caught and the + line re-emitted with ``errors="replace"`` rather than crashing startup. 
+""" +from __future__ import annotations + +import os +import sys + +# Visible width of the status panel (border rules). Content lines are written +# without a right border so coloured text never needs width arithmetic. +WIDTH = 64 + + +def _enable_windows_vt() -> bool: + """Best-effort: turn on ANSI escape handling for the Windows console. + + Returns True if VT processing is (now) enabled or we're not on Windows. + """ + if sys.platform != "win32": + return True + try: + import ctypes + from ctypes import wintypes + + kernel32 = ctypes.windll.kernel32 + ENABLE_VT = 0x0004 + handle = kernel32.GetStdHandle(-11) # STD_OUTPUT_HANDLE + mode = wintypes.DWORD() + if not kernel32.GetConsoleMode(handle, ctypes.byref(mode)): + return False + return bool(kernel32.SetConsoleMode(handle, mode.value | ENABLE_VT)) + except Exception: + return False + + +def _detect_color() -> bool: + if sys.stdout is None or not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty(): + return False + if os.environ.get("NO_COLOR") is not None or os.environ.get("TERM") == "dumb": + return False + return _enable_windows_vt() + + +def _detect_unicode() -> bool: + enc = (getattr(sys.stdout, "encoding", None) or "").lower() + return "utf" in enc + + +_USE_COLOR = _detect_color() +_USE_UNICODE = _detect_unicode() + +# Glyphs: pretty (UTF) vs ASCII fallback. +if _USE_UNICODE: + _HEAVY, _LIGHT, _DOT, _CHECK, _MID, _BULLET = "═", "─", "●", "✓", "·", "•" +else: + _HEAVY, _LIGHT, _DOT, _CHECK, _MID, _BULLET = "=", "-", "*", "+", "-", "-" + +# Public separator for callers that build their own value strings. 
+MIDDOT = f" {_MID} " + +_CODES = { + "reset": "\033[0m", + "bold": "\033[1m", + "dim": "\033[2m", + "green": "\033[32m", + "cyan": "\033[36m", + "yellow": "\033[33m", + "red": "\033[31m", + "blue": "\033[34m", + "magenta": "\033[35m", + "grey": "\033[90m", +} + + +def supports_color() -> bool: + return _USE_COLOR + + +def c(text, *styles: str) -> str: + """Wrap *text* in ANSI styles, or return it unchanged when colour is off.""" + if not _USE_COLOR or not styles: + return str(text) + prefix = "".join(_CODES.get(s, "") for s in styles) + return f"{prefix}{text}{_CODES['reset']}" + + +def out(text: str = "") -> None: + """Print one line to stdout, flushing so it shows immediately. + + Never raises on encoding: a console that can't represent a character gets + a replacement rather than a crashed startup. + """ + try: + print(text, flush=True) + except UnicodeEncodeError: + enc = (getattr(sys.stdout, "encoding", None) or "ascii") + sys.stdout.write(text.encode(enc, "replace").decode(enc, "replace") + "\n") + sys.stdout.flush() + + +def rule(heavy: bool = True, style: str = "grey") -> None: + out(c((_HEAVY if heavy else _LIGHT) * WIDTH, style)) + + +def header(title: str, badge: str | None = None, badge_style: str = "yellow") -> None: + """Top of a panel: a heavy rule, a title row (optional right-aligned badge), + and a closing heavy rule.""" + rule(heavy=True) + line = " " + c(title, "bold", "cyan") + if badge: + # Right-align using uncoloured widths so padding ignores ANSI codes. + pad = max(1, WIDTH - len(" " + title) - len(badge) - 1) + line += " " * pad + c(badge, "bold", badge_style) + out(line) + rule(heavy=True) + + +def row(label: str, value: str, label_w: int = 12, label_style: str = "grey") -> None: + """A `` label value`` line inside a panel.""" + out(f" {c(label.ljust(label_w), label_style)}{value}") + + +def sub(label: str, value: str, label_w: int = 10) -> None: + """An indented sub-row, e.g. 
a device-group breakdown.""" + out(f" {c(label.ljust(label_w), 'grey')}{value}") + + +def step(n: int, total: int, label: str) -> None: + """A startup progress line: `` [2/5] Starting Micro-Manager core``""" + out(f" {c(f'[{n}/{total}]', 'cyan')} {label}") + + +def step_done(detail: str = "ok") -> None: + """A check-mark continuation under the most recent step.""" + out(f" {c(_CHECK, 'green')} {c(detail, 'grey')}") + + +def note(text: str, style: str = "grey") -> None: + out(f" {c(text, style)}") + + +def bullet(text: str) -> None: + out(f" {c(_BULLET, 'cyan')} {text}") + + +def error_panel(title: str, summary: str, details: str | None = None, + hints=None, log_file=None) -> None: + """A red FAILED panel: one-line summary, optional detail, fix hints, log path. + + Used at the top-level startup catch so an operator sees a plain-language + diagnosis instead of a Python traceback (which still goes to the log file). + """ + out() + header(title, badge="FAILED", badge_style="red") + note(summary, "yellow") + if details: + out() + row("Details", details, label_w=10) + if hints: + out() + note("Try this:", "bold") + for h in hints: + bullet(h) + if log_file: + out() + row("Full log", str(log_file), label_w=10) + rule(heavy=True) + out() diff --git a/gently/hardware/dispim/client.py b/gently/hardware/dispim/client.py index 03ff8009..2490dc24 100644 --- a/gently/hardware/dispim/client.py +++ b/gently/hardware/dispim/client.py @@ -802,6 +802,20 @@ async def get_led_status(self) -> Dict: """Get current LED status.""" return await self._api_get('/api/led/status') + async def set_room_light(self, state: str = 'off') -> Dict: + """Switch the diSPIM room light on/off via the SwitchBot Bot. + + Hits ``POST /api/room_light/set`` directly (no Bluesky queue, no + experiment trace) — a setup accessory poke. ``state`` is + 'on' | 'off' | 'press'. Blocks at the device layer until the BLE + command lands (~1-2 s). 
+ """ + return await self._api_post('/api/room_light/set', {'state': state}) + + async def get_room_light_status(self) -> Dict: + """Read the room light's cached on/off state (no BLE round-trip).""" + return await self._api_get('/api/room_light/status') + async def set_temperature(self, target_c: float) -> Dict: """Command the thermal-controller setpoint (Celsius). Non-blocking — the controller ramps; poll get_temperature() for the lock state.""" diff --git a/gently/hardware/dispim/device_layer.py b/gently/hardware/dispim/device_layer.py index a1af5e4a..3b50aded 100644 --- a/gently/hardware/dispim/device_layer.py +++ b/gently/hardware/dispim/device_layer.py @@ -44,6 +44,7 @@ from gently.exceptions import HardwareError, AcquisitionError from gently.log_config import configure_logging from gently.settings import settings +from gently.hardware import console_ui as cui # Bluesky imports from bluesky import RunEngine @@ -184,15 +185,21 @@ async def initialize(self): logger.info("GENTLY DEVICE LAYER") logger.info("=" * 60) + cui.out() + cui.note("Starting device layer...", "bold") + # [1/5] Load config + cui.step(1, 5, "Loading configuration") logger.info("[1/5] Loading configuration...") with open(self.config_path, 'r') as f: self.config = yaml.safe_load(f) logger.info("Config loaded from %s", self.config_path) + cui.step_done(str(self.config_path)) # [2/5] MMCore initialization, routed through the DiSPIMSystem facade # so this process never touches `core.*` directly outside the # devices/ package. 
+ cui.step(2, 5, "Initializing Micro-Manager core") logger.info("[2/5] Initializing Micro-Manager Core (direct)...") from .devices.system import DiSPIMSystem @@ -215,6 +222,7 @@ async def initialize(self): self.system.load_system_configuration(mm_config_path) logger.info("MMCore initialized (direct, in-process)") logger.info("Loaded devices: %s", self.system.get_loaded_devices()) + cui.step_done(Path(mm_config_path).name) # Register MMCore event callback so we get push notifications for # property changes, stage moves, exposure changes, etc. — anything the @@ -223,6 +231,7 @@ async def initialize(self): self._register_mmcore_callbacks() # [3/5] Create Ophyd devices + cui.step(3, 5, "Creating devices") logger.info("[3/5] Creating Ophyd devices...") from .device_factory import create_devices_from_mmcore # Suppress rich console output to avoid Unicode issues on Windows @@ -270,6 +279,8 @@ async def initialize(self): except Exception as exc: logger.warning("Could not create temperature controller: %s", exc) + cui.step_done(f"{len(self.devices)} devices") + # Push XY safety bounds down to the ASI Tiger firmware so the joystick # can't drive past Layer-1 software limits. 
The XY_STAGE_*_UM constants # in devices/stage.py are the single source of truth — both the @@ -320,6 +331,7 @@ async def initialize(self): logger.error("Could not enable XY joystick: %s", exc) # [4/5] Initialize RunEngine + cui.step(4, 5, "Initializing RunEngine") logger.info("[4/5] Initializing RunEngine...") self.RE = RunEngine({}) @@ -381,10 +393,13 @@ def collect_docs(name, doc): self.RE.subscribe(collect_docs) logger.info("RunEngine ready") + cui.step_done("ready") # [5/5] Load plans + cui.step(5, 5, "Loading plans") logger.info("[5/5] Loading plans...") self._load_plans() + cui.step_done(f"{len(self.plans)} plans") logger.info("=" * 60) logger.info("Device layer initialized successfully") @@ -1327,6 +1342,77 @@ async def handle_set_led(self, request): 'traceback': traceback.format_exc() }, status=500) + def _room_light_device(self): + """Resolve the room-light SwitchBot from the device registry. + + Prefers the conventional 'room_light' key (config.yml name), but + falls back to scanning for any SwitchBot instance so a differently + named bot still works. Returns None when no bot is configured. + """ + bot = self.devices.get('room_light') + if bot is not None: + return bot + try: + from gently.hardware.switchbot import SwitchBot + except Exception: + return None + for dev in self.devices.values(): + if isinstance(dev, SwitchBot): + return dev + return None + + async def handle_get_room_light_status(self, request): + """GET /api/room_light/status - cached on/off state of the room light. + + Reads the SwitchBot's last-commanded state (no BLE round-trip, so it's + cheap to poll). 'unknown' until the first on/off command lands. 
+ """ + try: + bot = self._room_light_device() + if bot is None: + return web.json_response({'success': False, 'available': False, + 'error': 'room_light device not configured'}) + state = bot.read().get(bot.name, {}).get('value', 'unknown') + return web.json_response({'success': True, 'available': True, 'state': state}) + except Exception as e: + import traceback + return web.json_response({'success': False, 'available': False, 'error': str(e), + 'traceback': traceback.format_exc()}, status=500) + + async def handle_set_room_light(self, request): + """POST /api/room_light/set - drive the room-light SwitchBot. + + Body: {"state": "on" | "off" | "press"}. Blocks until the BLE command + lands (the bot's servo move is ~0.5-1 s plus connect latency). + """ + try: + data = await request.json() + state = str(data.get('state', '')).lower() + if state not in ('on', 'off', 'press'): + return web.json_response({'success': False, + 'error': f"state {state!r} must be on, off, or press"}, status=400) + bot = self._room_light_device() + if bot is None: + return web.json_response({'success': False, + 'error': 'room_light device not configured'}, status=503) + + status = bot.set(state) + import time + timeout = float(getattr(bot, 'timeout', 20.0)) + 5 + start = time.time() + while not status.done and (time.time() - start) < timeout: + await asyncio.sleep(0.1) + + if status.done and status.success: + new_state = bot.read().get(bot.name, {}).get('value', state) + return web.json_response({'success': True, 'state': new_state}) + return web.json_response({'success': False, + 'error': f'failed to set room light to {state}'}, status=502) + except Exception as e: + import traceback + return web.json_response({'success': False, 'error': str(e), + 'traceback': traceback.format_exc()}, status=500) + async def handle_get_temperature_status(self, request): """GET /api/temperature/status - current temperature, setpoint, lock state.""" try: @@ -2339,6 +2425,8 @@ async def on_start(self): 
self._app.router.add_post('/api/led/set', self.handle_set_led) self._app.router.add_get('/api/temperature/status', self.handle_get_temperature_status) self._app.router.add_post('/api/temperature/set', self.handle_set_temperature) + self._app.router.add_get('/api/room_light/status', self.handle_get_room_light_status) + self._app.router.add_post('/api/room_light/set', self.handle_set_room_light) self._app.router.add_post('/api/camera/led_mode', self.handle_set_camera_led_mode) self._app.router.add_post('/api/camera/exposure', self.handle_set_camera_exposure) self._app.router.add_get('/api/camera/exposure', self.handle_get_camera_exposure) @@ -2390,10 +2478,72 @@ async def on_start(self): logger.info("Endpoints: GET /api/status, GET /api/devices, GET /api/plans, POST /api/queue/item/add, ...") await site.start() + self._print_ready_panel() + + def _categorize_devices(self): + """Group device names into human-readable buckets for the console panel. + + First-match-wins so 'room_light' lands in Accessory (not Light) and + 'volume_scanner' in Motion. Accessory entries carry live state. + """ + buckets = {"Motion": [], "Imaging": [], "Light": [], "Accessory": [], "Other": []} + for name in sorted(self.devices): + low = name.lower() + if low in ("room_light", "temperature"): + label = name + try: + dev = self.devices[name] + val = dev.read().get(dev.name, {}).get("value") + if val is not None: + label = f"{name} ({val})" + except Exception: + pass + buckets["Accessory"].append(label) + elif "cam" in low or "snap" in low: + buckets["Imaging"].append(name) + elif any(k in low for k in ("stage", "piezo", "galvo", "scanner")): + buckets["Motion"].append(name) + elif any(k in low for k in ("laser", "led", "light", "illum")): + buckets["Light"].append(name) + else: + buckets["Other"].append(name) + return list(buckets.items()) + + def _print_ready_panel(self): + """Curated, always-visible status summary at the terminal. 
+ + Separate from the file log: the operator (often a biologist) gets the + URL the agent connects to, a grouped device inventory and accessory + states at a glance — instead of a silent console after the banner. + """ + def _fmt(names, limit=6): + if len(names) <= limit: + return " · ".join(names) + return " · ".join(names[:limit]) + cui.c(f" +{len(names) - limit} more", "grey") + + host = self.host or "0.0.0.0" + url_host = "localhost" if host in ("0.0.0.0", "::", "") else host + + cui.out() + cui.header(f"GENTLY{cui.MIDDOT}DEVICE LAYER", badge="READY", badge_style="green") + cui.row("URL", cui.c(f"http://{url_host}:{self.port}", "bold")) + cui.row("Hardware", str((self.config or {}).get("hardware", "dispim"))) + cui.row("Devices", f"{len(self.devices)} loaded") + for label, names in self._categorize_devices(): + if names: + cui.sub(label, _fmt(names)) + cui.row("Detection", f"SAM on {self._sam_device} (loads on first use)") + cui.row("Plans", f"{len(self.plans)} available") + cui.rule(heavy=False) + cui.note("Waiting for the agent to connect. Press Ctrl+C to stop.") + cui.rule(heavy=True) + cui.out() async def on_stop(self): """Shut down the HTTP server and plan executor.""" logger.info("Shutting down...") + cui.out() + cui.note("Shutting down device layer...", "yellow") self._running = False # Cancel any pending coalesced-broadcast timer. 
@@ -2446,6 +2596,7 @@ async def on_stop(self): if self._runner: await self._runner.cleanup() logger.info("Device layer stopped.") + cui.note("Device layer stopped.", "grey") async def health_check(self) -> Dict: """Return health status with device count, queue size, SAM status.""" diff --git a/gently/ui/web/routes/data.py b/gently/ui/web/routes/data.py index 7bb5ad3b..3133ec60 100644 --- a/gently/ui/web/routes/data.py +++ b/gently/ui/web/routes/data.py @@ -263,6 +263,47 @@ async def stop_bottom_camera_stream(): raise HTTPException(status_code=500, detail=f"stop failed: {exc}") return {"streaming": False} + def _resolve_client(): + """Resolve the live microscope client from the agent bridge, or None.""" + bridge = getattr(server, "agent_bridge", None) + agent = bridge.agent if bridge is not None else None + return getattr(agent, "client", None) if agent else None + + @router.get("/api/devices/room_light/status") + async def get_room_light_status(): + """Cached on/off state of the room-light SwitchBot (cheap to poll).""" + client = _resolve_client() + if client is None: + return {"available": False, "state": "unknown"} + try: + res = await client.get_room_light_status() + except Exception as exc: + logger.debug("room light status fetch failed: %s", exc) + return {"available": False, "state": "unknown"} + return { + "available": bool(res.get("available", res.get("success", False))), + "state": res.get("state", "unknown"), + } + + @router.post("/api/devices/room_light/set", + dependencies=[Depends(require_control)]) + async def set_room_light(payload: dict = Body(...)): + """Switch the room light on/off. 
Body: {"state": "on"|"off"|"press"}.""" + state = str(payload.get("state", "")).lower() + if state not in ("on", "off", "press"): + raise HTTPException(status_code=400, detail="state must be on, off, or press") + client = _resolve_client() + if client is None: + raise HTTPException(status_code=503, detail="Microscope not connected") + try: + res = await client.set_room_light(state) + except Exception as exc: + logger.exception("Room light command failed") + raise HTTPException(status_code=502, detail=f"room light command failed: {exc}") + if not res.get("success"): + raise HTTPException(status_code=502, detail=res.get("error", "room light command failed")) + return {"state": res.get("state", state)} + @router.get("/api/calibration") async def list_calibration(embryo_id: Optional[str] = None): """Get calibration images""" diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index 5fcab675..bd6ca7b9 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -9494,6 +9494,48 @@ body.modal-open { .devices-status-led.stale::before { background: var(--map-warm); } .devices-status-led.error::before { background: #f87171; } +/* --- Room-light toggle (header) -------------------------------------- */ +.devices-room-light { + display: inline-flex; + align-items: center; + gap: 0.4rem; + padding: 0.18rem 0.6rem 0.18rem 0.45rem; + border: 1px solid var(--map-overlay-edge); + background: var(--map-overlay-bg); + border-radius: 999px; + color: var(--map-ink-mute); + font-family: inherit; + font-size: 0.65rem; + font-weight: 600; + letter-spacing: 0.06em; + text-transform: uppercase; + cursor: pointer; + transition: color 0.15s, border-color 0.15s, background 0.15s; +} +.devices-room-light[hidden] { display: none; } +.devices-room-light:hover:not(:disabled) { + border-color: var(--map-accent); + color: var(--map-ink); +} +.devices-room-light:disabled { opacity: 0.5; cursor: default; } +.devices-room-light-bulb { + 
display: inline-flex; + align-items: center; + color: var(--map-ink-mute); + transition: color 0.15s, filter 0.15s; +} +/* "on" — warm glow on the bulb to read like a lit lamp */ +.devices-room-light.is-on { + border-color: rgba(255, 210, 74, 0.7); + color: #ffd24a; + background: rgba(255, 210, 74, 0.12); +} +.devices-room-light.is-on .devices-room-light-bulb { + color: #ffd24a; + filter: drop-shadow(0 0 5px rgba(255, 210, 74, 0.7)); +} +.devices-room-light.is-busy { opacity: 0.65; cursor: progress; } + /* --- Containers ------------------------------------------------------- */ .devices-view { display: flex; flex-direction: column; flex: 1; min-height: 0; } .devices-view-details { gap: 1rem; } diff --git a/gently/ui/web/static/js/devices.js b/gently/ui/web/static/js/devices.js index b4cb30d2..27bc12c3 100644 --- a/gently/ui/web/static/js/devices.js +++ b/gently/ui/web/static/js/devices.js @@ -72,6 +72,15 @@ const DevicesManager = (function () { const _CAM_ZOOM_MAX = 8; const _CAM_ZOOM_STEP = 1.15; // multiplicative per wheel notch + // Room-light toggle (header). Drives the SwitchBot Bot that switches the + // diSPIM room light. State is the bot's cached on/off; hidden until the + // device layer reports the accessory is configured. + let _roomLightToggle, _roomLightLabel; + let _roomLightState = 'unknown'; + let _roomLightAvailable = false; + let _roomLightBusy = false; + let _roomLightTimer = null; + let _lastTs = 0; let _previousTs = 0; let _lastWallTs = 0; @@ -135,6 +144,9 @@ const DevicesManager = (function () { _camLed = document.getElementById('devices-camera-led'); _camMeta = document.getElementById('devices-camera-meta'); + _roomLightToggle = document.getElementById('devices-room-light-toggle'); + _roomLightLabel = document.getElementById('devices-room-light-label'); + // Recompute the scale bar caption whenever the canvas resizes. 
if (_mapSvg && window.ResizeObserver) { new ResizeObserver(() => updateScalebar()).observe(_mapSvg); @@ -1268,6 +1280,95 @@ const DevicesManager = (function () { } } + // ===================================================================== + // Room-light toggle + // ===================================================================== + + function applyRoomLight(state, available) { + _roomLightState = state || 'unknown'; + _roomLightAvailable = !!available; + if (!_roomLightToggle) return; + _roomLightToggle.hidden = !_roomLightAvailable; + _roomLightToggle.disabled = !_roomLightAvailable || _roomLightBusy; + const on = _roomLightState === 'on'; + _roomLightToggle.classList.toggle('is-on', on); + _roomLightToggle.setAttribute('aria-pressed', on ? 'true' : 'false'); + if (_roomLightLabel && !_roomLightBusy) { + _roomLightLabel.textContent = on ? 'Room light: on' + : (_roomLightState === 'off' ? 'Room light: off' : 'Room light'); + } + } + + async function loadRoomLightStatus() { + if (!_roomLightToggle || _roomLightBusy) return; + try { + const res = await fetch('/api/devices/room_light/status'); + if (!res.ok) { applyRoomLight('unknown', false); return; } + const data = await res.json(); + applyRoomLight(data.state, data.available); + } catch (err) { + console.debug('room light status fetch failed:', err); + applyRoomLight('unknown', false); + } + } + + async function toggleRoomLight() { + if (!_roomLightToggle || _roomLightBusy || !_roomLightAvailable) return; + const next = _roomLightState === 'on' ? 'off' : 'on'; + _roomLightBusy = true; + _roomLightToggle.classList.add('is-busy'); + _roomLightToggle.disabled = true; + if (_roomLightLabel) { + _roomLightLabel.textContent = next === 'on' ? 'Turning on…' : 'Turning off…'; + } + + // Settle back to the resolved state, or surface a transient message + // (insufficient control / error) for 2 s before reverting. 
+ const finish = (msg) => { + _roomLightBusy = false; + _roomLightToggle.classList.remove('is-busy'); + if (msg) { + if (_roomLightLabel) _roomLightLabel.textContent = msg; + _roomLightToggle.disabled = false; + setTimeout(() => applyRoomLight(_roomLightState, _roomLightAvailable), 2000); + } else { + applyRoomLight(_roomLightState, _roomLightAvailable); + } + }; + + try { + const res = await fetch('/api/devices/room_light/set', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ state: next }), + }); + if (res.status === 401 || res.status === 403) { finish('Need control'); return; } + if (!res.ok) { + console.error('room light set failed:', await res.text()); + finish('Error'); + return; + } + const data = await res.json(); + _roomLightState = data.state || next; + finish(null); + } catch (err) { + console.error('room light toggle failed:', err); + finish('Error'); + } + } + + function setupRoomLight() { + if (!_roomLightToggle) return; + _roomLightToggle.addEventListener('click', toggleRoomLight); + loadRoomLightStatus(); + // Light periodic refresh: state can also change from agent plans + // (e.g. brightfield imaging turns it on). Status read is cached at the + // device layer (no BLE), so polling is cheap; it also makes the toggle + // appear automatically once the device layer connects. 
+ if (_roomLightTimer) clearInterval(_roomLightTimer); + _roomLightTimer = setInterval(loadRoomLightStatus, 15000); + } + // ===================================================================== // View switching // ===================================================================== @@ -1327,6 +1428,7 @@ const DevicesManager = (function () { cacheDom(); setupViewSwitcher(); setupCameraWiring(); + setupRoomLight(); loadCoverslip(); loadEmbryosSnapshot(); switchView(_currentView); diff --git a/gently/ui/web/templates/index.html b/gently/ui/web/templates/index.html index b66893da..528189f9 100644 --- a/gently/ui/web/templates/index.html +++ b/gently/ui/web/templates/index.html @@ -258,6 +258,18 @@

Device

+ disconnected no data
diff --git a/start_device_layer.py b/start_device_layer.py index 5d4d7486..1ce25931 100644 --- a/start_device_layer.py +++ b/start_device_layer.py @@ -28,6 +28,58 @@ sys.path.insert(0, str(project_root)) +def _render_startup_failure(exc, log_file): + """Turn a startup exception into a plain-language console panel. + + The full traceback goes to the log file; the operator (often a biologist) + sees a diagnosis and concrete things to check. Recognises the common + "hardware powered off / COM port" case from the MMCore error text. + """ + import re + import traceback + from gently.hardware import console_ui as cui + + logging.getLogger("gently.device_layer").error( + "Startup failed:\n%s", "".join(traceback.format_exception(exc)) + ) + + text = str(exc) + low = text.lower() + first_line = next((ln.strip() for ln in text.splitlines() if ln.strip()), "Unknown error") + + summary = "The device layer could not start." + details = first_line + hints = [] + + dev_m = re.search(r'device "([^"]+)"', text) + device = dev_m.group(1) if dev_m else None + + if device and "initialize" in low: + # MMCore failed to initialize a hardware device — almost always the + # instrument is off, unplugged, or its COM port is held by another app. + summary = "Can't reach the microscope - it looks powered off or disconnected." + details = f'Device "{device}" failed to initialize.' + if device.upper().startswith("COM"): + hints = [ + "Is the microscope / stage controller powered on?", + "Are the USB / serial cables connected?", + f"Is another program using {device}? (e.g. Micro-Manager still open)", + ] + else: + hints = [ + f'Check that "{device}" is powered on and connected.', + "Is another program (e.g. Micro-Manager) holding the hardware?", + ] + elif "access is denied" in low or "system error code 5" in low or "already" in low: + summary = "A hardware port is busy — another program may be holding it." 
+ hints = [ + "Close Micro-Manager or any other app using the microscope.", + "Then start the device layer again.", + ] + + cui.error_panel("GENTLY DEVICE LAYER", summary, details, hints, log_file) + + def main(): parser = argparse.ArgumentParser( description="Gently Device Layer - Hardware Server", @@ -100,15 +152,15 @@ def main(): hardware_name = config.get("hardware", "dispim") - print("\n" + "=" * 60) - print("GENTLY DEVICE LAYER") - print("=" * 60) - print(f"\nConfiguration:") - print(f" Hardware: {hardware_name}") - print(f" HTTP Port: {args.port}") - print(f" SAM Device: {args.sam_device}") - print(f" Config: {args.config}") - print() + from gently.hardware import console_ui as cui + cui.out() + cui.header(f"GENTLY{cui.MIDDOT}DEVICE LAYER", badge="starting", badge_style="cyan") + cui.row("Hardware", str(hardware_name)) + cui.row("HTTP port", str(args.port)) + cui.row("SAM device", str(args.sam_device)) + cui.row("Config", str(args.config)) + cui.row("Log file", str(log_file)) + cui.rule(heavy=True) # Load hardware module and create device layer from gently.hardware import load_hardware @@ -129,7 +181,8 @@ async def run_server(): loop = asyncio.get_running_loop() def request_shutdown(): - print("\n\nReceived interrupt signal...") + cui.out() + cui.note("Interrupt received - stopping...", "yellow") logging.getLogger("gently.device_layer.signal").warning( "Interrupt signal received — initiating shutdown" ) @@ -154,7 +207,11 @@ def win_signal_handler(sig, frame): try: asyncio.run(run_server()) except KeyboardInterrupt: - print("\n\nDevice layer stopped.") + cui.note("Device layer stopped.", "grey") + except Exception as exc: + # Hardware/init failure — show a diagnosis, not a raw traceback. 
+ _render_startup_failure(exc, log_file) + sys.exit(1) if __name__ == "__main__": From cdeabb8ecd483790f9aed1d5924b2413ed487499 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 12:36:40 -0400 Subject: [PATCH 53/71] Retire napari from the agent; add web-chat autocomplete; prune dead tools Visualization (web-only): - view_volume and batch_lightsheet no longer launch a blocking napari window. view_volume broadcasts an open_volume message so the existing in-browser ProjectionViewer opens for every connected client and the tool returns immediately (removes the Qt-loop freeze behind the SSE watchdog). batch_lightsheet pushes captures to the web image strip. - New server.open_volume_in_browser() + websocket.js open_volume handler; bridge connect frame now ships the tool registry too. - Removed the broken napari-only example; updated docs and the watchdog comment. No napari left in the agent runtime. Chat UX: - Slash-command and @tool autocomplete in the web chat (the command/tool registries were already on the connect frame, just unused). - Tool rows now show arguments, a one-line result summary, and a warning icon when a tool errored or returned a failure string. - conversation.py tool_call stream now carries result_summary + is_error. Tool surface: - Removed 4 permanently-dead databroker tools (no databroker is wired). - calibrate_embryo use_v04_plan returns gracefully instead of raising. - Allowlisted snapshot_plan/list_plan_versions/restore_plan_version in plan mode. Fixes from adversarial review: - open_volume resolves the volume against the live agent session (not the stale timelapse tracker), so it doesn't 404 after a web /resume. - ProjectionViewer clears stale volume state on open and falls back to the projection grid when the 3D view is requested without volume data. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/TOOLS.md | 15 +- docs/guides/capabilities.md | 1 - examples/README.md | 2 - examples/example_napari_visualization.py | 522 ------------------- gently/app/device_state_monitor.py | 8 +- gently/app/tools/__init__.py | 1 - gently/app/tools/acquisition_tools.py | 48 +- gently/app/tools/calibration_tools.py | 22 +- gently/app/tools/data_tools.py | 203 -------- gently/app/tools/volume_tools.py | 136 ++--- gently/harness/bridge.py | 29 ++ gently/harness/conversation.py | 19 +- gently/harness/prompts/manager.py | 1 + gently/ui/web/server.py | 34 +- gently/ui/web/static/css/agent-chat.css | 38 ++ gently/ui/web/static/js/agent-chat.js | 181 ++++++- gently/ui/web/static/js/projection-viewer.js | 8 + gently/ui/web/static/js/websocket.js | 14 + 18 files changed, 410 insertions(+), 872 deletions(-) delete mode 100644 examples/example_napari_visualization.py delete mode 100644 gently/app/tools/data_tools.py diff --git a/docs/TOOLS.md b/docs/TOOLS.md index 6d9e020c..feafbfbe 100644 --- a/docs/TOOLS.md +++ b/docs/TOOLS.md @@ -13,7 +13,7 @@ Source: `gently/agent/tools/` (run mode) and `gently/agent/plan_mode/tools/` (pl |------|-------------| | `acquire_volume` | Acquire a single 3D lightsheet volume for a specific embryo with calibration data | | `capture_lightsheet` | Capture a single 2D lightsheet fluorescence image at specified piezo/galvo position | -| `batch_lightsheet` | Capture lightsheet images from ALL embryos and display as a stack | +| `batch_lightsheet` | Capture lightsheet images from ALL embryos and show them in the web UI viewer | ### Analysis (`analysis_tools.py`) @@ -29,22 +29,13 @@ Source: `gently/agent/tools/` (run mode) and `gently/agent/plan_mode/tools/` (pl | `calibrate_embryo` | Run full piezo-galvo calibration for a specific embryo using Claude vision | | `calibrate_all_embryos` | Run piezo-galvo calibration for all detected embryos sequentially | -### Data (`data_tools.py`) - -| Tool | Description | 
-|------|-------------| -| `list_runs` | List recent Bluesky runs from Databroker | -| `get_run_data` | Get data from a specific Bluesky run | -| `get_run_image` | Get an image from a Bluesky run for analysis | -| `search_runs` | Search Databroker runs by metadata criteria | - ### Detection (`detection_tools.py`) | Tool | Description | |------|-------------| | `detect_embryos` | Automatically detect embryos using brightness detection and SAM segmentation | | `manual_mark_embryos` | Open interactive window to manually mark embryos by clicking | -| `edit_embryos` | Open napari editor to add/remove/move embryo positions | +| `edit_embryos` | Add/remove/move embryo positions in the web map view | | `show_detected_embryos` | Capture fresh image and display all tracked embryos with labeled bounding boxes | ### Detectors (`detector_tools.py`) @@ -145,7 +136,7 @@ Source: `gently/agent/tools/` (run mode) and `gently/agent/plan_mode/tools/` (pl | Tool | Description | |------|-------------| | `view_image` | Capture and display current bottom camera widefield image | -| `view_volume` | Open a volume in napari for 3D visualization | +| `view_volume` | Open a volume in the in-browser 3D viewer | | `list_volumes` | List available volumes for an embryo or all embryos | --- diff --git a/docs/guides/capabilities.md b/docs/guides/capabilities.md index d3b097c7..846d662c 100644 --- a/docs/guides/capabilities.md +++ b/docs/guides/capabilities.md @@ -133,7 +133,6 @@ This design means experimental AI code — perception systems, coding agents, no | **Analysis** | analyze_volume, classify_embryo_stage | No | | **Experiment** | get_experiment_summary, query_embryo_status | No | | **Session** | list_sessions, import_embryos_from_session | No | -| **Data** | list_runs, get_run_data, search_runs | No | | **Planning** | create_campaign, propose_plan, search_literature | No | | **Research** | search_literature, read_paper, search_strains | No | diff --git a/examples/README.md b/examples/README.md 
index c538fbc0..cb6f6b06 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,11 +5,9 @@ Working examples of Gently's Bluesky plan system and visualization pipeline. | Example | Description | |---------|-------------| | `example_dispim_workflows.py` | Complete DiSPIM workflows: atomic plans, autofocus, two-point calibration, embryo detection, multi-embryo acquisition | -| `example_napari_visualization.py` | Real-time napari visualization: focus sweeps, embryo detection, dual-sided DiSPIM, custom configurations | ## Requirements ```bash pip install gently[device] # Bluesky + Ophyd for hardware plans -pip install napari[all] # For visualization examples ``` diff --git a/examples/example_napari_visualization.py b/examples/example_napari_visualization.py deleted file mode 100644 index cfbad522..00000000 --- a/examples/example_napari_visualization.py +++ /dev/null @@ -1,522 +0,0 @@ -#!/usr/bin/env python -""" -DiSPIM Napari Visualization Examples -=================================== - -Demonstrates real-time image visualization for DiSPIM experiments using napari. -Shows different visualization patterns for various experiment types. - -This example shows: -1. Basic napari setup with Bluesky RunEngine -2. Focus sweep visualization (3D image stacks) -3. Embryo detection visualization (2D image sequences) -4. Dual-sided DiSPIM visualization (multi-channel) -5. Custom visualization configurations -6. 
Integration with complete DiSPIM workflows - -Requirements: - pip install napari[all] - # or with specific backend: pip install napari[pyqt5] -""" - -import logging -import numpy as np -from bluesky import RunEngine -from bluesky.callbacks import LiveTable - -# Import gently components -from gently import ( - # Device classes - create_dispim_system, - DiSPIMSystem, - - # Plan functions - focus_sweep, - dispim_piezo_autofocus, - find_embryos_with_bottom_camera, - full_dispim_workflow, - - # Configuration classes - AutofocusConfig, - CalibrationConfig, - - # Analysis utilities - FocusAlgorithm, - FitFunction -) - -# Import visualization utilities -from gently.ui.web import ( - EmbryoMarker, - mark_embryos_napari, - generate_focus_curve_plot, - generate_calibration_summary_plot, - generate_edge_detection_plot, -) - -# Napari availability check -try: - import napari - NAPARI_AVAILABLE = True -except ImportError: - NAPARI_AVAILABLE = False - - -def check_napari_installation(): - """Check if napari is available and provide installation instructions""" - if not NAPARI_AVAILABLE: - print("❌ Napari not available!") - print("\nTo enable image visualization, install napari:") - print(" pip install napari[all]") - print("\nOr with specific backend:") - print(" pip install napari[pyqt5]") - print(" # or napari[pyside2]") - print("\nAfter installation, restart and run this example again.") - return False - - print("✅ Napari is available - image visualization enabled!") - return True - - -def setup_demo_system(): - """Setup demo DiSPIM system for visualization examples""" - print("Setting up demo DiSPIM system...") - - # Create RunEngine - RE = RunEngine({}) - - # For demonstration, we'll use mock system - # In practice: system = create_dispim_system("/path/to/micromanager", "config.cfg") - print(" [Note: Using mock system for demonstration]") - system = None # Would be actual DiSPIMSystem - light_sheet = None # Would be system.side_a - - return RE, system, light_sheet - - -def 
demonstrate_basic_napari_setup(RE): - """Demonstrate basic napari visualization setup""" - print("\n" + "="*60) - print("1. BASIC NAPARI SETUP - Real-time Image Visualization") - print("="*60) - - print("\nSetting up napari for DiSPIM visualization:") - - # Create napari callback with default settings - napari_callback = setup_napari_callback() - - if not napari_callback.enabled: - print(" ❌ Napari callback disabled (napari not available)") - return None - - # Subscribe to RunEngine - RE.subscribe(napari_callback) - - print(" ✅ Napari callback created and subscribed") - print(f" ✅ Viewer title: {napari_callback.viewer.title}") - print(f" ✅ Focus sweeps: {napari_callback.show_focus_sweeps}") - print(f" ✅ Embryo detection: {napari_callback.show_embryo_detection}") - print(f" ✅ Dual channel: {napari_callback.dual_channel_mode}") - - print("\nBasic usage pattern:") - print(" RE = RunEngine({})") - print(" napari_callback = setup_napari_callback()") - print(" RE.subscribe(napari_callback)") - print(" # Now any plan with images will display in napari!") - - return napari_callback - - -def demonstrate_focus_sweep_visualization(RE, light_sheet, napari_callback): - """Demonstrate focus sweep visualization""" - print("\n" + "="*60) - print("2. 
FOCUS SWEEP VISUALIZATION - 3D Image Stacks") - print("="*60) - - if not napari_callback or not napari_callback.enabled: - print(" ⚠ Skipping - napari not available") - return - - print("\nFocus sweep creates 3D image stacks visualized in real-time:") - - # Configure autofocus - config = AutofocusConfig( - num_positions=15, # Fewer positions for faster demo - step_size_um=1.0, - algorithm=FocusAlgorithm.VOLATH.value, - fit_function=FitFunction.GAUSSIAN.value - ) - - print(f"\nAutofocus configuration:") - print(f" Positions: {config.num_positions}") - print(f" Step size: {config.step_size_um} μm") - print(f" Total range: ±{config.num_positions * config.step_size_um / 2} μm") - - if light_sheet is not None: - print(f"\nExecuting autofocus with napari visualization:") - print(f" RE(dispim_piezo_autofocus(light_sheet, config))") - - # This would display images in napari as they're acquired - # RE(dispim_piezo_autofocus(light_sheet, config)) - - print(f"\nNapari display:") - print(f" ✅ Images stream to napari as they're acquired") - print(f" ✅ 3D stack builds up in real-time") - print(f" ✅ Can scrub through Z positions") - print(f" ✅ Focus curve visible as image stack") - - else: - print(f"\n[Would execute: RE(dispim_piezo_autofocus(light_sheet, config))]") - print(f"\nExpected napari behavior:") - print(f" - New layer: 'Focus Sweep (Side A)'") - print(f" - Green colormap for side A data") - print(f" - 3D stack: shape (15, height, width)") - print(f" - Real-time updates as images acquired") - - print(f"\nVisualization features:") - print(f" - Real-time focus quality assessment") - print(f" - Immediate feedback on scan progress") - print(f" - Visual validation of focus curve") - - -def demonstrate_embryo_detection_visualization(RE, system, napari_callback): - """Demonstrate embryo detection visualization""" - print("\n" + "="*60) - print("3. 
EMBRYO DETECTION VISUALIZATION - 2D Image Sequences") - print("="*60) - - if not napari_callback or not napari_callback.enabled: - print(" ⚠ Skipping - napari not available") - return - - print("\nEmbryo detection creates sequences of 2D images from XY scanning:") - - # Configure embryo detection - detection_config = { - 'scan_area': { - 'x_start': -500, 'x_stop': 500, # Smaller area for demo - 'y_start': -500, 'y_stop': 500, - 'step_size': 100 # μm between positions - }, - 'detection': { - 'min_size_pixels': 50, - 'max_size_pixels': 500, - 'brightness_threshold': 0.3 - } - } - - print(f"\nDetection configuration:") - print(f" Scan area: {detection_config['scan_area']['x_start']} to {detection_config['scan_area']['x_stop']} μm") - print(f" Step size: {detection_config['scan_area']['step_size']} μm") - print(f" Grid size: 11x11 = 121 positions") - - if system is not None: - print(f"\nExecuting embryo detection with napari visualization:") - print(f" RE(find_embryos_with_bottom_camera(system, detection_config))") - - # This would display images in napari as XY scan progresses - # RE(find_embryos_with_bottom_camera(system, detection_config)) - - print(f"\nNapari display:") - print(f" ✅ Each XY position shows in napari immediately") - print(f" ✅ Can see scan progress across sample") - print(f" ✅ Potential embryos highlighted as found") - print(f" ✅ Final mosaic view of scanned area") - - else: - print(f"\n[Would execute: RE(find_embryos_with_bottom_camera(system, detection_config))]") - print(f"\nExpected napari behavior:") - print(f" - New layer: 'Embryo Detection (Side A)'") - print(f" - Updates with each XY position") - print(f" - 121 total images in sequence") - print(f" - Detected embryos marked/highlighted") - - print(f"\nVisualization benefits:") - print(f" - Real-time quality control of scan") - print(f" - Immediate feedback on embryo locations") - print(f" - Visual verification of detection algorithm") - - -def demonstrate_dual_channel_visualization(RE, 
system, napari_callback): - """Demonstrate dual-sided DiSPIM visualization""" - print("\n" + "="*60) - print("4. DUAL-CHANNEL VISUALIZATION - Multi-Camera Display") - print("="*60) - - if not napari_callback or not napari_callback.enabled: - print(" ⚠ Skipping - napari not available") - return - - print("\nDual-sided DiSPIM generates images from two cameras simultaneously:") - - if system is not None: - print(f"\nSimulating dual-sided acquisition:") - print(f" # Both sides acquire simultaneously") - print(f" side_a_image = system.side_a.camera.read()") - print(f" side_b_image = system.side_b.camera.read()") - - print(f"\nNapari display:") - print(f" ✅ Side A: Green channel") - print(f" ✅ Side B: Magenta channel") - print(f" ✅ Additive blending for overlay") - print(f" ✅ Separate layers for independent control") - print(f" ✅ Synchronized updates") - - else: - print(f"\n[Would show both camera feeds simultaneously]") - - print(f"\nColor scheme:") - print(f" - Side A (illumination from left): Green") - print(f" - Side B (illumination from right): Magenta") - print(f" - Overlaid: Shows complementary information") - - print(f"\nVisualization advantages:") - print(f" - Compare image quality from both sides") - print(f" - See complementary sample information") - print(f" - Identify optimal viewing angle") - print(f" - Real-time feedback for dual-sided experiments") - - -def demonstrate_custom_visualization_configs(RE): - """Demonstrate custom visualization configurations""" - print("\n" + "="*60) - print("5. CUSTOM CONFIGURATIONS - Tailored Visualization") - print("="*60) - - if not NAPARI_AVAILABLE: - print(" ⚠ Skipping - napari not available") - return - - print("\nCustom configurations for different experiment needs:") - - # Configuration 1: Focus-only visualization - print(f"\n1. 
Focus-Only Configuration:") - print(f" config = {{'show_focus_sweeps': True, 'show_embryo_detection': False}}") - print(f" napari_callback = setup_napari_callback(config)") - - focus_config = { - 'show_focus_sweeps': True, - 'show_embryo_detection': False, - 'show_single_images': False, - 'update_interval': 0.05 # Faster updates - } - - print(f" - Only shows focus sweep experiments") - print(f" - Faster update rate (0.05s)") - print(f" - Optimized for autofocus development") - - # Configuration 2: High-throughput visualization - print(f"\n2. High-Throughput Configuration:") - print(f" config = {{'show_single_images': False, 'update_interval': 1.0}}") - - throughput_config = { - 'show_focus_sweeps': True, - 'show_embryo_detection': True, - 'show_single_images': False, # Skip individual images - 'update_interval': 1.0 # Slower updates for performance - } - - print(f" - Skip individual images to reduce overhead") - print(f" - Slower update rate (1.0s) for performance") - print(f" - Better for automated, high-throughput experiments") - - # Configuration 3: Development/debugging - print(f"\n3. Development/Debugging Configuration:") - print(f" config = {{'show_single_images': True, 'update_interval': 0.01}}") - - debug_config = { - 'show_focus_sweeps': True, - 'show_embryo_detection': True, - 'show_single_images': True, - 'update_interval': 0.01 # Very fast updates - } - - print(f" - Show every image for detailed inspection") - print(f" - Very fast updates (0.01s)") - print(f" - Maximum detail for troubleshooting") - - print(f"\nUsage pattern:") - print(f" # Choose configuration for your needs") - print(f" config = focus_config # or throughput_config, debug_config") - print(f" napari_callback = setup_napari_callback(config)") - print(f" RE.subscribe(napari_callback)") - - -def demonstrate_convenience_functions(RE): - """Demonstrate convenience functions for common patterns""" - print("\n" + "="*60) - print("6. 
CONVENIENCE FUNCTIONS - Common Usage Patterns") - print("="*60) - - if not NAPARI_AVAILABLE: - print(" ⚠ Skipping - napari not available") - return - - print("\nConvenience functions for common visualization needs:") - - print(f"\n1. Focus Sweep Only:") - print(f" from gently.visualization import enable_focus_sweep_visualization") - print(f" enable_focus_sweep_visualization(RE)") - print(f" # Optimized for autofocus experiments") - - print(f"\n2. Embryo Detection Only:") - print(f" from gently.visualization import enable_embryo_detection_visualization") - print(f" enable_embryo_detection_visualization(RE)") - print(f" # Optimized for sample detection") - - print(f"\n3. Full Visualization:") - print(f" from gently.visualization import enable_full_visualization") - print(f" enable_full_visualization(RE)") - print(f" # Shows everything - good for general use") - - print(f"\n4. Custom Viewer:") - print(f" from gently.visualization import create_napari_viewer") - print(f" viewer = create_napari_viewer('My DiSPIM Experiment')") - print(f" callback = setup_napari_callback(viewer=viewer)") - print(f" # Use your own configured viewer") - - print(f"\nBenefits:") - print(f" - One-line setup for common patterns") - print(f" - Pre-configured for specific experiment types") - print(f" - Easy to integrate into existing workflows") - - -def demonstrate_complete_workflow_visualization(RE, system): - """Demonstrate visualization with complete DiSPIM workflow""" - print("\n" + "="*60) - print("7. 
COMPLETE WORKFLOW VISUALIZATION - Full Experiment") - print("="*60) - - if not NAPARI_AVAILABLE: - print(" ⚠ Skipping - napari not available") - return - - print("\nVisualization during complete multi-embryo workflow:") - - # Setup full visualization - print(f"\nSetting up comprehensive visualization:") - print(f" napari_callback = enable_full_visualization(RE)") - print(f" # Will show all stages of the workflow") - - # Complete workflow configuration - workflow_config = { - 'system_setup': { - 'center_devices': True, - 'run_calibration': True - }, - 'calibration': { - 'point1_um': 25.0, - 'point2_um': 75.0, - 'autofocus_each_point': True - }, - 'embryo_detection': { - 'x_start': -1000, 'x_stop': 1000, - 'y_start': -1000, 'y_stop': 1000, - 'step_size': 200 - }, - 'acquisition': { - 'z_stack': {'range_um': 50, 'step_size_um': 1.0}, - 'dual_sided': True, - 'time_points': 3 - } - } - - print(f"\nWorkflow stages with visualization:") - - if system is not None: - print(f"\n RE(full_dispim_workflow(system, workflow_config))") - print(f"\n Expected napari display sequence:") - - print(f" 1. Calibration stage:") - print(f" - Focus sweeps at calibration points") - print(f" - Real-time focus quality assessment") - - print(f" 2. Embryo detection stage:") - print(f" - XY scan images streaming in") - print(f" - Detected embryo positions highlighted") - - print(f" 3. 
Multi-embryo acquisition:") - print(f" - Focus sweeps for each embryo") - print(f" - Z-stack acquisitions (dual-channel)") - print(f" - Time series progression") - - print(f"\nVisualization benefits for complete workflow:") - print(f" ✅ Monitor entire experiment progress") - print(f" ✅ Quality control at each stage") - print(f" ✅ Early detection of issues") - print(f" ✅ Real-time data assessment") - print(f" ✅ Immediate feedback on results") - - -def main(): - """Main napari visualization demonstration""" - print("DiSPIM Napari Visualization Examples") - print("=" * 60) - print() - print("This example demonstrates real-time image visualization") - print("for DiSPIM experiments using napari and Bluesky callbacks.") - print() - - # Check napari installation - if not check_napari_installation(): - return - - # Setup demo system - RE, system, light_sheet = setup_demo_system() - - # Run demonstrations - napari_callback = demonstrate_basic_napari_setup(RE) - demonstrate_focus_sweep_visualization(RE, light_sheet, napari_callback) - demonstrate_embryo_detection_visualization(RE, system, napari_callback) - demonstrate_dual_channel_visualization(RE, system, napari_callback) - demonstrate_custom_visualization_configs(RE) - demonstrate_convenience_functions(RE) - demonstrate_complete_workflow_visualization(RE, system) - - # Summary - print("\n" + "="*60) - print("SUMMARY - Napari Visualization Integration") - print("="*60) - - print(f"\n✅ Napari integration complete:") - print(f" - Real-time image streaming from Bluesky plans") - print(f" - Automatic 3D stack visualization for focus sweeps") - print(f" - 2D image sequences for embryo detection") - print(f" - Dual-channel support for two-sided DiSPIM") - print(f" - Configurable visualization options") - - print(f"\n✅ Key benefits:") - print(f" - Immediate visual feedback during experiments") - print(f" - Quality control and error detection") - print(f" - Interactive data exploration") - print(f" - Non-intrusive - works with 
existing plans") - print(f" - Optional - graceful fallback if napari not available") - - print(f"\n✅ Usage patterns:") - print(f" - Basic: setup_napari_callback() → RE.subscribe()") - print(f" - Custom: setup_napari_callback(config) for specific needs") - print(f" - Convenience: enable_focus_sweep_visualization(RE)") - print(f" - Integration: Works with all existing DiSPIM plans") - - print(f"\nNext steps:") - print(f" 1. Install napari: pip install napari[all]") - print(f" 2. Add visualization to your DiSPIM experiments") - print(f" 3. Customize configurations for your needs") - print(f" 4. Enjoy real-time image feedback!") - - if napari_callback and napari_callback.enabled: - print(f"\nNapari viewer is open - explore the interface!") - print(f" - Layer controls for each image type") - print(f" - Color/brightness adjustments") - print(f" - 3D visualization controls") - print(f" - Screenshot and movie export options") - - print(f"\nGently DiSPIM + Napari: Real-time microscopy visualization! 🔬✨") - - -if __name__ == "__main__": - # Setup logging - logging.basicConfig(level=logging.INFO) - - # Run demonstration - main() \ No newline at end of file diff --git a/gently/app/device_state_monitor.py b/gently/app/device_state_monitor.py index beff9a54..55e61daf 100644 --- a/gently/app/device_state_monitor.py +++ b/gently/app/device_state_monitor.py @@ -14,9 +14,11 @@ Watchdog -------- -The SSE iterator can silently stall in the agent process — most reliably -when a Qt window (napari) freezes the asyncio loop synchronously during a -tool call, but in principle any half-open TCP path can cause it. aiohttp's +The SSE iterator can silently stall in the agent process whenever a +half-open TCP path or a long synchronous tool call wedges the asyncio loop. +(Historically the worst offender was a Qt window — napari — blocking the +loop during a tool call; that path is gone now that all visualization is +in-browser, but the watchdog stays for general robustness.) 
aiohttp's async iterator won't raise on a stalled socket; the ``async for`` just waits forever. To recover, a sibling watchdog task tracks the timestamp of the last received event; if no event arrives within ``stale_timeout_sec`` diff --git a/gently/app/tools/__init__.py b/gently/app/tools/__init__.py index 4dd1b776..99c91e34 100644 --- a/gently/app/tools/__init__.py +++ b/gently/app/tools/__init__.py @@ -15,7 +15,6 @@ from . import acquisition_tools from . import volume_tools from . import analysis_tools -from . import data_tools from . import timelapse_tools from . import session_tools from . import focus_tools diff --git a/gently/app/tools/acquisition_tools.py b/gently/app/tools/acquisition_tools.py index 2500fe41..0f429982 100644 --- a/gently/app/tools/acquisition_tools.py +++ b/gently/app/tools/acquisition_tools.py @@ -302,10 +302,11 @@ async def capture_lightsheet( @tool( name="batch_lightsheet", - description="""Capture lightsheet images from ALL embryos and display them together in a single napari viewer. + description="""Capture lightsheet images from ALL embryos and show them together in the web UI. Use when user says "lightsheet all embryos", "capture all embryos", "show me all embryos in lightsheet". -Moves to each embryo, captures a lightsheet image, then opens napari with all images as separate layers. -Much more efficient than capturing one at a time.""", +Moves to each embryo, captures a lightsheet image, saves it, and pushes it to the +web viewer (live image strip) for everyone watching. 
Much more efficient than +capturing one at a time.""", category=ToolCategory.HARDWARE, requires_microscope=True, examples=[ @@ -317,7 +318,7 @@ async def batch_lightsheet( galvo_position: float = 0.0, context: Dict = None ) -> str: - """Capture lightsheet images from all embryos and show in single napari viewer""" + """Capture lightsheet images from all embryos and show them in the web UI""" agent = context.get('agent') client = context.get('client') @@ -403,32 +404,25 @@ async def batch_lightsheet( logger.info("Saved %d images to %s", len(images), save_dir) - # Open single napari viewer with all images as a stack - import napari - import numpy as np - logger.info("Opening napari with %d embryo images as stack...", len(images)) - - # Stack images into a single array for slider navigation - image_stack = np.stack(images, axis=0) - - viewer = napari.Viewer(title=f"Batch Lightsheet - {len(images)} embryos") - - # Add as single stack with slider (grayscale) - viewer.add_image( - image_stack, - name='Embryos', - colormap='gray', - ) - - # Print embryo ID mapping for reference - logger.info("Slider index -> Embryo ID:") - for i, eid in enumerate(embryo_ids): - logger.info(" %d: %s", i, eid) - - napari.run() + # Push each captured image to the web UI \u2014 no blocking desktop window. + # They appear in the live viewer / recent strip for everyone watching. + pushed = 0 + if agent.viz_server is not None: + for img, eid in zip(images, embryo_ids): + uid = f"batch_lightsheet_{eid}_{timestamp}" + agent.push_viz( + img, uid, "image", + {"embryo_id": eid, "source": "batch_lightsheet", "label": eid}, + ) + pushed += 1 + logger.info("Pushed %d batch-lightsheet images to the web UI", pushed) # Summary summary = f"\u2713 Captured {len(images)} embryos: {', '.join(embryo_ids)}" + if pushed: + summary += f"\nShowing {pushed} image(s) in the web UI viewer." 
+ elif agent.viz_server is None: + summary += "\n(Web UI not running \u2014 images saved to disk only.)" if errors: summary += f"\n\u26a0 Errors: {'; '.join(errors)}" summary += f"\nSaved to: {save_dir}" diff --git a/gently/app/tools/calibration_tools.py b/gently/app/tools/calibration_tools.py index 963dc2d0..f827b8f8 100644 --- a/gently/app/tools/calibration_tools.py +++ b/gently/app/tools/calibration_tools.py @@ -973,18 +973,16 @@ async def calibrate_embryo( return f"Error: Not connected to microscope server. Cannot calibrate {embryo_id}." if use_v04_plan: - # Escape hatch hook. Not wired yet - delegating to the real Bluesky - # plan requires a RunEngine and device objects that live on the device - # layer, so the caller would have to submit the plan through the queue - # server. Since the surgical path already mirrors v0.4.0's behavior, - # this is a placeholder for a future hardware-regression follow-up. - raise NotImplementedError( - "use_v04_plan=True is not wired yet. The default surgical path " - "in calibrate_embryo already replicates the v0.4.0 calibration " - "plan's behavior (edge detection + inset + wide adaptive sweep). " - "If that path regresses on hardware, wire this branch to submit " - "gently.hardware.dispim.plans.calibration.calibrate_embryo_piezo_galvo " - "through the queue server's plan-submission API." + # Escape hatch reserved for a future hardware-regression follow-up. It is + # intentionally unwired (delegating to the real Bluesky plan needs a + # RunEngine + device objects that live on the device layer). Return a + # clear message instead of raising, so a model that sets this flag gets a + # graceful answer rather than a hard NotImplementedError — the default + # surgical path already mirrors v0.4.0 behavior. + return ( + "use_v04_plan is not available: the default calibration path already " + "replicates the v0.4.0 plan (edge detection + inset + wide adaptive " + "sweep). Re-run calibrate_embryo without use_v04_plan." 
) logger.info("calibration path: surgical (v0.4.0-equivalent inset + adaptive sweep)") diff --git a/gently/app/tools/data_tools.py b/gently/app/tools/data_tools.py deleted file mode 100644 index 9435a433..00000000 --- a/gently/app/tools/data_tools.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -Databroker Tools - -Tools for querying and retrieving data from Bluesky/Databroker. -""" - -from typing import Dict, List - -from gently.harness.tools.registry import tool, ToolCategory -from gently.harness.tools.helpers import require_agent - - -@tool( - name="list_runs", - description="List recent Bluesky runs from Databroker", - category=ToolCategory.DATA, -) -def list_runs( - limit: int = 10, - embryo_id: str = None, - plan_name: str = None, - context: Dict = None -) -> str: - """List recent runs""" - agent = context.get('agent') - - if not agent or not agent.databroker: - return "Databroker not available" - - try: - db = agent.databroker - - query = {} - if embryo_id: - query['embryo_id'] = embryo_id - if plan_name: - query['plan_name'] = plan_name - - runs = list(db(**query))[:limit] - - if not runs: - return "No runs found" - - lines = [f"Recent runs ({len(runs)}):", ""] - - for run_uid in runs: - run = db[run_uid] - start = run.metadata.get('start', {}) - lines.append(f"* {run_uid[:8]}...") - lines.append(f" Plan: {start.get('plan_name', 'unknown')}") - lines.append(f" Time: {start.get('time', 'unknown')}") - if 'embryo_id' in start: - lines.append(f" Embryo: {start['embryo_id']}") - lines.append("") - - return "\n".join(lines) - - except Exception as e: - return f"Error listing runs: {str(e)}" - - -@tool( - name="get_run_data", - description="Get data from a specific Bluesky run", - category=ToolCategory.DATA, -) -def get_run_data( - run_id: str, - data_keys: List[str] = None, - stream: str = "primary", - context: Dict = None -) -> str: - """Get run data""" - agent = context.get('agent') - - if not agent or not agent.databroker: - return "Databroker not available" - - try: 
- db = agent.databroker - - if run_id.startswith('-'): - run = db[int(run_id)] - else: - run = db[run_id] - - data = run.primary.read() - - if data_keys: - data = {k: data[k] for k in data_keys if k in data} - - lines = [f"Run: {run.metadata['start']['uid'][:8]}...", ""] - lines.append(f"Available keys: {list(data.keys())}") - - for key, values in data.items(): - shape = values.shape if hasattr(values, 'shape') else 'scalar' - lines.append(f" {key}: shape={shape}") - - return "\n".join(lines) - - except Exception as e: - return f"Error getting run data: {str(e)}" - - -@tool( - name="get_run_image", - description="Get an image from a Bluesky run for analysis", - category=ToolCategory.DATA, -) -async def get_run_image( - run_id: str, - detector: str = None, - analyze: bool = False, - analysis_prompt: str = None, - context: Dict = None -) -> str: - """Get run image""" - agent = context.get('agent') - - if not agent or not agent.databroker: - return "Databroker not available" - - try: - db = agent.databroker - - if run_id.startswith('-'): - run = db[int(run_id)] - else: - run = db[run_id] - - data = run.primary.read() - - if not detector: - for key in ['bottom_camera', 'camera', 'detector']: - if key in data: - detector = key - break - - if detector not in data: - return f"Detector '{detector}' not found. 
Available: {list(data.keys())}" - - image = data[detector] - shape = image.shape if hasattr(image, 'shape') else 'unknown' - - result = f"Retrieved image from {detector}\nShape: {shape}" - - if analyze and analysis_prompt: - analysis = await agent._analyze_image_with_vision( - image=image, - prompt=analysis_prompt - ) - result += f"\n\nAnalysis:\n{analysis}" - - return result - - except Exception as e: - return f"Error getting image: {str(e)}" - - -@tool( - name="search_runs", - description="Search Databroker runs by metadata criteria", - category=ToolCategory.DATA, -) -def search_runs( - since: str = None, - until: str = None, - metadata: Dict = None, - limit: int = 20, - context: Dict = None -) -> str: - """Search runs""" - agent = context.get('agent') - - if not agent or not agent.databroker: - return "Databroker not available" - - try: - db = agent.databroker - - query = metadata or {} - - if since: - query['since'] = since - if until: - query['until'] = until - - runs = list(db(**query))[:limit] - - if not runs: - return "No matching runs found" - - lines = [f"Found {len(runs)} runs:", ""] - - for run_uid in runs: - run = db[run_uid] - start = run.metadata.get('start', {}) - lines.append(f"* {run_uid[:8]}: {start.get('plan_name', 'unknown')}") - - return "\n".join(lines) - - except Exception as e: - return f"Error searching runs: {str(e)}" diff --git a/gently/app/tools/volume_tools.py b/gently/app/tools/volume_tools.py index 080e56dc..b1b69e59 100644 --- a/gently/app/tools/volume_tools.py +++ b/gently/app/tools/volume_tools.py @@ -118,15 +118,17 @@ async def view_image( @tool( name="view_volume", - description="""Open a volume in napari for 3D visualization. -Can open a volume by file path OR by embryo ID (opens latest volume or specific timepoint). -Use when user says "open volume", "view volume", "show volume in napari", or "look at the 3D data".""", + description="""Open an acquired volume in the in-browser 3D viewer. 
+Opens by embryo ID \u2014 the latest volume, or a specific timepoint. The volume +appears in the web UI's volume viewer (interactive 3D raymarcher + projections) +for everyone watching the session; nothing pops up on the instrument desktop. +Use when the user says "open volume", "view volume", "show the 3D data", or +"look at timepoint N of embryo X".""", category=ToolCategory.ANALYSIS, requires_microscope=False, examples=[ ToolExample("Open latest volume for embryo 2", {"embryo_id": "embryo_2"}), ToolExample("Open specific timepoint", {"embryo_id": "embryo_2", "timepoint": 5}), - ToolExample("Open volume file", {"file_path": "D:/Gently/volumes/embryo_1_t0001.tif"}), ], ) async def view_volume( @@ -135,95 +137,71 @@ async def view_volume( file_path: str = None, context: Dict = None ) -> str: - """Open a volume in napari for visualization""" - import napari - import tifffile - import numpy as np + """Open a volume in the browser-based viewer (no blocking desktop window).""" from pathlib import Path agent, err = require_agent(context) if err: return err - volume = None - volume_path = None - title = "Volume Viewer" + session_id = agent.session_id - # Determine which volume to open - if file_path: - # Open from file path - volume_path = Path(file_path) - if not volume_path.exists(): + # file_path is legacy. In-browser viewing is addressed by embryo + timepoint, + # so map a FileStore path (embryos/{embryo_id}/volumes/t{NNNN}.tif) back to + # those when possible. 
+ if file_path and not embryo_id: + p = Path(file_path) + if not p.exists(): return f"Error: File not found: {file_path}" - title = f"Volume: {volume_path.name}" - - elif embryo_id: - # Get volume for embryo from FileStore - session_id = agent.session_id - - if timepoint is not None: - # Try to find specific timepoint via FileStore - volume_path = agent.store.get_volume_path(session_id, embryo_id, timepoint) - if volume_path and volume_path.exists(): - title = f"{embryo_id} - t{timepoint:04d}" - else: - # Check recent_images as fallback - embryo, err = get_embryo_or_error(agent, embryo_id) - if err: - return err - if embryo.recent_images: - matching = [img for img in embryo.recent_images if img.timepoint == timepoint] - if matching: - volume_path = Path(matching[0].volume_path) - title = f"{embryo_id} - t{timepoint:04d}" - - if not volume_path or not volume_path.exists(): - # List available timepoints from store - volumes = agent.store.list_volumes(session_id, embryo_id) - available = sorted([v['timepoint'] for v in volumes]) - return f"Timepoint {timepoint} not found for {embryo_id}. Available: {available}" - else: - # Find latest volume from store + stem = p.stem # e.g. "t0005" + try: + if stem.startswith("t"): + timepoint = int(stem[1:]) + # .../embryos/{embryo_id}/volumes/t{NNNN}.tif \u2192 embryo dir is parent of "volumes" + embryo_id = p.parent.parent.name + except (ValueError, IndexError): + pass + if not embryo_id or timepoint is None: + return ("Volume viewing is now in-browser and addressed by embryo + " + "timepoint. Please specify embryo_id (and optionally timepoint) " + "rather than a raw file path.") + + if not embryo_id: + return "Error: Specify embryo_id (and optionally timepoint)." + + # Resolve the timepoint (specific or latest) and confirm the volume exists. 
+ if timepoint is not None: + volume_path = agent.store.get_volume_path(session_id, embryo_id, timepoint) + if not volume_path or not Path(volume_path).exists(): volumes = agent.store.list_volumes(session_id, embryo_id) - if not volumes: + available = sorted(v['timepoint'] for v in volumes) + if not available: return f"No volumes found for {embryo_id} in session {session_id}" - - # Find highest timepoint - latest = max(volumes, key=lambda v: v['timepoint']) - latest_tp = latest['timepoint'] - volume_path = agent.store.get_volume_path(session_id, embryo_id, latest_tp) - - title = f"{embryo_id} - t{latest_tp:04d}" - + return f"Timepoint {timepoint} not found for {embryo_id}. Available: {available}" else: - return "Error: Specify either embryo_id or file_path" + volumes = agent.store.list_volumes(session_id, embryo_id) + if not volumes: + return f"No volumes found for {embryo_id} in session {session_id}" + timepoint = max(v['timepoint'] for v in volumes) + + # Drive the in-browser viewer \u2014 no blocking Qt/desktop window. + viz = getattr(agent, "viz_server", None) + if viz is None: + return (f"Resolved {embryo_id} t{timepoint:04d}, but the web UI isn't running, " + f"so there's nowhere to display it. 
Start the web UI and try again.") - # Load the volume try: - volume = tifffile.imread(str(volume_path)) - logger.info("Loaded volume: %s, dtype=%s", volume.shape, volume.dtype) + n_clients = await viz.open_volume_in_browser(embryo_id, timepoint) except Exception as e: - return f"Error loading volume: {e}" - - # Open in napari - logger.info("Opening napari viewer...") - viewer = napari.Viewer(title=title) - - # Add volume with appropriate settings - viewer.add_image( - volume, - name='Volume', - colormap='gray', - rendering='mip', # Maximum intensity projection for 3D - ) - - # Add scale bar info - viewer.scale_bar.visible = True - viewer.scale_bar.unit = "um" - - napari.run() - - return f"\u2713 Opened volume in napari: {volume_path.name} (shape: {volume.shape})" + logger.exception("open_volume_in_browser failed") + return f"Error opening volume in the web viewer: {e}" + + url = f"http://localhost:{getattr(viz, 'port', 8080)}/" + if n_clients <= 0: + return (f"Resolved {embryo_id} t{timepoint:04d}, but no browser is connected. " + f"Open {url} and select that embryo/timepoint to view it.") + return (f"\u2713 Opening {embryo_id} t{timepoint:04d} in the web volume viewer " + f"({n_clients} view(s) connected) \u2014 {url}") @tool( diff --git a/gently/harness/bridge.py b/gently/harness/bridge.py index 4561b0f5..4ac10f90 100644 --- a/gently/harness/bridge.py +++ b/gently/harness/bridge.py @@ -1364,6 +1364,34 @@ def get_commands_json(self) -> list: }) return commands + def get_tools_json(self) -> list: + """Serialize the agent tool registry for client-side autocomplete. + + Trimmed on purpose (first description line + lightweight param list) so + the connect frame stays small. The web chat uses this for @tool-name + completion and to show a tool's arguments inline. 
+ """ + try: + from gently.harness.tools.registry import get_tool_registry + registry = get_tool_registry() + except Exception: + return [] + tools = [] + for t in registry.list_all(): + desc = (t.description or "").strip().split("\n", 1)[0][:200] + category = getattr(t.category, "name", None) or str(t.category) + tools.append({ + "name": t.name, + "description": desc, + "category": category, + "params": [ + {"name": p.name, "type": p.type, "required": bool(p.required)} + for p in t.parameters if p.name != "context" + ], + }) + tools.sort(key=lambda x: x["name"]) + return tools + # ------------------------------------------------------------------ # Private helpers for structured command data # ------------------------------------------------------------------ @@ -1430,6 +1458,7 @@ def get_connect_metadata(self) -> dict: meta = { "session_id": self.agent.session_id, "commands": self.get_commands_json(), + "tools": self.get_tools_json(), "version": getattr(gently, "__version__", "dev"), "tokens": self._get_token_snapshot(), "embryo_count": len(exp.embryos), diff --git a/gently/harness/conversation.py b/gently/harness/conversation.py index 765d16b2..e5de53d9 100644 --- a/gently/harness/conversation.py +++ b/gently/harness/conversation.py @@ -604,6 +604,8 @@ def stream_and_collect(): 'tool_label': tool_label_fn(block.name, block.input), } + is_error_flag = False + result_text = "" try: tool_result = await self._execute_single_tool(block.name, block.input) @@ -620,24 +622,39 @@ def stream_and_collect(): except (json.JSONDecodeError, TypeError): pass + result_text = tool_result if isinstance(tool_result, str) else str(tool_result) tool_results.append({ "type": "tool_result", "tool_use_id": block.id, "content": tool_result }) except Exception as e: + is_error_flag = True + result_text = f"Error: {str(e)}" tool_results.append({ "type": "tool_result", "tool_use_id": block.id, - "content": f"Error: {str(e)}", + "content": result_text, "is_error": True }) + # First non-empty 
line of the result, trimmed — gives the chat + # UI a one-line summary so the operator can see what a tool did + # (or didn't do), not just that it ran. + result_summary = next( + (ln.strip() for ln in (result_text or "").splitlines() if ln.strip()), + "", + ) + if len(result_summary) > 140: + result_summary = result_summary[:139] + "…" + yield { 'type': 'tool_call', 'tool_name': block.name, 'tool_input': block.input, 'duration': time.time() - start_time, + 'result_summary': result_summary, + 'is_error': is_error_flag, } self.conversation_history.append({ diff --git a/gently/harness/prompts/manager.py b/gently/harness/prompts/manager.py index 23cb9291..2d2f6f42 100644 --- a/gently/harness/prompts/manager.py +++ b/gently/harness/prompts/manager.py @@ -152,6 +152,7 @@ def get_tools_for_mode(self, mode: str, has_microscope: bool) -> list: "search_literature", "search_strains", "validate_plan", "batch_update_status", "batch_update_spec", + "snapshot_plan", "list_plan_versions", "restore_plan_version", "save_plan_template", "list_templates", "apply_template", "ask_user_choice", } diff --git a/gently/ui/web/server.py b/gently/ui/web/server.py index fc584cbc..58cfbd65 100644 --- a/gently/ui/web/server.py +++ b/gently/ui/web/server.py @@ -201,11 +201,15 @@ def _resolve_volume_path(self, embryo_id: str, timepoint: int) -> Optional[str]: if path: return path - # 2. Try FileStore (file-based, persistent) - if self.gently_store and self.timelapse_tracker.session_id: + # 2. Try FileStore (file-based, persistent). Key on the LIVE agent + # session, not the tracker's (which goes stale after a resume with no + # active timelapse) — mirrors _resolve_projection_path so an agent-driven + # open_volume hand-off doesn't 404 after a /resume. 
+ sid = self._current_session_id() + if self.gently_store and sid: try: vol_path = self.gently_store.get_volume_path( - self.timelapse_tracker.session_id, embryo_id, timepoint, + sid, embryo_id, timepoint, ) if vol_path and vol_path.exists(): return str(vol_path) @@ -682,6 +686,30 @@ async def push_volume_3d( logger.info(f"Pushed 3D volume {uid} ({volume.shape}) to {len(self.manager.active_connections)} clients") + async def open_volume_in_browser( + self, + embryo_id: str, + timepoint: int, + view: str = "3d_viewer", + ) -> int: + """Ask every connected browser to open the in-browser volume viewer. + + This is the web-native replacement for the old napari ``view_volume``: + the agent triggers the existing ProjectionViewer (WebGL raymarcher + + projections) instead of launching a desktop Qt window that would block + the shared agent/web event loop. Returns the number of clients notified. + """ + await self.manager.broadcast({ + "type": "open_volume", + "embryo_id": embryo_id, + "timepoint": timepoint, + "view": view, + }) + n = len(self.manager.active_connections) + logger.info("Requested browser open_volume for %s t%s (%d client(s))", + embryo_id, timepoint, n) + return n + async def on_start(self): """Start the visualization server""" # Set the event loop on the event bus so async handlers work diff --git a/gently/ui/web/static/css/agent-chat.css b/gently/ui/web/static/css/agent-chat.css index 3fa05eb5..777582b2 100644 --- a/gently/ui/web/static/css/agent-chat.css +++ b/gently/ui/web/static/css/agent-chat.css @@ -170,6 +170,14 @@ .ac-tool-name { color: var(--text); } .ac-tool-meta { color: var(--text-muted); } .ac-tool-check { color: var(--accent-green); } + +/* Multi-line tool rows: head (icon + name + meta) over args / summary. 
*/ +.ac-tool { flex-direction: column; align-items: stretch; gap: 4px; } +.ac-tool-head { display: flex; align-items: center; gap: 8px; } +.ac-tool-args { color: var(--text-muted); padding-left: 19px; word-break: break-word; } +.ac-tool-summary { color: var(--text-muted); padding-left: 19px; word-break: break-word; } +.ac-tool-summary-err, .ac-tool-warn { color: var(--accent-orange, #fb923c); } +.ac-tool-err { border-color: rgba(251, 146, 60, 0.35); } .ac-tool-spin { width: 11px; height: 11px; border-radius: 50%; border: 1.6px solid var(--border); @@ -234,6 +242,36 @@ display: flex; gap: 8px; padding: 12px; border-top: 1px solid var(--border); + position: relative; /* anchor for the autocomplete dropdown */ +} + +/* ── Autocomplete dropdown ──────────────────────────────── */ +.ac-complete { + position: absolute; + left: 12px; right: 12px; bottom: calc(100% + 4px); + max-height: 240px; overflow-y: auto; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 9px; + box-shadow: 0 -8px 28px rgba(0, 0, 0, 0.45); + padding: 4px; + z-index: 5; +} +.ac-complete.hidden { display: none; } +.ac-complete-item { + display: flex; flex-direction: column; gap: 1px; + padding: 6px 9px; border-radius: 6px; + cursor: pointer; +} +.ac-complete-item.active, +.ac-complete-item:hover { background: var(--bg-hover, rgba(127, 127, 127, 0.12)); } +.ac-complete-name { + font-family: 'JetBrains Mono', ui-monospace, monospace; + font-size: 12.5px; color: var(--accent); +} +.ac-complete-desc { + font-size: 11.5px; color: var(--text-muted); + white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } .agent-chat-input textarea { flex: 1 1 auto; resize: none; diff --git a/gently/ui/web/static/js/agent-chat.js b/gently/ui/web/static/js/agent-chat.js index 577d18bd..3b10d8be 100644 --- a/gently/ui/web/static/js/agent-chat.js +++ b/gently/ui/web/static/js/agent-chat.js @@ -22,8 +22,16 @@ const AgentChat = (() => { let activityEl = null; // the persistent "working…" 
indicator (reused) let me = null; // { authenticated, username, role, can_control } + // Autocomplete: slash-command + @tool registries (pushed by the server on + // connect) and the live dropdown state. + let commands = []; // [{name, description, aliases, ...}] + let tools = []; // [{name, description, params, ...}] + let acItems = []; // current completion items shown in the dropdown + let acIdx = -1; // highlighted item index + // DOM refs (resolved in init) let fab, panel, log, input, sendBtn, conn, banner, closeBtn, userEl, signoutBtn; + let acComplete = null; // the autocomplete dropdown element // ── Safe rendering ──────────────────────────────────────── function escapeHtml(s) { @@ -138,6 +146,11 @@ const AgentChat = (() => { case 'connected': reconnectDelay = 1000; setConn(true, msg.version ? `Connected · v${msg.version}` : 'Connected'); + // The bridge ships the command + tool registries on connect. + // Capture them so the composer can offer autocomplete — the + // data was always on the wire; we just never used it. + commands = Array.isArray(msg.commands) ? msg.commands : []; + tools = Array.isArray(msg.tools) ? msg.tools : []; break; case 'control_status': @@ -182,10 +195,14 @@ const AgentChat = (() => { hideActivity(); // the running tool row is the signal now currentAgentEl = null; // text after a tool starts a fresh bubble const label = msg.tool_label || msg.tool_name || 'tool'; + const args = fmtArgs(msg.tool_input); const el = document.createElement('div'); el.className = 'ac-tool ac-tool-running'; el.dataset.tool = msg.tool_name || ''; - el.innerHTML = `${escapeHtml(label)}`; + el.innerHTML = + `
` + + `${escapeHtml(label)}
` + + (args ? `
${escapeHtml(args)}
` : ''); log.appendChild(el); scrollToBottom(); break; @@ -198,10 +215,29 @@ const AgentChat = (() => { const label = msg.tool_name || 'tool'; const dur = msg.duration ? ` · ${(msg.duration.toFixed ? msg.duration.toFixed(1) : msg.duration)}s` : ''; - const summary = msg.result_summary ? ` — ${escapeHtml(msg.result_summary)}` : ''; + const args = fmtArgs(msg.tool_input); + const summary = msg.result_summary || ''; + // Show ⚠ instead of ✓ when the tool errored or its result reads + // like a failure — so the operator can tell when a tool did nothing. + const isErr = !!msg.is_error || looksLikeError(summary); + const icon = isErr + ? `` + : ``; + const html = + `
${icon}` + + `${escapeHtml(label)}` + + `${dur}
` + + (args ? `
${escapeHtml(args)}
` : '') + + (summary ? `
${escapeHtml(summary)}
` : ''); if (el) { - el.className = 'ac-tool ac-tool-done'; - el.innerHTML = `${escapeHtml(label)}${dur}${summary}`; + el.className = 'ac-tool ac-tool-done' + (isErr ? ' ac-tool-err' : ''); + el.innerHTML = html; + } else { + // No matching running row (e.g. after a reconnect) — append fresh. + const fresh = document.createElement('div'); + fresh.className = 'ac-tool ac-tool-done' + (isErr ? ' ac-tool-err' : ''); + fresh.innerHTML = html; + log.appendChild(fresh); } if (streaming) setActivity('Working…'); // agent continues after the tool scrollToBottom(); @@ -296,6 +332,118 @@ const AgentChat = (() => { scrollToBottom(); } + // ── Tool argument formatting ────────────────────────────── + /** Compact, escaped "key=value" rendering of a tool's input for the chat. */ + function fmtArgs(input) { + if (!input || typeof input !== 'object') return ''; + const parts = []; + for (const [k, v] of Object.entries(input)) { + if (k === 'context' || v === null || v === undefined || v === '') continue; + let val = (typeof v === 'object') ? JSON.stringify(v) : String(v); + if (val.length > 48) val = val.slice(0, 47) + '…'; + parts.push(`${k}=${val}`); + } + return parts.join(' '); + } + + /** Heuristic: does a tool's result summary read like a failure? + * Used to show ⚠ for tools that return an error STRING (the agent only + * flags raised exceptions). Avoids false alarms like "No errors found". */ + function looksLikeError(s) { + if (!s) return false; + const t = s.trim(); + if (/^no\s+(errors?|issues?|problems?|anomal|changes?|warnings?)\b/i.test(t)) return false; + if (/^(error|failed|failure|unable|cannot|can'?t|could\s?n'?t|could not|denied|invalid|no |not )/i.test(t)) return true; + // mid-string failure markers, e.g. "Timepoint 7 not found for embryo_2". + return /\bnot (found|available|connected|recognized|valid|supported)\b/i.test(t); + } + + // ── Autocomplete ────────────────────────────────────────── + /** The whitespace-delimited token immediately left of the caret. 
*/ + function currentToken() { + const v = input.value; + const pos = (input.selectionStart != null) ? input.selectionStart : v.length; + const before = v.slice(0, pos); + const m = before.match(/(\S+)$/); + return { token: m ? m[1] : '', start: m ? pos - m[1].length : pos, pos }; + } + + /** Compute completion items for the current input/caret, or []. */ + function computeCompletions() { + const trimmed = input.value.trimStart().toLowerCase(); + // Slash commands: whole-input prefix (mirrors the TUI). A trailing space + // (i.e. typing args) naturally yields no matches and hides the menu. + if (trimmed.startsWith('/')) { + return commands.filter(c => + (c.name && c.name.toLowerCase().startsWith(trimmed)) || + (c.aliases || []).some(a => String(a).toLowerCase().startsWith(trimmed)) + ).slice(0, 8).map(c => ({ kind: 'command', name: c.name, desc: c.description || '' })); + } + // @tool mention: complete the token under the caret against tool names. + const tok = currentToken(); + if (tok.token.startsWith('@') && tools.length) { + const q = tok.token.slice(1).toLowerCase(); + return tools.filter(t => t.name.toLowerCase().includes(q)) + .slice(0, 8) + .map(t => ({ kind: 'tool', name: t.name, desc: t.description || '', token: tok })); + } + return []; + } + + function renderCompletions(items) { + acItems = items || []; + acIdx = acItems.length ? 0 : -1; + if (!acComplete) return; + if (!acItems.length) { hideCompletions(); return; } + acComplete.innerHTML = ''; + acItems.forEach((it, i) => { + const row = document.createElement('div'); + row.className = 'ac-complete-item' + (i === acIdx ? ' active' : ''); + row.innerHTML = + `${escapeHtml(it.name)}` + + (it.desc ? `${escapeHtml(it.desc)}` : ''); + // mousedown (not click) so it fires before the textarea blurs. 
+ row.addEventListener('mousedown', (e) => { e.preventDefault(); acceptCompletion(it); }); + acComplete.appendChild(row); + }); + acComplete.classList.remove('hidden'); + } + + function hideCompletions() { + acItems = []; + acIdx = -1; + if (acComplete) { acComplete.classList.add('hidden'); acComplete.innerHTML = ''; } + } + + function updateCompletions() { + renderCompletions(computeCompletions()); + } + + function moveCompletion(delta) { + if (!acItems.length || !acComplete) return; + acIdx = (acIdx + delta + acItems.length) % acItems.length; + [...acComplete.children].forEach((c, i) => c.classList.toggle('active', i === acIdx)); + } + + function acceptCompletion(item) { + if (!item) return; + if (item.kind === 'command') { + input.value = item.name + ' '; + const p = input.value.length; + try { input.setSelectionRange(p, p); } catch (_) {} + } else if (item.kind === 'tool') { + const tok = item.token || currentToken(); + const v = input.value; + const insert = '@' + item.name + ' '; + input.value = v.slice(0, tok.start) + insert + v.slice(tok.pos); + const p = tok.start + insert.length; + try { input.setSelectionRange(p, p); } catch (_) {} + } + hideCompletions(); + input.focus(); + autosize(); + } + // ── Control / UI state ──────────────────────────────────── function renderControl() { if (hasControl) { @@ -303,7 +451,7 @@ const AgentChat = (() => { banner.innerHTML = ''; input.disabled = false; sendBtn.disabled = false; - input.placeholder = 'Message Gently…'; + input.placeholder = 'Message Gently… ( / commands · @ tools )'; } else { banner.classList.remove('hidden'); const who = holderLabel || 'another session'; @@ -374,6 +522,7 @@ const AgentChat = (() => { // ── Input handling ──────────────────────────────────────── function submit() { if (streaming) { send({ type: 'cancel' }); return; } // Send doubles as Stop + hideCompletions(); const text = input.value.trim(); if (!text) return; if (!hasControl) { renderControl(); return; } @@ -456,9 +605,29 @@ const 
AgentChat = (() => { window.location.reload(); }); fetchMe(); + + // Build the autocomplete dropdown inside the composer (positioned above + // the textarea via CSS). + const inputWrap = input.parentNode; + if (inputWrap) { + acComplete = document.createElement('div'); + acComplete.className = 'ac-complete hidden'; + inputWrap.insertBefore(acComplete, inputWrap.firstChild); + } + sendBtn.addEventListener('click', submit); - input.addEventListener('input', autosize); + input.addEventListener('input', () => { autosize(); updateCompletions(); }); + // Close the menu shortly after blur (delay lets a mousedown selection land). + input.addEventListener('blur', () => setTimeout(hideCompletions, 120)); input.addEventListener('keydown', (e) => { + // While the completion menu is open it owns the navigation keys. + if (acItems.length) { + if (e.key === 'ArrowDown') { e.preventDefault(); moveCompletion(1); return; } + if (e.key === 'ArrowUp') { e.preventDefault(); moveCompletion(-1); return; } + if (e.key === 'Tab') { e.preventDefault(); acceptCompletion(acItems[acIdx]); return; } + if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); acceptCompletion(acItems[acIdx]); return; } + if (e.key === 'Escape') { e.preventDefault(); hideCompletions(); return; } + } if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); submit(); } if (e.key === 'Escape' && streaming) { e.preventDefault(); send({ type: 'cancel' }); } }); diff --git a/gently/ui/web/static/js/projection-viewer.js b/gently/ui/web/static/js/projection-viewer.js index 1f5f530e..7360f2d6 100644 --- a/gently/ui/web/static/js/projection-viewer.js +++ b/gently/ui/web/static/js/projection-viewer.js @@ -118,6 +118,10 @@ const ProjectionViewer = { this.projections = []; this.selectedMethod = null; this.isOpen = true; + // Clear any volume from a previous open so a failed /api/volume-raw fetch + // can't leave the prior embryo/timepoint's 3D data bound (stale-render). 
+ this.volumeData = null; + this.volumeShape = null; const modal = document.getElementById('projection-viewer-modal'); const loading = document.getElementById('pv-loading'); @@ -280,6 +284,10 @@ const ProjectionViewer = { }, selectMethod(method) { + // If the 3D view is requested but no volume loaded (e.g. /api/volume-raw + // failed while projections succeeded), fall back to the projections grid + // rather than showing an empty, never-initialized 3D panel. + if (method === '3d_viewer' && !this.volumeData) method = null; this.selectedMethod = method; this.renderProjections(); this.renderTabs(); diff --git a/gently/ui/web/static/js/websocket.js b/gently/ui/web/static/js/websocket.js index 1e3a534c..069724a2 100644 --- a/gently/ui/web/static/js/websocket.js +++ b/gently/ui/web/static/js/websocket.js @@ -127,6 +127,20 @@ function handleMessage(msg) { // Switch to embryos tab if not already there if (state.tab !== 'embryos') switchTab('embryos'); } + } else if (msg.type === 'open_volume') { + // The agent asked us to open the in-browser volume viewer — the + // web-native replacement for the old desktop napari window. + if (typeof ProjectionViewer !== 'undefined' && msg.embryo_id != null) { + const view = msg.view || '3d_viewer'; + Promise.resolve(ProjectionViewer.open(msg.embryo_id, msg.timepoint)) + .then(() => { + // Default to the 3D viewer tab when the agent opens it. + if (view && typeof ProjectionViewer.selectMethod === 'function') { + ProjectionViewer.selectMethod(view); + } + }) + .catch((e) => console.warn('open_volume failed', e)); + } } else if (msg.type === 'session_changed') { // The live agent switched sessions (resume from the Sessions tab) — // reload so every client picks up the new session's state + transcript. 
From c5009acf497e8624b9820fa278bda3113ca166cd Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 17:42:01 -0400 Subject: [PATCH 54/71] Integrate the agent with perception: pull tool, prompt context, event bridge, wake-router Closes the gap where perception ran as a fire-and-forget tool the agent never saw. agent.perceiver is the same Perceiver the orchestrator drives, so all reads are direct and side-effect-free. B1 (read-only): - get_recent_perceptions tool returns live per-embryo stage / stability / arrest / trajectory + the perceiver's reasoning. - A deterministic '## Perception (live)' section is injected into the system prompt (build_perception_snapshot), bypassing the AI context-summary cache so stage data is never stale. B2 (bridge + unify): - The perception path's DETECTOR_EVALUATED now mirrors into EmbryoState (latest_developmental_stage) on stage CHANGE only; role=test pseudo-stages, recheck-skips, and the 'no_object' sentinel are filtered out. This fixes the long-standing dead wiring (the agent subscribed to STAGE_DETECTED, which the perception loop never emits). - get_stage_history / predict_hatching now read the live Perceiver (hatching time computed from gently_perception's own organism stage durations), falling back to the DevelopmentalTracker. B3 (decision-moment wake-router, opt-in / default OFF): - gently/app/wake_router.py wakes the agent on stage transitions + critical events (hatching / arrest / embryo-terminated / errors), coalesced and throttled (critical bypasses the throttle); deferred events are re-armed, not dropped. Enabled via the set_autonomy tool. Full autonomy on a wake; device limits still bound it. - A new agent turn-lock serializes wake turns against user turns on the shared conversation history; run_wake_turn drives the normal streaming pipeline. 
Review fixes (adversarial pass): - bridge.stream_response now closes the agent generator in a finally, so the turn-lock always releases on cancel/error (was: stalled the next turn). - wake-router evaluates its guards before draining _pending so co-pending critical events survive an in-flight turn. - 'no_object' no longer mirrors as a developmental stage or triggers a wake; autonomous turns log when they auto-cancel an interactive picker. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/app/agent.py | 155 +++++++++++++++--- gently/app/tools/analysis_tools.py | 88 ++++++++++- gently/app/tools/timelapse_tools.py | 128 ++++++++++++++- gently/app/wake_router.py | 235 ++++++++++++++++++++++++++++ gently/harness/bridge.py | 10 ++ gently/harness/prompts/manager.py | 3 +- gently/harness/prompts/templates.py | 55 +++++++ 7 files changed, 646 insertions(+), 28 deletions(-) create mode 100644 gently/app/wake_router.py diff --git a/gently/app/agent.py b/gently/app/agent.py index 01c7371d..4d83b4b9 100644 --- a/gently/app/agent.py +++ b/gently/app/agent.py @@ -143,6 +143,10 @@ def __init__( self.on_message_callback: Optional[Callable] = None self.choice_handler: Optional[Callable] = None + # Serializes conversation turns: user turns and autonomous wake turns + # must not interleave on the shared conversation_history. + self._turn_lock = asyncio.Lock() + # Interaction logger for structured logging (research data collection) self.interaction_logger: Optional[InteractionLogger] = None @@ -241,6 +245,16 @@ def __init__( # Subscribe to CV result events for EmbryoState integration self._subscribe_to_cv_events() + # Decision-moment wake-router (opt-in, default OFF). Wakes the agent on + # wake-worthy perception/lifecycle events so it can adapt acquisition + # autonomously; enabled via the set_autonomy tool. 
+ try: + from gently.app.wake_router import WakeRouter + self.wake_router = WakeRouter(self, self._event_bus) + except Exception: + logger.exception("Failed to init wake-router") + self.wake_router = None + # Build initial system prompt self._update_system_prompt() @@ -420,7 +434,8 @@ def exit_plan_mode(self) -> str: def _update_system_prompt(self, context_summary: str = None): """Rebuild system prompt via PromptManager.""" self.system_prompt = self.prompts.update_system_prompt( - self.experiment, self.client, self.mode, context_summary + self.experiment, self.client, self.mode, context_summary, + perceiver=getattr(self, "perceiver", None), ) def _get_active_plan_summary(self) -> Optional[str]: @@ -637,6 +652,45 @@ def on_stage_detected(event): unsub = self._event_bus.subscribe(EventType.STAGE_DETECTED, on_stage_detected) self._cv_subscriptions.append(unsub) + def on_perception(event): + # Bridge the perception loop's DETECTOR_EVALUATED into EmbryoState so + # the prompt/display developmental stage reflects the live Perceiver. + # (The STAGE_DETECTED wiring above is never emitted by the perception + # path — this closes that long-standing gap.) Record only on an + # actual stage CHANGE to keep cv_analyses a clean transition log and + # avoid per-timepoint disk/cache churn; live stability/timing is read + # straight from the Perceiver by the prompt snapshot + pull tool. + try: + data = event.data + if data.get("skipped") or data.get("detector_name") != "perception": + return # ignore recheck-skips and role=test pseudo-stages + embryo_id = data.get("embryo_id") + stage = data.get("stage") + # 'no_object' is an empty-field sentinel, not a developmental + # stage — don't mirror it into latest_developmental_stage. 
+ if (not stage or stage == "no_object" or not embryo_id + or embryo_id not in self.experiment.embryos): + return + embryo = self.experiment.embryos[embryo_id] + if stage == getattr(embryo, "latest_developmental_stage", None): + return # steady state — nothing new to mirror + embryo.add_cv_result("stage_classification", { + "stage": stage, + "timepoint": data.get("timepoint"), + "stability": data.get("stability"), + "temporal_analysis": data.get("temporal_analysis"), + "detector_name": "perception", + }) + self.invalidate_context_cache() + self._auto_save() + logger.info("Perception: %s -> stage %s (t%s)", + embryo_id, stage, data.get("timepoint")) + except Exception as e: + logger.warning(f"Error handling perception event: {e}") + + unsub = self._event_bus.subscribe(EventType.DETECTOR_EVALUATED, on_perception) + self._cv_subscriptions.append(unsub) + logger.debug("Subscribed to CV result events") except Exception as e: @@ -877,35 +931,86 @@ async def handle_message_stream(self, user_message: str): yield {'type': 'text', 'text': quick_response} return - context_summary = await self.prompts.get_cached_context_summary( - self.experiment, self.timelapse_orchestrator, self.timeline_manager - ) - self._update_system_prompt(context_summary) + # Hold the turn-lock for the whole streamed turn so an autonomous wake + # turn cannot interleave on the shared conversation_history. 
+ lock = getattr(self, "_turn_lock", None) + acquired = False + if lock is not None: + await lock.acquire() + acquired = True + try: + context_summary = await self.prompts.get_cached_context_summary( + self.experiment, self.timelapse_orchestrator, self.timeline_manager + ) + self._update_system_prompt(context_summary) - self.conversation.conversation_history.append({ - "role": "user", - "content": user_message - }) + self.conversation.conversation_history.append({ + "role": "user", + "content": user_message + }) - tools = self._get_tools_for_mode() - cached_prompt = self._get_cached_system_prompt() + tools = self._get_tools_for_mode() + cached_prompt = self._get_cached_system_prompt() - inner_gen = self.conversation.call_claude_stream( - cached_prompt, tools, - tool_label_fn=self.conversation.tool_label, - auto_save_fn=self._auto_save, - ) - sent_value = None + inner_gen = self.conversation.call_claude_stream( + cached_prompt, tools, + tool_label_fn=self.conversation.tool_label, + auto_save_fn=self._auto_save, + ) + sent_value = None + try: + while True: + if sent_value is None: + chunk = await inner_gen.__anext__() + else: + chunk = await inner_gen.asend(sent_value) + sent_value = yield chunk + except StopAsyncIteration: + return + finally: + if acquired: + lock.release() + + async def run_wake_turn(self, wake_note: str): + """Drive one autonomous (no-user) turn for the wake-router. + + Runs through the normal streaming pipeline (so it acquires the turn-lock + and is recorded to conversation history / auto-saved). Surfaces text via + on_message_callback if a UI wired one; always runs so the decision and + any tool actions are persisted even with no client attached. Only acts in + run mode. 
+ """ + if self.mode != "run": + logger.info("Wake turn skipped — agent not in run mode (mode=%s)", self.mode) + return "" + text_parts = [] try: - while True: - if sent_value is None: - chunk = await inner_gen.__anext__() - else: - chunk = await inner_gen.asend(sent_value) - sent_value = yield chunk - except StopAsyncIteration: - return + async for chunk in self.handle_message_stream(wake_note): + if isinstance(chunk, dict) and chunk.get("type") == "text": + text_parts.append(chunk.get("text", "")) + elif isinstance(chunk, dict) and chunk.get("type") == "choice_request": + # A plain async-for resumes the generator with None, which the + # picker resolves as 'cancelled'. Make that visible — there is + # no operator to answer during an autonomous turn. + cd = chunk.get("choice_data", {}) + logger.warning( + "Wake turn invoked an interactive picker (%s); auto-cancelling " + "— no operator present.", cd.get("question", "?")) + cb = getattr(self, "on_message_callback", None) + if cb is not None: + try: + res = cb(chunk) + if asyncio.iscoroutine(res): + await res + except Exception: + logger.debug("on_message_callback failed for wake chunk", exc_info=True) + except Exception: + logger.exception("run_wake_turn error") + summary = "".join(text_parts).strip() + if summary: + logger.info("Autonomous wake turn result: %s", summary[:500]) + return summary async def get_tool_call(self, user_message: str) -> Optional[Dict]: """Dry-run tool call (for benchmarking).""" diff --git a/gently/app/tools/analysis_tools.py b/gently/app/tools/analysis_tools.py index 75422e77..ff813ad3 100644 --- a/gently/app/tools/analysis_tools.py +++ b/gently/app/tools/analysis_tools.py @@ -6,7 +6,7 @@ from typing import Dict, Optional -from gently.harness.tools.registry import tool, ToolCategory +from gently.harness.tools.registry import tool, ToolCategory, ToolExample from gently.harness.tools.helpers import require_agent, get_embryo_or_error @@ -43,6 +43,92 @@ async def analyze_volume( return 
f"Error analyzing volume: {str(e)}" +@tool( + name="get_recent_perceptions", + description="""Get the latest perception state for one embryo or all embryos: +current developmental stage, how many consecutive observations it has held that stage +(stability), a possible-arrest signal, the recent stage trajectory, and the +perceiver's reasoning. Source: the LIVE perception loop (reads accumulated state, +does not trigger a fresh capture). +Use when the user asks "what stage is embryo X", "is anything stuck/arrested", +"how are the embryos developing", or before deciding whether to adapt acquisition.""", + category=ToolCategory.ANALYSIS, + examples=[ + ToolExample("What stage is embryo_1 at?", {"embryo_id": "embryo_1"}), + ToolExample("How is everything developing?", {}), + ToolExample("Is anything arrested?", {}), + ], +) +def get_recent_perceptions( + embryo_id: Optional[str] = None, + n: int = 5, + context: Dict = None, +) -> str: + """Read live per-embryo perception state from the perception sessions. + + All reads here (get_session / summary / attribute access) are synchronous and + side-effect-free — they never trigger a VLM call. + """ + agent, err = require_agent(context) + if err: + return err + + perceiver = getattr(agent, "perceiver", None) + if perceiver is None: + return "Perception system not available." 
+ + def _one(eid: str) -> str: + try: + session = perceiver.get_session(eid) + except Exception as e: + return f"{eid}: perception read failed ({e})" + if session is None or not getattr(session, "current_stage", None): + return f"{eid}: no perceptions recorded yet" + summary = session.summary() + lines = [ + f"{eid}: stage={summary.get('current_stage')} " + f"(stable for {summary.get('stability', 0)} obs, " + f"{summary.get('observation_count', 0)} total)" + ] + seq = summary.get("stage_sequence") or [] + if seq: + lines.append(f" trajectory: {' -> '.join(seq)}") + temporal = summary.get("temporal") # TemporalContext dataclass or None + if temporal is not None: + tmin = getattr(temporal, "time_in_stage_min", 0.0) + exp = getattr(temporal, "expected_duration_min", None) + seg = f" time in stage: {tmin:.0f} min" + if exp: + seg += (f" (expected ~{exp:.0f} min, " + f"{getattr(temporal, 'overtime_ratio', 0.0):.1f}x)") + lines.append(seg) + if getattr(temporal, "is_potentially_arrested", False): + lines.append(" ** potentially ARRESTED **") + observations = getattr(session, "observations", None) or [] + if observations and n > 0: + recent = observations[-n:] + lines.append(f" recent observations (last {len(recent)}):") + for o in recent: + reason = (getattr(o, "reasoning", "") or "").strip().replace("\n", " ") + if len(reason) > 160: + reason = reason[:159] + "…" + lines.append(f" t{getattr(o, 'timepoint', '?')}: " + f"{getattr(o, 'stage', '?')} - {reason}") + return "\n".join(lines) + + if embryo_id: + return _one(embryo_id) + + embryos = getattr(agent.experiment, "embryos", {}) or {} + if not embryos: + return "No embryos in the experiment." 
+ out = ["Perception state (all embryos):", ""] + for eid in sorted(embryos): + out.append(_one(eid)) + out.append("") + return "\n".join(out).rstrip() + + @tool( name="get_detection_summary", description="Get summary of all detections across all embryos", diff --git a/gently/app/tools/timelapse_tools.py b/gently/app/tools/timelapse_tools.py index 2e01fa60..62b846f5 100644 --- a/gently/app/tools/timelapse_tools.py +++ b/gently/app/tools/timelapse_tools.py @@ -6,7 +6,7 @@ from typing import Dict, List, Optional -from gently.harness.tools.registry import tool, ToolCategory +from gently.harness.tools.registry import tool, ToolCategory, ToolExample from gently.harness.tools.helpers import ( require_agent, get_embryo_or_error, require_timelapse_orchestrator, require_developmental_tracker @@ -535,6 +535,32 @@ def get_stage_history( if err: return err + # Prefer the live perception session (the orchestrator's Perceiver, which the + # agent shares). The DevelopmentalTracker below is only populated by manual + # classify_embryo_stage calls, so it is usually empty in autonomous runs. 
+ perceiver = getattr(agent, "perceiver", None) + session = perceiver.get_session(embryo_id) if perceiver else None + if session is not None and getattr(session, "current_stage", None): + s = session.summary() + lines = [ + f"Stage progression for {embryo_id} (live perception):", + f" Current stage: {s.get('current_stage')} (stable for {s.get('stability', 0)} obs)", + f" Observations: {s.get('observation_count', 0)}", + ] + seq = s.get("stage_sequence") or [] + if seq: + lines.append(f" Trajectory: {' -> '.join(seq)}") + t = s.get("temporal") # TemporalContext dataclass or None + if t is not None: + exp = getattr(t, "expected_duration_min", None) + seg = f" Time in current stage: {getattr(t, 'time_in_stage_min', 0.0):.0f} min" + if exp: + seg += f" (expected ~{exp:.0f} min)" + lines.append(seg) + if getattr(t, "is_potentially_arrested", False): + lines.append(" ** potentially ARRESTED **") + return "\n".join(lines) + tracker, err = require_developmental_tracker(agent) if err: return err @@ -558,6 +584,42 @@ def get_stage_history( return "\n".join(lines) +def _perceiver_hatching_estimate(session) -> Optional[float]: + """Estimate minutes until the 'hatching' stage from the perception session. + + Uses gently_perception's own organism stage ordering + typical durations, so + no DevelopmentalStage enum mapping is needed. Returns None when unknown + (no_object / off-vocabulary stage), 0.0 when already hatching/hatched. 
+ """ + try: + from gently_perception.organism import CELEGANS + except Exception: + return None + stage = getattr(session, "current_stage", None) + if not stage or stage == "no_object": + return None + stages = list(CELEGANS.stages) + durations = dict(CELEGANS.stage_durations) + if stage in ("hatching", "hatched"): + return 0.0 + if stage not in stages or "hatching" not in stages: + return None + idx = stages.index(stage) + target = stages.index("hatching") + if idx >= target: + return 0.0 + # Remaining time in the current stage (expected minus already-elapsed). + elapsed = 0.0 + t = session.summary().get("temporal") + if t is not None: + elapsed = getattr(t, "time_in_stage_min", 0.0) or 0.0 + remaining = max(0.0, durations.get(stage, 0.0) - elapsed) + # Plus the full expected duration of each stage between current and hatching. + for s in stages[idx + 1:target]: + remaining += durations.get(s, 0.0) + return remaining + + @tool( name="predict_hatching", description="Predict time-to-hatching for an embryo with confidence intervals based on developmental stage", @@ -573,6 +635,39 @@ def predict_hatching( if err: return err + # Prefer the live perception session; the DevelopmentalTracker is usually + # empty in autonomous runs (only manual classify_embryo_stage feeds it). 
+ perceiver = getattr(agent, "perceiver", None) + + def _perc_line(eid: str): + session = perceiver.get_session(eid) if perceiver else None + if session is None or not getattr(session, "current_stage", None): + return None + stage = session.current_stage + if stage in ("hatching", "hatched"): + return f" {eid}: stage={stage} (hatching now / already hatched)" + est = _perceiver_hatching_estimate(session) + if est is None: + return f" {eid}: stage={stage} (time-to-hatching unknown)" + return f" {eid}: stage={stage}, ~{est / 60:.1f}h to hatching ({est:.0f} min)" + + if perceiver is not None: + if all_embryos: + ids = list(agent.experiment.embryos.keys()) + perc = [_perc_line(e) for e in ids] + if any(perc): + out = ["Hatching predictions (live perception):", ""] + out += [p for p in perc if p] + missing = [e for e, p in zip(ids, perc) if not p] + if missing: + out.append("") + out.append(f"(no perception yet for: {', '.join(missing)})") + return "\n".join(out) + elif embryo_id: + line = _perc_line(embryo_id) + if line: + return f"Hatching prediction for {embryo_id} (live perception):\n{line}" + tracker, err = require_developmental_tracker(agent) if err: return err @@ -629,6 +724,37 @@ def predict_hatching( return "\n".join(lines) +@tool( + name="set_autonomy", + description="""Enable or disable autonomous mode (the decision-moment wake-router). +When ON, the agent wakes itself between user messages on important perception events +(developmental stage transitions, potential arrest, hatching, embryo termination, or +errors) and may adjust acquisition on its own (interval, power, stop conditions, +bursts). Default is OFF. Device-layer safety limits still bound any action. 
+Use when the user says "enable autopilot", "watch and adapt on your own", "go +autonomous", or "turn off autonomy".""", + category=ToolCategory.ANALYSIS, + examples=[ + ToolExample("Enable autonomous mode", {"enabled": True}), + ToolExample("Turn off autopilot", {"enabled": False}), + ], +) +def set_autonomy(enabled: bool = True, context: Dict = None) -> str: + """Toggle the decision-moment wake-router.""" + agent, err = require_agent(context) + if err: + return err + router = getattr(agent, "wake_router", None) + if router is None: + return "Autonomy is not available (wake-router failed to initialize)." + state = router.set_enabled(enabled) + if state: + return ("Autonomous mode ENABLED. I'll wake on stage transitions, potential " + "arrest, hatching, termination, and errors — and adapt acquisition as " + "needed. Say 'turn off autonomy' to stop.") + return "Autonomous mode disabled. I'll only act when you message me." + + # --------------------------------------------------------------------------- # Reactive monitoring modes (Phase 5) — high-level "install canonical # detector → cadence + power reactive rules" entry points. Without one of diff --git a/gently/app/wake_router.py b/gently/app/wake_router.py new file mode 100644 index 00000000..344e9d71 --- /dev/null +++ b/gently/app/wake_router.py @@ -0,0 +1,235 @@ +"""Decision-moment wake-router for autonomous agent turns. + +Subscribes to wake-worthy perception/lifecycle events and, when enabled, wakes +the conversational agent between user messages so it can re-decide acquisition +(cadence, power, stop conditions) in response to what perception sees — the +closed loop. + +Design (opt-in, default OFF): + * Triggers: critical events (hatching / arrest / embryo-terminated / errors) + plus developmental stage transitions. No periodic heartbeat. + * Debounce: a burst of events inside COALESCE_WINDOW collapses into ONE wake. 
+ * Throttle: non-critical wakes are rate-limited by MIN_WAKE_INTERVAL; critical + events bypass the throttle. + * Serialization: the wake turn runs through the agent's normal streaming + pipeline, which holds the agent turn-lock, so it never races a user turn. + A wake therefore waits for any in-progress user turn — including an open + choice picker — to finish before it runs; "critical bypasses the throttle" + means it skips MIN_WAKE_INTERVAL, not that it preempts an active user turn + (preempting would interleave on the shared conversation history). + +Nothing fires until ``set_enabled(True)`` (e.g. via the set_autonomy tool). +""" +from __future__ import annotations + +import asyncio +import logging + +from gently.core.event_bus import EventType + +logger = logging.getLogger(__name__) + +# Tunables (seconds). +COALESCE_WINDOW = 20.0 # collapse a burst of events into one wake +MIN_WAKE_INTERVAL = 120.0 # throttle non-critical wakes + +# Events that always wake immediately (bypass MIN_WAKE_INTERVAL). +CRITICAL_EVENTS = frozenset({ + EventType.HATCHING_DETECTED, + EventType.EMBRYO_TERMINATED, + EventType.ERROR_OCCURRED, + EventType.ACQUISITION_FAILED, + EventType.ANOMALY_DETECTED, +}) +# Non-critical events we also inspect (filtered for real transitions / arrest). 
+WATCH_EVENTS = frozenset({EventType.DETECTOR_EVALUATED}) + + +class WakeRouter: + """Routes wake-worthy events into coalesced, throttled autonomous agent turns.""" + + def __init__(self, agent, bus): + self.agent = agent + self.bus = bus + self.enabled = False + self._loop = None + self._pending = [] # list[(EventType, dict)] + self._flush_handle = None # TimerHandle for the coalesce window + self._last_wake = 0.0 # loop.time() of the last fired wake + self._last_stage = {} # embryo_id -> last stage seen (transition detection) + self._in_flight = False + self._unsubs = [] + self._subscribe() + + # -- public control ------------------------------------------------- + def set_enabled(self, enabled: bool) -> bool: + self.enabled = bool(enabled) + if not self.enabled: + self._cancel_flush() + self._pending.clear() + logger.info("Wake-router %s", "ENABLED" if self.enabled else "disabled") + return self.enabled + + def is_enabled(self) -> bool: + return self.enabled + + def shutdown(self): + self._cancel_flush() + for unsub in self._unsubs: + try: + unsub() + except Exception: + pass + self._unsubs.clear() + + # -- subscription --------------------------------------------------- + def _subscribe(self): + for et in (CRITICAL_EVENTS | WATCH_EVENTS): + try: + self._unsubs.append( + self.bus.subscribe(et, lambda e, _et=et: self._on_event(_et, e)) + ) + except Exception: + logger.exception("wake-router failed to subscribe %s", et) + + # -- event intake --------------------------------------------------- + def _on_event(self, event_type, event): + # Synchronous handler (the bus calls it inline). Cheap-filter, then + # schedule a coalesced flush on the running loop. Never raise — the bus + # swallows handler exceptions, so failures would otherwise vanish. 
+ try: + if not self.enabled: + return + if self._loop is None: + try: + self._loop = asyncio.get_running_loop() + except RuntimeError: + return # no running loop -> can't schedule a wake; drop + data = getattr(event, "data", None) or {} + if not self._is_wake_worthy(event_type, data): + return + self._pending.append((event_type, data)) + self._schedule_flush(critical=event_type in CRITICAL_EVENTS) + except Exception: + logger.exception("wake-router _on_event error") + + def _is_wake_worthy(self, event_type, data) -> bool: + if event_type in CRITICAL_EVENTS: + return True + if event_type == EventType.DETECTOR_EVALUATED: + if data.get("skipped"): + return False + if data.get("detector_name") != "perception": + return False # role=test pseudo-stages are not developmental + stage = data.get("stage") + if not stage or stage == "no_object": + return False # empty-field sentinel — not a developmental change + ta = data.get("temporal_analysis") or {} + if ta.get("is_potentially_arrested"): + return True + eid = data.get("embryo_id") + last = self._last_stage.get(eid) + self._last_stage[eid] = stage + return stage != last # only a real transition wakes + return False + + # -- coalescing / flush -------------------------------------------- + def _schedule_flush(self, critical: bool): + loop = self._loop + if loop is None: + return + delay = 0.0 if critical else COALESCE_WINDOW + if self._flush_handle is None: + self._flush_handle = loop.call_later(delay, self._fire_flush) + elif critical: + # bring a pending window-flush forward + self._flush_handle.cancel() + self._flush_handle = loop.call_later(0.0, self._fire_flush) + + def _cancel_flush(self): + if self._flush_handle is not None: + try: + self._flush_handle.cancel() + except Exception: + pass + self._flush_handle = None + + def _fire_flush(self): + self._flush_handle = None + loop = self._loop + if loop is not None: + asyncio.ensure_future(self._flush(), loop=loop) + + async def _flush(self): + if not self._pending or 
not self.enabled: + self._pending.clear() + return + # Evaluate the guards BEFORE draining so a deferral can't lose events. + critical = any(et in CRITICAL_EVENTS for et, _ in self._pending) + now = self._loop.time() if self._loop else 0.0 + if self._in_flight or (not critical and (now - self._last_wake) < MIN_WAKE_INTERVAL): + # A wake is already running, or we're inside the non-critical throttle + # window. Keep _pending intact and re-arm so these events — including + # any CRITICAL ones — are retried once the turn finishes / window + # elapses, rather than being dropped. + logger.debug("wake deferred (in_flight=%s critical=%s)", self._in_flight, critical) + # Retry on the coalesce window (not delay 0) so a critical event + # deferred behind an in-flight turn doesn't busy-spin call_later(0). + self._schedule_flush(critical=False) + return + events = self._pending + self._pending = [] + self._in_flight = True + self._last_wake = now + try: + note = self._build_wake_note(events) + logger.info("Wake-router firing autonomous turn (%d event(s))", len(events)) + await self.agent.run_wake_turn(note) + except Exception: + logger.exception("wake turn failed") + finally: + self._in_flight = False + + # -- wake package --------------------------------------------------- + def _build_wake_note(self, events) -> str: + from gently.harness.prompts.templates import build_perception_snapshot + triggers = [] + for et, data in events: + name = getattr(et, "name", str(et)) + eid = data.get("embryo_id", "?") + stage = data.get("stage") + if et == EventType.HATCHING_DETECTED: + triggers.append(f"{eid}: hatching detected") + elif et == EventType.EMBRYO_TERMINATED: + triggers.append(f"{eid}: terminated ({data.get('completion_reason', '?')})") + elif et in (EventType.ERROR_OCCURRED, EventType.ACQUISITION_FAILED, + EventType.ANOMALY_DETECTED): + triggers.append(f"{eid}: {name.lower().replace('_', ' ')}") + elif et == EventType.DETECTOR_EVALUATED: + ta = data.get("temporal_analysis") or {} 
+ if ta.get("is_potentially_arrested"): + triggers.append(f"{eid}: potential arrest at stage {stage}") + else: + triggers.append(f"{eid}: stage -> {stage}") + else: + triggers.append(f"{eid}: {name.lower()}") + triggers = list(dict.fromkeys(triggers)) # dedupe, preserve order + + try: + snap = build_perception_snapshot( + getattr(self.agent, "perceiver", None), + getattr(getattr(self.agent, "experiment", None), "embryos", {}) or {}, + ) + except Exception: + snap = "" + snap = snap or "(no live perception data)" + + return ( + "[AUTONOMOUS WAKE] Something changed while no one was typing — decide if " + "any acquisition change is warranted.\n\n" + f"What triggered this: {'; '.join(triggers)}\n\n" + f"{snap}\n\n" + "If a change helps (adjust interval/power, add a stop condition, queue a " + "burst, or stop an embryo), do it now using your tools. If nothing needs " + "doing, say so briefly and take no action." + ) diff --git a/gently/harness/bridge.py b/gently/harness/bridge.py index 4ac10f90..028ce2d7 100644 --- a/gently/harness/bridge.py +++ b/gently/harness/bridge.py @@ -623,6 +623,16 @@ async def stream_response( except Exception as e: logger.error(f"Stream error: {e}", exc_info=True) await send_fn({"type": "error", "error": str(e)}) + finally: + # Deterministically close the agent generator so its turn-lock (and + # any other resources) release immediately. Without this, a cancelled + # or aborted stream leaves the generator suspended at a `yield` still + # holding self._turn_lock until non-deterministic GC, stalling the + # next user turn and any autonomous wake turn on lock.acquire(). 
+ try: + await stream_iter.aclose() + except Exception: + pass async def handle_command( self, diff --git a/gently/harness/prompts/manager.py b/gently/harness/prompts/manager.py index 2d2f6f42..cde5db44 100644 --- a/gently/harness/prompts/manager.py +++ b/gently/harness/prompts/manager.py @@ -47,7 +47,7 @@ def __init__(self, claude_client, model): self.memory = None # AgentMemory instance def update_system_prompt(self, experiment, client, mode: str, - context_summary: str = None) -> str: + context_summary: str = None, perceiver=None) -> str: """ Rebuild system prompt with current experiment state and connection status. @@ -96,6 +96,7 @@ def update_system_prompt(self, experiment, client, mode: str, experiment, connection_status, context_summary, memory_awareness=memory_awareness, microscope=client, + perceiver=perceiver, ) def get_tools_for_mode(self, mode: str, has_microscope: bool) -> list: diff --git a/gently/harness/prompts/templates.py b/gently/harness/prompts/templates.py index 55894c22..a2f7634f 100644 --- a/gently/harness/prompts/templates.py +++ b/gently/harness/prompts/templates.py @@ -290,12 +290,57 @@ """ +def build_perception_snapshot(perceiver, embryos) -> str: + """One compact line per embryo of live perception state for the system prompt. + + Reads straight from the perception sessions (current stage, stability, time in + stage, arrest signal, short trajectory). Every read here is synchronous and + side-effect-free — it never triggers a VLM call. Returns '' when there is + nothing to show, so callers can drop the section entirely. 
+ """ + if not perceiver or not embryos: + return "" + lines = [] + for embryo_id in sorted(embryos): + try: + session = perceiver.get_session(embryo_id) + summary = session.summary() if session is not None else None + except Exception: + summary = None + if not summary or not summary.get("current_stage"): + lines.append(f"- {embryo_id}: no perception yet") + continue + parts = [ + f"stage={summary['current_stage']}", + f"stable={summary.get('stability', 0)}x", + ] + temporal = summary.get("temporal") # TemporalContext dataclass or None + if temporal is not None: + tmin = getattr(temporal, "time_in_stage_min", None) + exp = getattr(temporal, "expected_duration_min", None) + if tmin is not None: + seg = f"in_stage={tmin:.0f}min" + if exp: + seg += f"/{exp:.0f}" + parts.append(seg) + if getattr(temporal, "is_potentially_arrested", False): + parts.append("ARRESTED?") + seq = summary.get("stage_sequence") or [] + if len(seq) > 1: + parts.append("traj=" + "->".join(seq[-4:])) + lines.append(f"- {embryo_id}: " + " ".join(parts)) + if not lines: + return "" + return "## Perception (live)\n\n" + "\n".join(lines) + + def build_system_prompt( experiment_state: ExperimentState, connection_status: dict = None, context_summary: str = None, memory_awareness: str = None, microscope=None, + perceiver=None, ) -> str: """ Build complete system prompt for Claude @@ -357,6 +402,15 @@ def build_system_prompt( else: context_section = "" + # Live per-embryo perception snapshot (deterministic, read straight from the + # perception sessions — bypasses the AI context-summary cache so stage data is + # never stale). 
+ perception_section = "" + if perceiver is not None and experiment_state.embryos: + snap = build_perception_snapshot(perceiver, experiment_state.embryos) + if snap: + perception_section = f"\n{snap}\n" + # Pull organism-specific content from the active organism module organism = get_organism() organism_display = organism.ORGANISM_DISPLAY_NAME @@ -404,6 +458,7 @@ def build_system_prompt( # Current Experiment State {embryo_summary} +{perception_section} {context_section} # Tool Use Guidelines From c4bfac2d57fbe03744358831ec5916baa338b53e Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 19:13:57 -0400 Subject: [PATCH 55/71] Live acquisition control, observable + permissioned autonomy, and a refreshed prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on the perception integration: gives the agent real live control over acquisition, makes its autonomous decisions visible and gated, fixes message interleaving, and updates the long-stale system prompt. Live control (A/B): - modify_timelapse_interval (whole run) + set_embryo_cadence (one embryo) change cadence on a running timelapse and correctly re-anchor next_due_at (closes the gap where no working live-interval tool existed). set_embryo_cadence reports a no-op instead of a misleading reschedule message. - set_photodose_budget (caps cumulative exposure; resumes budget-paused embryos on a raise, only when they're back under the cap) + get_photodose_status. - (snap-mode hardware plumbing deferred — needs real-hardware validation.) Observability (C): - Autonomous wake turns now stream to every web chat client and persist to the transcript, rendered distinctly: a 'Gently woke up — ' banner + a 'Gently · autonomous' bubble. bridge.register_display_broadcaster wires the previously-dead on_message_callback; run_wake_turn brackets the turn with autonomous_start/stream_end. Previously autonomous turns were invisible. 
Interleaving (D): - Typing while the agent is busy now QUEUES (with per-message remove + clear-all + auto-drain on idle) instead of cancelling; a separate Stop button replaces Send-as-Stop; the composer shows 'working' vs 'acting autonomously'. Slash commands no longer wedge the composer busy. Autonomy modes (E): - OFF / ASK / AUTO tri-state via set_autonomy, switchable mid-run. ASK proposes a change and waits for Approve/Modify/Skip in the chat (round-tripped through the wake choice channel, bounded by a timeout->Skip, lock released via aclose). Hybrid backstop: a few irreversible tools (set_laser_power, remove_embryo, stop_timelapse) can never run during an autonomous turn, enforced in the registry regardless of mode. System prompt (F): - Replaced the fictional cv_analyze 'CV subagent' block with an accurate Perception & Analysis section; added an Adapting-Acquisition (gentleness-first) + Autonomy (OFF/ASK/AUTO) section; removed the nonexistent enable_preset_detector reference and the stale interval tool name. Review fixes: turn-lock released on disconnect-cancel (CancelledError handled as 'cancelled'); picker futures discarded on timeout/cancel; deferred critical wakes re-fire promptly after a turn; Escape-cancel + the autonomous history flag reset cleanly. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/app/agent.py | 124 +++++++++--- gently/app/tools/timelapse_tools.py | 238 ++++++++++++++++++++++-- gently/app/wake_router.py | 68 +++++-- gently/harness/bridge.py | 42 +++++ gently/harness/prompts/templates.py | 150 ++++++++------- gently/harness/tools/registry.py | 14 ++ gently/ui/web/routes/agent_ws.py | 29 ++- gently/ui/web/static/css/agent-chat.css | 69 ++++++- gently/ui/web/static/js/agent-chat.js | 158 ++++++++++++++-- 9 files changed, 752 insertions(+), 140 deletions(-) diff --git a/gently/app/agent.py b/gently/app/agent.py index 4d83b4b9..9b144f92 100644 --- a/gently/app/agent.py +++ b/gently/app/agent.py @@ -147,6 +147,17 @@ def __init__( # must not interleave on the shared conversation_history. self._turn_lock = asyncio.Lock() + # Autonomy backstop: while a wake turn runs, _autonomous_active is True + # and the registry refuses these irreversible tools (they require a + # human). User turns are unaffected. _wake_choice_factory is set by the + # web bridge so ASK-mode wake turns can round-trip an approval picker. + self._autonomous_active = False + self._autonomous_blocked_tools = frozenset({ + "set_laser_power", "remove_embryo", "stop_timelapse", + }) + self._wake_choice_factory = None + self._wake_choice_discard = None + # Interaction logger for structured logging (research data collection) self.interaction_logger: Optional[InteractionLogger] = None @@ -972,46 +983,113 @@ async def handle_message_stream(self, user_message: str): if acquired: lock.release() - async def run_wake_turn(self, wake_note: str): + async def run_wake_turn(self, wake_note: str, trigger: str = None, interactive: bool = False): """Drive one autonomous (no-user) turn for the wake-router. Runs through the normal streaming pipeline (so it acquires the turn-lock - and is recorded to conversation history / auto-saved). 
Surfaces text via - on_message_callback if a UI wired one; always runs so the decision and - any tool actions are persisted even with no client attached. Only acts in - run mode. + and is recorded to conversation history / auto-saved). Brackets the turn + with an 'autonomous_start' (carrying the wake trigger) and a synthesized + 'stream_end' so it streams to the web chat distinctly. Sets + _autonomous_active so the registry backstop refuses irreversible tools. + When interactive (ASK mode) a choice_request round-trips through the + operator; otherwise it is auto-cancelled. Run mode only. """ if self.mode != "run": logger.info("Wake turn skipped — agent not in run mode (mode=%s)", self.mode) return "" + + async def _emit(chunk): + cb = getattr(self, "on_message_callback", None) + if cb is None: + return + try: + res = cb(chunk) + if asyncio.iscoroutine(res): + await res + except Exception: + logger.debug("on_message_callback failed for wake chunk", exc_info=True) + + await _emit({"type": "autonomous_start", "trigger": trigger or ""}) text_parts = [] + self._autonomous_active = True + agen = self.handle_message_stream(wake_note) + sent_value = None try: - async for chunk in self.handle_message_stream(wake_note): - if isinstance(chunk, dict) and chunk.get("type") == "text": + while True: + try: + if sent_value is None: + chunk = await agen.__anext__() + else: + chunk = await agen.asend(sent_value) + sent_value = None + except StopAsyncIteration: + break + ctype = chunk.get("type") if isinstance(chunk, dict) else None + if ctype == "text": text_parts.append(chunk.get("text", "")) - elif isinstance(chunk, dict) and chunk.get("type") == "choice_request": - # A plain async-for resumes the generator with None, which the - # picker resolves as 'cancelled'. Make that visible — there is - # no operator to answer during an autonomous turn. 
- cd = chunk.get("choice_data", {}) - logger.warning( - "Wake turn invoked an interactive picker (%s); auto-cancelling " - "— no operator present.", cd.get("question", "?")) - cb = getattr(self, "on_message_callback", None) - if cb is not None: - try: - res = cb(chunk) - if asyncio.iscoroutine(res): - await res - except Exception: - logger.debug("on_message_callback failed for wake chunk", exc_info=True) + if ctype == "choice_request": + # Resolve via the operator (ASK) or auto-cancel (AUTO). + sent_value = await self._resolve_wake_choice(chunk, _emit, interactive) + continue # don't re-emit the raw choice_request + await _emit(chunk) except Exception: logger.exception("run_wake_turn error") + finally: + self._autonomous_active = False + try: + # Release the turn-lock even if a picker hung / timed out. + await agen.aclose() + except Exception: + pass + await _emit({"type": "stream_end"}) summary = "".join(text_parts).strip() if summary: logger.info("Autonomous wake turn result: %s", summary[:500]) return summary + async def _resolve_wake_choice(self, chunk, emit, interactive): + """Resolve a choice_request raised during a wake turn. + + AUTO (or no operator channel) -> 'cancelled'. 
ASK -> register a future via + the web choice-factory, broadcast the picker to clients, and await the + operator's selection (timeout -> 'skip' so an unanswered picker can't hold + the turn-lock forever).""" + choice_data = chunk.get("choice_data", {}) if isinstance(chunk, dict) else {} + factory = getattr(self, "_wake_choice_factory", None) + if not interactive or factory is None: + logger.info("Wake picker auto-cancelled (interactive=%s, channel=%s)", + interactive, factory is not None) + return "cancelled" + try: + future = factory(choice_data) # registers future + sets request_id + except Exception: + logger.exception("wake choice factory failed") + return "cancelled" + request_id = choice_data.get("request_id", "") + await emit({**chunk, "origin": "wake", "request_id": request_id}) + from gently.app.wake_router import ASK_TIMEOUT_SEC + try: + selected = await asyncio.wait_for(future, timeout=ASK_TIMEOUT_SEC) + except asyncio.TimeoutError: + logger.info("Wake ASK timed out (%.0fs) -> skip", ASK_TIMEOUT_SEC) + selected = "skip" + except asyncio.CancelledError: + # The picker future was cancelled (e.g. the operator disconnected) — + # treat as a cancelled proposal so the turn finishes cleanly. + logger.info("Wake ASK future cancelled -> cancelled") + selected = "cancelled" + except Exception: + selected = "cancelled" + finally: + # Don't leak the future in the router-scoped registry on timeout/cancel. 
+ discard = getattr(self, "_wake_choice_discard", None) + if discard is not None and request_id: + try: + discard(request_id) + except Exception: + pass + return selected or "skip" + async def get_tool_call(self, user_message: str) -> Optional[Dict]: """Dry-run tool call (for benchmarking).""" context_summary = await self.prompts.get_cached_context_summary( diff --git a/gently/app/tools/timelapse_tools.py b/gently/app/tools/timelapse_tools.py index 62b846f5..316913a8 100644 --- a/gently/app/tools/timelapse_tools.py +++ b/gently/app/tools/timelapse_tools.py @@ -726,33 +726,237 @@ def _perc_line(eid: str): @tool( name="set_autonomy", - description="""Enable or disable autonomous mode (the decision-moment wake-router). -When ON, the agent wakes itself between user messages on important perception events -(developmental stage transitions, potential arrest, hatching, embryo termination, or -errors) and may adjust acquisition on its own (interval, power, stop conditions, -bursts). Default is OFF. Device-layer safety limits still bound any action. -Use when the user says "enable autopilot", "watch and adapt on your own", "go -autonomous", or "turn off autonomy".""", + description="""Set the autonomy mode of the decision-moment wake-router (default OFF). +Modes: + 'off' — never act on its own; only respond to your messages. + 'ask' — on a notable event (stage transition, arrest, hatching, termination, + errors) the agent PROPOSES a change and waits for you to Approve / + Modify / Skip in the chat before acting. + 'auto' — the agent adapts acquisition on its own (still bounded by device + limits; a few irreversible actions always require your confirmation). +You can switch modes mid-run. 
Use when the user says "enable autopilot/autonomous", +"ask me before changing things", "go fully autonomous", or "turn off autonomy".""", category=ToolCategory.ANALYSIS, examples=[ - ToolExample("Enable autonomous mode", {"enabled": True}), - ToolExample("Turn off autopilot", {"enabled": False}), + ToolExample("Ask me before adapting", {"mode": "ask"}), + ToolExample("Go fully autonomous", {"mode": "auto"}), + ToolExample("Turn off autonomy", {"mode": "off"}), ], ) -def set_autonomy(enabled: bool = True, context: Dict = None) -> str: - """Toggle the decision-moment wake-router.""" +def set_autonomy(mode: str = None, enabled: bool = None, context: Dict = None) -> str: + """Set the wake-router mode (off/ask/auto). `enabled` kept for back-compat.""" agent, err = require_agent(context) if err: return err router = getattr(agent, "wake_router", None) if router is None: return "Autonomy is not available (wake-router failed to initialize)." - state = router.set_enabled(enabled) - if state: - return ("Autonomous mode ENABLED. I'll wake on stage transitions, potential " - "arrest, hatching, termination, and errors — and adapt acquisition as " - "needed. Say 'turn off autonomy' to stop.") - return "Autonomous mode disabled. I'll only act when you message me." + if mode is not None: + m = str(mode).strip().lower() + if m not in ("off", "ask", "auto"): + return "mode must be 'off', 'ask', or 'auto'." + router.set_mode(m) + elif enabled is not None: + router.set_enabled(bool(enabled)) + else: + return "Specify mode ('off', 'ask', or 'auto')." + cur = router.mode + if cur == "auto": + return ("Autonomy set to AUTO. I'll wake on stage transitions, arrest, " + "hatching, termination, and errors and adapt acquisition on my own " + "(irreversible actions still need your okay). Say 'ask mode' or " + "'turn off autonomy' to change.") + if cur == "ask": + return ("Autonomy set to ASK. 
On a notable event I'll propose a change and " + "wait for your Approve / Modify / Skip before doing anything.") + return "Autonomy OFF. I'll only act when you message me." + + +# --------------------------------------------------------------------------- +# Live cadence / dose modulation — direct knobs for a running timelapse. +# --------------------------------------------------------------------------- + + +@tool( + name="modify_timelapse_interval", + description="""Change the base acquisition interval for ALL embryos on a running timelapse, effective immediately. +Re-anchors every embryo's next acquisition to now + the new interval and notifies the UI. +Lower interval = more frequent imaging = more photodose; raise it to be gentler. +Use when the user says "image every N minutes/seconds now", "speed up/slow down the whole run". +For a single embryo use set_embryo_cadence instead.""", + category=ToolCategory.EXPERIMENT, + examples=[ + ToolExample("Image every 2 minutes now", {"new_interval_seconds": 120}), + ToolExample("Slow everything down to 10 minutes", {"new_interval_seconds": 600}), + ], +) +def modify_timelapse_interval(new_interval_seconds: float, context: Dict = None) -> str: + """Globally re-anchor the timelapse interval (live).""" + agent, err = require_agent(context) + if err: + return err + orchestrator, err = require_timelapse_orchestrator(agent) + if err: + return err + return orchestrator.modify_interval(new_interval_seconds) + + +@tool( + name="set_embryo_cadence", + description="""Change ONE embryo's acquisition cadence on a running timelapse, effective immediately. +Set new_interval_seconds to re-anchor that embryo's next acquisition to now + interval (lower = more frequent = more dose). +Set new_phase to 'normal' to resume a paused embryo, or 'paused' to pause it. +NOTE: re-issuing the SAME interval with the SAME phase is a no-op (it won't re-anchor). +Use for per-embryo tuning, e.g. 
speed up the one that's developing fastest.""", + category=ToolCategory.EXPERIMENT, + examples=[ + ToolExample("Image embryo_2 every minute", {"embryo_id": "embryo_2", "new_interval_seconds": 60}), + ToolExample("Resume embryo_3", {"embryo_id": "embryo_3", "new_phase": "normal"}), + ], +) +def set_embryo_cadence( + embryo_id: str, + new_interval_seconds: float = None, + new_phase: str = None, + context: Dict = None, +) -> str: + """Per-embryo cadence change routed through the re-anchoring path.""" + agent, err = require_agent(context) + if err: + return err + orchestrator, err = require_timelapse_orchestrator(agent) + if err: + return err + embryo, err = get_embryo_or_error(agent, embryo_id) + if err: + return err + if new_interval_seconds is None and new_phase is None: + return "Specify new_interval_seconds and/or new_phase." + if new_interval_seconds is not None and new_interval_seconds < 1: + return "Interval must be >= 1 second." + if new_phase is not None and new_phase not in ("normal", "fast", "burst", "paused"): + return "new_phase must be one of: normal, fast, burst, paused." + # Detect the no-op (transition_cadence silently does nothing, and would NOT + # re-anchor next_due_at, if neither interval nor phase actually changes). + cur_interval = getattr(embryo, "interval_seconds", None) + cur_phase = getattr(embryo, "cadence_phase", None) + interval_change = new_interval_seconds is not None and new_interval_seconds != cur_interval + phase_change = new_phase is not None and new_phase != cur_phase + if not interval_change and not phase_change: + shown = f"{cur_interval:.0f}s" if cur_interval is not None else "default" + return f"{embryo.id}: no change (already interval={shown}, phase={cur_phase})." 
+ orchestrator.transition_cadence( + embryo, + new_interval_seconds=new_interval_seconds if interval_change else None, + new_phase=new_phase if phase_change else None, + reason="agent:set_embryo_cadence", + ) + bits = [] + if interval_change: + bits.append(f"interval={new_interval_seconds:.0f}s") + if phase_change: + bits.append(f"phase={new_phase}") + due = getattr(embryo, "next_due_at", None) + tail = f"; next acquisition ~{due.strftime('%H:%M:%S')}" if due else "" + return f"{embryo.id}: {', '.join(bits)}{tail}" + + +@tool( + name="set_photodose_budget", + description="""Set or clear the per-embryo photodose budget (a hard cap on cumulative laser exposure). +base_dose_budget_ms is the ceiling for a 1x-role (test) embryo; calibration embryos get 10x. +When an embryo's cumulative exposure exceeds its budget it is auto-PAUSED to protect the sample. +Pass null/None to DISABLE the cap. Raising the budget also resumes embryos that were paused for the old cap. +Use to enforce gentleness on precious samples, or to lift the cap when the user okays more dose.""", + category=ToolCategory.EXPERIMENT, + examples=[ + ToolExample("Cap each embryo at 5 seconds of light", {"base_dose_budget_ms": 5000}), + ToolExample("Remove the photodose cap", {"base_dose_budget_ms": None}), + ], +) +def set_photodose_budget( + base_dose_budget_ms: float = None, + resume_paused: bool = True, + context: Dict = None, +) -> str: + """Set/clear the photodose budget; optionally resume budget-paused embryos.""" + agent, err = require_agent(context) + if err: + return err + orchestrator, err = require_timelapse_orchestrator(agent) + if err: + return err + # Capture who was budget-paused BEFORE set_photodose_budget clears the set, + # so we only resume embryos paused for the budget (not manual pauses/bursts). 
+ prev_exceeded = set(getattr(orchestrator, "_dose_budget_exceeded", set()) or set()) + msg = orchestrator.set_photodose_budget(base_dose_budget_ms) + resumed = [] + if resume_paused: + states = getattr(orchestrator, "_embryo_states", {}) or {} + try: + from gently.harness.roles import REGISTRY as ROLE_REGISTRY + except Exception: + ROLE_REGISTRY = {} + for eid in prev_exceeded: + e = states.get(eid) + if e is None or getattr(e, "cadence_phase", None) != "paused": + continue + # Only resume if the embryo is now UNDER the new budget (or the cap + # was disabled); otherwise it would just immediately re-pause. + if base_dose_budget_ms is not None: + rdef = ROLE_REGISTRY.get(getattr(e, "role", "test")) if hasattr(ROLE_REGISTRY, "get") else None + mult = getattr(rdef, "photodose_budget_multiplier", 1.0) if rdef else 1.0 + if (getattr(e, "total_exposure_ms", 0.0) or 0.0) > base_dose_budget_ms * mult: + continue + orchestrator.transition_cadence(e, new_phase="normal", reason="agent:budget change resume") + resumed.append(eid) + if resumed: + msg += f" Resumed: {', '.join(sorted(resumed))}." + return msg + + +@tool( + name="get_photodose_status", + description="""Report each embryo's cumulative light exposure vs its photodose budget, and which are paused over budget. 
+Use to reason about gentleness before/after changing the budget, power, or cadence.""", + category=ToolCategory.ANALYSIS, + examples=[ToolExample("How much light has each embryo gotten?", {})], +) +def get_photodose_status(context: Dict = None) -> str: + """Read-only photodose / budget status across embryos.""" + agent, err = require_agent(context) + if err: + return err + orchestrator, err = require_timelapse_orchestrator(agent) + if err: + return err + base = getattr(orchestrator, "_dose_budget_base_ms", None) + exceeded = getattr(orchestrator, "_dose_budget_exceeded", set()) or set() + states = getattr(orchestrator, "_embryo_states", {}) or {} + if base is None: + lines = ["Photodose budget: DISABLED (no cap).", ""] + else: + lines = [f"Photodose budget: {base:.0f} ms base (scaled per role).", ""] + try: + from gently.harness.roles import REGISTRY as ROLE_REGISTRY + except Exception: + ROLE_REGISTRY = {} + for eid in sorted(states): + e = states[eid] + used = getattr(e, "total_exposure_ms", 0.0) or 0.0 + role = getattr(e, "role", "test") + if base is not None: + rdef = ROLE_REGISTRY.get(role) if hasattr(ROLE_REGISTRY, "get") else None + mult = getattr(rdef, "photodose_budget_multiplier", 1.0) if rdef else 1.0 + cap = base * mult + pct = (used / cap * 100.0) if cap else 0.0 + flag = " [PAUSED: over budget]" if eid in exceeded else "" + lines.append(f" {eid} ({role}): {used:.0f}/{cap:.0f} ms ({pct:.0f}%){flag}") + else: + lines.append(f" {eid} ({role}): {used:.0f} ms used") + if len(lines) == 2: + lines.append(" (no embryos)") + return "\n".join(lines) # --------------------------------------------------------------------------- diff --git a/gently/app/wake_router.py b/gently/app/wake_router.py index 344e9d71..71faa7ce 100644 --- a/gently/app/wake_router.py +++ b/gently/app/wake_router.py @@ -32,6 +32,7 @@ # Tunables (seconds). 
COALESCE_WINDOW = 20.0 # collapse a burst of events into one wake MIN_WAKE_INTERVAL = 120.0 # throttle non-critical wakes +ASK_TIMEOUT_SEC = 300.0 # ASK mode: how long to wait for operator approval -> Skip # Events that always wake immediately (bypass MIN_WAKE_INTERVAL). CRITICAL_EVENTS = frozenset({ @@ -51,7 +52,7 @@ class WakeRouter: def __init__(self, agent, bus): self.agent = agent self.bus = bus - self.enabled = False + self.mode = "off" # 'off' | 'ask' | 'auto' self._loop = None self._pending = [] # list[(EventType, dict)] self._flush_handle = None # TimerHandle for the coalesce window @@ -62,12 +63,24 @@ def __init__(self, agent, bus): self._subscribe() # -- public control ------------------------------------------------- - def set_enabled(self, enabled: bool) -> bool: - self.enabled = bool(enabled) - if not self.enabled: + @property + def enabled(self) -> bool: + return self.mode != "off" + + def set_mode(self, mode: str) -> str: + mode = (mode or "off").strip().lower() + if mode not in ("off", "ask", "auto"): + mode = "off" + self.mode = mode + if mode == "off": self._cancel_flush() self._pending.clear() - logger.info("Wake-router %s", "ENABLED" if self.enabled else "disabled") + logger.info("Wake-router mode -> %s", mode.upper()) + return self.mode + + def set_enabled(self, enabled: bool) -> bool: + """Back-compat boolean toggle: maps to AUTO / OFF.""" + self.set_mode("auto" if enabled else "off") return self.enabled def is_enabled(self) -> bool: @@ -182,16 +195,28 @@ async def _flush(self): self._in_flight = True self._last_wake = now try: - note = self._build_wake_note(events) - logger.info("Wake-router firing autonomous turn (%d event(s))", len(events)) - await self.agent.run_wake_turn(note) + ask = (self.mode == "ask") + note, trigger = self._build_wake_note(events, ask=ask) + logger.info("Wake-router firing %s turn (%d event(s)): %s", + self.mode.upper(), len(events), trigger) + await self.agent.run_wake_turn(note, trigger=trigger, interactive=ask) 
except Exception: logger.exception("wake turn failed") finally: self._in_flight = False + # Events that arrived while we were busy (including deferred CRITICAL + # ones) are still in _pending — re-fire promptly rather than waiting + # out another coalesce window. _in_flight is now False so this flush + # will proceed instead of deferring (no busy-spin). + if self._pending and self.enabled: + self._schedule_flush( + critical=any(et in CRITICAL_EVENTS for et, _ in self._pending)) # -- wake package --------------------------------------------------- - def _build_wake_note(self, events) -> str: + def _build_wake_note(self, events, ask=False): + """Return (note, trigger_str). The note is the agent-facing wake prompt; + trigger_str is the short human-readable reason shown in the chat banner. + When ask=True the note instructs propose-then-confirm instead of acting.""" from gently.harness.prompts.templates import build_perception_snapshot triggers = [] for et, data in events: @@ -223,13 +248,24 @@ def _build_wake_note(self, events) -> str: except Exception: snap = "" snap = snap or "(no live perception data)" + trigger_str = "; ".join(triggers) - return ( - "[AUTONOMOUS WAKE] Something changed while no one was typing — decide if " - "any acquisition change is warranted.\n\n" - f"What triggered this: {'; '.join(triggers)}\n\n" + head = ( + "[AUTONOMOUS WAKE] Something changed while no one was typing.\n\n" + f"What triggered this: {trigger_str}\n\n" f"{snap}\n\n" - "If a change helps (adjust interval/power, add a stop condition, queue a " - "burst, or stop an embryo), do it now using your tools. If nothing needs " - "doing, say so briefly and take no action." ) + if ask: + tail = ( + "Decide whether any acquisition change is warranted. If so, briefly " + "state your proposed change and WHY, then call ask_user_choice with " + "options Approve / Modify / Skip and act ONLY if the operator approves. 
" + "If nothing needs doing, say so briefly and take no action (no need to ask)." + ) + else: + tail = ( + "If a change helps (adjust interval/power, add a stop condition, queue a " + "burst, or stop an embryo), do it now using your tools. If nothing needs " + "doing, say so briefly and take no action." + ) + return head + tail, trigger_str diff --git a/gently/harness/bridge.py b/gently/harness/bridge.py index 028ce2d7..812b9e15 100644 --- a/gently/harness/bridge.py +++ b/gently/harness/bridge.py @@ -43,6 +43,48 @@ def __init__(self, agent): self._wizard = None # StartupWizard, set by init_wizard() self._active_remote: Optional[Dict[str, Any]] = None # {"peer": PeerInfo, "campaign_id": str} self._pending_import: Optional[Dict] = None # For /import-embryos picker + # Set by the web layer (register_display_broadcaster) so AGENT-INITIATED + # turns (the wake-router) can stream to all chat clients + the transcript. + self._display_broadcaster: Optional[Callable] = None + + def register_display_broadcaster(self, broadcast_fn, choice_factory=None, + choice_discard=None) -> None: + """Register the web layer's broadcast fn for autonomous (wake) turns. + + The wake-router has no per-connection send_fn, so to make autonomous + turns visible we route their chunks through the same _broadcast the web + route uses for user turns (records to the display transcript AND fans out + to every connected chat client). Also wires the agent's dangling + on_message_callback to this path, and (for ASK mode) the choice-future + factory so an autonomous turn can round-trip an approval picker. + Idempotent — last registration wins; the registered fns are router-scoped + and fan out to whoever is connected. 
+ """ + self._display_broadcaster = broadcast_fn + try: + self.agent.on_message_callback = self.broadcast_autonomous_chunk + if choice_factory is not None: + self.agent._wake_choice_factory = choice_factory + if choice_discard is not None: + self.agent._wake_choice_discard = choice_discard + except Exception: + pass + + async def broadcast_autonomous_chunk(self, chunk) -> None: + """Fan one autonomous-turn chunk to all chat clients + the transcript. + + No-op when no web client has registered a broadcaster (headless run) — + the wake turn still executes and is persisted to the conversation/log. + """ + fn = self._display_broadcaster + if fn is None: + return + try: + res = fn(chunk) + if asyncio.iscoroutine(res): + await res + except Exception: + logger.debug("broadcast_autonomous_chunk failed", exc_info=True) async def handle_choice_response(self, request_id: str, selected: str, send_fn) -> bool: """Handle a choice response that may belong to a bridge-initiated picker. diff --git a/gently/harness/prompts/templates.py b/gently/harness/prompts/templates.py index a2f7634f..61d06706 100644 --- a/gently/harness/prompts/templates.py +++ b/gently/harness/prompts/templates.py @@ -87,62 +87,32 @@ # CV Subagent capabilities CV_SUBAGENT = """ -# CV Subagent for Advanced Analysis - -For complex computer vision analysis, you have access to a specialized CV subagent via the `cv_analyze` tool. - -## IMPORTANT: Volume Required First! - -Before using cv_analyze or classify_embryo_stage, you MUST ensure the embryo has a volume acquired -in this session. If the user asks for cell counting, stage classification, or any analysis: - -1. Check if the embryo has been imaged (recent_images exists) -2. If NOT, acquire a volume first with `acquire_volume` -3. 
Then proceed with analysis - -Example workflow: -User: "Count the cells in embryo_3" -→ First: acquire_volume(embryo_id="embryo_3") # Get fresh data -→ Then: cv_analyze(intent="count cells", embryo_id="embryo_3") - -## When to use cv_analyze - -Use the CV subagent when you need: -- **Accurate stage classification** - It segments nuclei (Cellpose) and uses count + morphology for staging -- **Cell counting** - 3D segmentation gives precise nuclei counts, not visual estimates -- **Division tracking** - Tracks cells across timepoints, identifies division events -- **Morphology measurements** - Elongation ratio, circularity (important for comma/fold stages) -- **Anomaly detection** - Compares to expected developmental patterns - -## When NOT to use cv_analyze - -Don't use it for: -- Quick visual checks (use simple image viewing instead) -- Hatching detection (the hatching detector handles this) -- Basic "what stage is this?" if rough estimate is fine - -## How it works - -The CV subagent is itself an AI agent that: -1. Loads volume data from the data store -2. Segments with Cellpose/StarDist (nuclei count!) -3. Measures morphology (elongation for fold stages) -4. Adds scale bars and annotations -5. Uses Claude Vision with rich quantitative context - -This gives much more accurate results than just sending an image to vision. - -## Example usage - -User: "How many cells does embryo 1 have?" -→ First acquire_volume if needed, then cv_analyze with intent="count cells and nuclei" - -User: "What stage is embryo 2?" -→ If precision matters: acquire_volume then cv_analyze intent="classify developmental stage" -→ If quick check: view the image yourself - -User: "Track cell divisions over the last 5 timepoints" -→ cv_analyze with intent="track cell divisions" and timepoints=[t-4, t-3, t-2, t-1, t] +# Perception & Analysis + +You see and reason about embryo development through three channels: + +1. 
**Live perception (the perceiver).** During a timelapse a vision-language + perceiver classifies each acquired volume's developmental stage and tracks + each embryo's trajectory. Its current read is injected into your context + under "## Perception (live)" — stage, stability (how long it's held that + stage), time-in-stage, and a possible-arrest flag. Call + `get_recent_perceptions(embryo_id)` for the fuller picture: stage history, + trajectory, the arrest signal, and the perceiver's own reasoning. This is + your primary signal for "how is it developing?" and for deciding whether to + adapt acquisition. + +2. **On-demand vision (`analyze_volume`).** Ask Claude Vision a specific + question about an acquired volume (e.g. "is the reporter saturating?", + "describe the morphology"). Requires a volume in this session — acquire one + first with `acquire_volume` if none exists. + +3. **Stage tools.** `classify_embryo_stage` (a vision spot-check of the latest + image), `get_stage_history`, and `predict_hatching` — the latter two read the + live perceiver when available, so they work without a manual classify call. + +Prefer the live perception snapshot + `get_recent_perceptions` for routine +"what stage / is anything stuck" questions; reach for `analyze_volume` when you +need a specific visual judgement about a particular volume. """ @@ -266,30 +236,78 @@ 1. User: "Run timelapse until all embryos hatch" 2. 
Agent: - - Enables hatching detector (enable_preset_detector) - - Starts timelapse with stop_condition="hatching" + - Starts the timelapse with stop_condition="hatching" (the stop condition + wires the detection; the perception loop classifies each acquired volume) + - Optionally installs a monitoring mode (enable_monitoring_mode) for + reactive cadence/power - Reports progress on request - Each embryo stops automatically when it hatches -## Available Preset Detectors +## Stage detection -- **hatching**: Detects eggshell breach and embryo emergence -- **comma**: Detects comma stage morphology -- **pretzel**: Detects 3-fold/pretzel stage -- **gastrulation**: Detects cell internalization -- **first_division**: Detects 1-cell to 2-cell transition +Developmental stage comes from the live perception loop (see "Perception & +Analysis"), surfaced in your context and via get_recent_perceptions. Stop +conditions can key on it — e.g. stop_condition="hatching" or "comma". ## Commands During Timelapse - Query status: get_timelapse_status - Stop one embryo: stop_timelapse_embryo -- Change interval: modify_timelapse_embryo +- Change interval (all embryos): modify_timelapse_interval +- Change one embryo's cadence: set_embryo_cadence +- Other per-embryo params: modify_timelapse_embryo / modify_parameters - Pause all: pause_timelapse - Resume: resume_timelapse - Stop all: stop_timelapse """ +AUTONOMY_AND_ADAPTATION = """ +# Adapting Acquisition — Gently + +Gentleness is the prime directive: every imaging action spends photodose on a +precious, living sample. Always prefer the *least* light that answers the +question. When you do adapt, you have direct, live knobs — each takes effect on +the embryo's next acquisition, no restart: + +- **Cadence**: `modify_timelapse_interval` (whole run) / `set_embryo_cadence` + (one embryo). Speed up only around events worth catching (e.g. approaching + hatching); slow back down when nothing is changing. 
+- **Dose levers**: `modify_parameters` — num_slices, exposure_ms, acquisition + mode (volume ↔ snap, snap is far gentler), and per-embryo 488 power (hard + clamped 2–6%). `set_photodose_budget` caps cumulative exposure and pauses an + embryo that exceeds it; `get_photodose_status` shows where each stands. +- **Events**: `add_stop_condition` (auto-stop on hatching/stage/duration), + `queue_burst` (one-shot high-rate capture of a transient), and per-embryo + pause / resume / stop. +- **Reactive modes**: `enable_monitoring_mode` installs perception-driven rules + that fire on their own (pre-hatching speedup, 488 rampdown on saturation, + burst on stable structure). + +Bias toward the gentlest sufficient action — snap over volume, fewer slices, +lower power, longer interval — unless an event genuinely needs the resolution. + +# Autonomy (OFF / ASK / AUTO) + +You may act between user messages, but only as far as the operator allows. The +mode is set with `set_autonomy` and is **OFF by default**: + +- **off** — act only when the user messages you. +- **ask** — on a notable event (a developmental stage transition, possible + arrest, hatching, an embryo terminating, or an error) you wake, briefly state + your PROPOSED change and why, then call `ask_user_choice` with + Approve / Modify / Skip and act ONLY on Approve. +- **auto** — you adapt on your own on those events. Still: prefer the gentlest + action, and a few irreversible tools (turning the laser on via + `set_laser_power`, `remove_embryo`, `stop_timelapse`) are hard-blocked from + autonomous use — ask the operator for those. + +When you wake autonomously, your turn and the trigger that woke you are shown to +the operator in the chat. Keep autonomous turns tight: assess, make the smallest +helpful change (or none), and explain it in a sentence or two. +""" + + def build_perception_snapshot(perceiver, embryos) -> str: """One compact line per embryo of live perception state for the system prompt. 
@@ -451,6 +469,8 @@ def build_system_prompt( {REACTIVE_MONITORING_MODES} +{AUTONOMY_AND_ADAPTATION} + {USER_INTERACTION_GUIDELINES} {SESSION_MANAGEMENT} diff --git a/gently/harness/tools/registry.py b/gently/harness/tools/registry.py index 106bd7a2..48e1cf4f 100644 --- a/gently/harness/tools/registry.py +++ b/gently/harness/tools/registry.py @@ -425,6 +425,20 @@ async def execute(self, tool_name: str, tool_input: Dict, context: Dict = None) else: exec_context = self._context + # Hybrid-autonomy backstop: during an autonomous (wake) turn, a small set + # of irreversible tools (laser-on, embryo termination, stopping the run) + # must NEVER execute without a human — even if the model tries to call + # them directly. The agent sets these flags around its autonomous turns; + # user-driven turns are unaffected. The blocked set is supplied by the + # agent so this layer stays free of app-specific tool names. + _agent = exec_context.get('agent') if isinstance(exec_context, dict) else None + if _agent is not None and getattr(_agent, '_autonomous_active', False): + blocked = getattr(_agent, '_autonomous_blocked_tools', None) or () + if tool_name in blocked: + logger.info("Autonomy backstop blocked '%s' (irreversible)", tool_name) + return (f"'{tool_name}' is an irreversible action and cannot run " + f"autonomously. Ask the operator to confirm it.") + # Check microscope requirement if tool.requires_microscope: client = exec_context.get('client') diff --git a/gently/ui/web/routes/agent_ws.py b/gently/ui/web/routes/agent_ws.py index 0d27a5d0..36ca6ee5 100644 --- a/gently/ui/web/routes/agent_ws.py +++ b/gently/ui/web/routes/agent_ws.py @@ -48,7 +48,7 @@ def create_router(server) -> APIRouter: # Persisted to /chat_display.json so it survives reconnects and # restarts; broadcast live so all instances stay in sync. 
_history: list = [] - _history_state = {"sid": None, "path": None, "agent_buf": None} + _history_state = {"sid": None, "path": None, "agent_buf": None, "autonomous": False} async def _broadcast_control_status(): """Tell every connected agent client who currently holds control.""" @@ -81,6 +81,7 @@ def _load_history_for_session(bridge): _history_state["sid"] = sid _history_state["path"] = None _history_state["agent_buf"] = None + _history_state["autonomous"] = False try: if store and sid: sdir = store._session_dir(sid) @@ -140,7 +141,10 @@ def _record(item): def _flush_agent_buf(): buf = _history_state["agent_buf"] if buf: - _record({"role": "agent", "text": buf}) + # An autonomous (wake) turn's text is recorded distinctly so replay + # shows it as "Gently · autonomous", not an ordinary agent reply. + role = "autonomous" if _history_state.get("autonomous") else "agent" + _record({"role": role, "text": buf}) _history_state["agent_buf"] = None def _record_display(msg): @@ -148,8 +152,15 @@ def _record_display(msg): t = msg.get("type") if t == "user_message": _flush_agent_buf() + _history_state["autonomous"] = False _record({"role": "user", "text": msg.get("text", ""), "author": msg.get("author")}) + elif t == "autonomous_start": + # An autonomous wake turn is beginning — record the trigger banner + # and mark following text as autonomous until stream_end. 
+ _flush_agent_buf() + _history_state["autonomous"] = True + _record({"role": "autonomous_start", "trigger": msg.get("trigger", "")}) elif t == "text": _history_state["agent_buf"] = (_history_state["agent_buf"] or "") + msg.get("text", "") elif t == "tool_call": @@ -159,6 +170,7 @@ def _record_display(msg): "summary": msg.get("result_summary")}) elif t == "stream_end": _flush_agent_buf() + _history_state["autonomous"] = False async def _broadcast(msg): """Record to history + send a display message to ALL clients.""" @@ -274,6 +286,11 @@ async def agent_websocket(websocket: WebSocket): await websocket.close() return + # Route autonomous (wake-router) turns through this router's _broadcast so + # they stream to all chat clients + persist to the display transcript. + # Idempotent; _broadcast is router-scoped and fans out to whoever is live. + bridge.register_display_broadcaster(_broadcast) + # ── Authenticate the connection (account mode) ──────────── # When user accounts are configured, identity comes from the signed # session cookie (set at login). Viewers may watch but not drive; @@ -572,6 +589,14 @@ def choice_future_factory(choice_data: dict) -> asyncio.Future: _choice_futures[request_id] = future return future + def _discard_choice(request_id: str) -> None: + _choice_futures.pop(request_id, None) + + # Give the bridge the choice-factory + discard too, so ASK-mode autonomous + # turns can round-trip an approval picker through this connection's channel + # and clean up the future on timeout/cancel. + bridge.register_display_broadcaster(_broadcast, choice_future_factory, _discard_choice) + # Register this client for control arbitration; grant control if free # (only to clients allowed to drive — viewers never auto-hold). 
_clients[client_id] = send_fn diff --git a/gently/ui/web/static/css/agent-chat.css b/gently/ui/web/static/css/agent-chat.css index 777582b2..862a92e8 100644 --- a/gently/ui/web/static/css/agent-chat.css +++ b/gently/ui/web/static/css/agent-chat.css @@ -140,6 +140,28 @@ white-space: pre-wrap; word-wrap: break-word; } +/* ── Autonomous (wake) turns ────────────────────────────── */ +.ac-autonomous-banner { + display: flex; align-items: center; gap: 8px; + align-self: stretch; + margin: 2px 0; + padding: 6px 10px; + font-size: 11.5px; font-weight: 500; + color: var(--accent-purple); + background: rgba(167, 139, 250, 0.10); + border: 1px solid rgba(167, 139, 250, 0.28); + border-radius: 8px; +} +.ac-autonomous-dot { + width: 7px; height: 7px; border-radius: 50%; + background: var(--accent-purple); + box-shadow: 0 0 0 3px rgba(167, 139, 250, 0.20); + flex: 0 0 auto; +} +/* Autonomous agent bubbles get an accent rail + a distinct role label. */ +.ac-turn-autonomous { border-left: 2px solid rgba(167, 139, 250, 0.45); padding-left: 8px; } +.ac-turn-autonomous .ac-role { color: var(--accent-purple); } + /* ── Activity indicator ─────────────────────────────────── */ .ac-activity { display: flex; align-items: center; gap: 9px; @@ -220,6 +242,11 @@ .ac-choice-label { font-weight: 600; font-size: 13px; } .ac-choice-desc { font-size: 12px; color: var(--text-muted); } .ac-choice-picked { border-color: var(--accent-green); background: rgba(74, 222, 128, 0.08); } +.ac-choice-wake { border-color: rgba(167, 139, 250, 0.45); } +.ac-choice-origin { + font-size: 10.5px; font-weight: 600; letter-spacing: 0.04em; text-transform: uppercase; + color: var(--accent-purple); margin-bottom: 2px; +} /* ── Applied-spec card ──────────────────────────────────── */ .ac-spec { @@ -293,4 +320,44 @@ } .agent-chat-send:hover:not(:disabled) { background: var(--accent-hover); } .agent-chat-send:disabled { opacity: 0.5; cursor: default; } -.agent-chat-send.ac-busy { background: var(--color-danger, 
#f87171); } +/* Send now queues while busy (it no longer doubles as Stop), so just dim it. */ +.agent-chat-send.ac-busy { opacity: 0.6; } + +/* Explicit Stop (separate from Send), shown only during a cancellable turn. */ +.ac-stop { + flex: 0 0 auto; align-self: flex-end; + padding: 9px 12px; border-radius: 9px; + border: 1px solid var(--color-danger, #f87171); + background: transparent; color: var(--color-danger, #f87171); + font-weight: 600; font-size: 13px; cursor: pointer; +} +.ac-stop:hover { background: rgba(248, 113, 113, 0.12); } +.ac-stop.hidden { display: none; } + +/* ── Queued-message panel (type-while-busy) ─────────────── */ +.ac-queue { + margin: 0 12px 6px; + border: 1px solid var(--border); border-radius: 9px; + background: rgba(127, 127, 127, 0.06); + padding: 6px; font-size: 12px; +} +.ac-queue.hidden { display: none; } +.ac-queue-head { + display: flex; align-items: center; justify-content: space-between; + padding: 2px 4px 6px; color: var(--text-muted); +} +.ac-queue-clear { + background: none; border: none; color: var(--accent); + cursor: pointer; font-size: 11.5px; font-family: inherit; +} +.ac-queue-clear:hover { text-decoration: underline; } +.ac-queue-item { display: flex; align-items: center; gap: 8px; padding: 4px; } +.ac-queue-text { + flex: 1 1 auto; color: var(--text); + white-space: nowrap; overflow: hidden; text-overflow: ellipsis; +} +.ac-queue-remove { + flex: 0 0 auto; background: none; border: none; + color: var(--text-muted); cursor: pointer; font-size: 12px; line-height: 1; +} +.ac-queue-remove:hover { color: var(--color-danger, #f87171); } diff --git a/gently/ui/web/static/js/agent-chat.js b/gently/ui/web/static/js/agent-chat.js index 3b10d8be..545b05ac 100644 --- a/gently/ui/web/static/js/agent-chat.js +++ b/gently/ui/web/static/js/agent-chat.js @@ -28,6 +28,12 @@ const AgentChat = (() => { let tools = []; // [{name, description, params, ...}] let acItems = []; // current completion items shown in the dropdown let acIdx = 
-1; // highlighted item index + let autonomousTurn = false; // true while rendering an autonomous (wake) turn + let agentBusy = false; // a turn (user or autonomous) is currently running + let busySource = null; // 'user' | 'wake' while busy + let msgQueue = []; // messages typed while busy, sent on idle + let queuePanel = null; // the "⏳ Queued (N)" panel element + let stopBtn = null; // explicit Stop button (separate from Send) // DOM refs (resolved in init) let fab, panel, log, input, sendBtn, conn, banner, closeBtn, userEl, signoutBtn; @@ -75,10 +81,11 @@ const AgentChat = (() => { function addTurn(role) { const wrap = document.createElement('div'); wrap.className = `ac-turn ac-turn-${role}`; + if (role === 'agent' && autonomousTurn) wrap.classList.add('ac-turn-autonomous'); if (role === 'agent') { const label = document.createElement('div'); label.className = 'ac-role'; - label.textContent = 'Gently'; + label.textContent = autonomousTurn ? 'Gently · autonomous' : 'Gently'; wrap.appendChild(label); } const content = document.createElement('div'); @@ -118,6 +125,14 @@ const AgentChat = (() => { const c = addTurn('agent'); c._raw = it.text || ''; c.innerHTML = mdToHtml(c._raw); + } else if (it.role === 'autonomous_start') { + addAutonomousBanner(it.trigger || ''); + } else if (it.role === 'autonomous') { + autonomousTurn = true; + const c = addTurn('agent'); + c._raw = it.text || ''; + c.innerHTML = mdToHtml(c._raw); + autonomousTurn = false; } else if (it.role === 'tool') { const el = document.createElement('div'); el.className = 'ac-tool ac-tool-done'; @@ -132,6 +147,16 @@ const AgentChat = (() => { scrollToBottom(); } + /** A divider announcing the agent woke itself, with the trigger reason. */ + function addAutonomousBanner(trigger) { + const el = document.createElement('div'); + el.className = 'ac-autonomous-banner'; + const t = trigger ? 
`Gently woke up — ${trigger}` : 'Gently woke up'; + el.innerHTML = `${escapeHtml(t)}`; + log.appendChild(el); + scrollToBottom(); + } + function addSystemLine(text, level = 'info') { const el = document.createElement('div'); el.className = `ac-system ac-level-${level}`; @@ -171,10 +196,20 @@ const AgentChat = (() => { case 'stream_start': streaming = true; currentAgentEl = null; // created lazily on first text - setBusy(true); + setBusy(true, 'user'); setActivity('Working…'); break; + case 'autonomous_start': + // The agent woke itself — render a distinct banner + label the + // following text as autonomous (no stream_start precedes this). + hideActivity(); + autonomousTurn = true; + currentAgentEl = null; + setBusy(true, 'wake'); + addAutonomousBanner(msg.trigger || ''); + break; + case 'thinking': if (streaming) setActivity('Thinking…'); break; @@ -256,6 +291,7 @@ const AgentChat = (() => { case 'stream_end': streaming = false; currentAgentEl = null; + autonomousTurn = false; hideActivity(); setBusy(false); break; @@ -289,7 +325,13 @@ const AgentChat = (() => { const data = msg.choice_data || {}; const reqId = msg.request_id || data.request_id || ''; const wrap = document.createElement('div'); - wrap.className = 'ac-choice'; + wrap.className = 'ac-choice' + (msg.origin === 'wake' ? ' ac-choice-wake' : ''); + if (msg.origin === 'wake') { + const tag = document.createElement('div'); + tag.className = 'ac-choice-origin'; + tag.textContent = 'Autonomy proposal — your approval needed'; + wrap.appendChild(tag); + } const q = document.createElement('div'); q.className = 'ac-choice-q'; q.innerHTML = mdToHtml(data.question || 'Choose:'); @@ -482,9 +524,67 @@ const AgentChat = (() => { } } - function setBusy(busy) { - sendBtn.textContent = busy ? 'Stop' : 'Send'; - sendBtn.classList.toggle('ac-busy', busy); + function setBusy(busy, source) { + agentBusy = !!busy; + busySource = agentBusy ? 
(source || 'user') : null; + // Send no longer doubles as Stop — it queues while busy. A separate Stop + // (shown only for a cancellable user turn) aborts the current turn. + if (stopBtn) stopBtn.classList.toggle('hidden', !(agentBusy && busySource === 'user')); + sendBtn.classList.toggle('ac-busy', agentBusy); + if (agentBusy) { + input.placeholder = (busySource === 'wake') + ? 'Gently is acting autonomously — your message will queue' + : 'Gently is working — your message will queue'; + } else { + if (hasControl) input.placeholder = 'Message Gently… ( / commands · @ tools )'; + drainQueue(); // a turn just ended — send the next queued message + } + } + + // ── Message queue (type-while-busy) ─────────────────────── + function enqueue(text) { msgQueue.push(text); renderQueue(); } + function removeQueued(i) { + if (i >= 0 && i < msgQueue.length) { msgQueue.splice(i, 1); renderQueue(); } + } + function clearQueue() { msgQueue = []; renderQueue(); } + function drainQueue() { + if (agentBusy || !msgQueue.length) return; + if (!ws || ws.readyState !== WebSocket.OPEN) return; // keep queued until reconnect + const next = msgQueue.shift(); + renderQueue(); + actuallySend(next); + } + function renderQueue() { + if (!queuePanel) return; + if (!msgQueue.length) { queuePanel.classList.add('hidden'); queuePanel.innerHTML = ''; return; } + queuePanel.classList.remove('hidden'); + queuePanel.innerHTML = ''; + const head = document.createElement('div'); + head.className = 'ac-queue-head'; + const lbl = document.createElement('span'); + lbl.textContent = `⏳ Queued (${msgQueue.length})`; + const clear = document.createElement('button'); + clear.className = 'ac-queue-clear'; + clear.textContent = 'Clear all'; + clear.addEventListener('click', clearQueue); + head.appendChild(lbl); + head.appendChild(clear); + queuePanel.appendChild(head); + msgQueue.forEach((m, i) => { + const row = document.createElement('div'); + row.className = 'ac-queue-item'; + const span = 
document.createElement('span'); + span.className = 'ac-queue-text'; + span.textContent = m; + const x = document.createElement('button'); + x.className = 'ac-queue-remove'; + x.textContent = '✕'; + x.title = 'Remove from queue'; + x.addEventListener('click', () => removeQueued(i)); + row.appendChild(span); + row.appendChild(x); + queuePanel.appendChild(row); + }); } function setConn(ok, label) { @@ -520,23 +620,32 @@ const AgentChat = (() => { } // ── Input handling ──────────────────────────────────────── - function submit() { - if (streaming) { send({ type: 'cancel' }); return; } // Send doubles as Stop - hideCompletions(); - const text = input.value.trim(); - if (!text) return; - if (!hasControl) { renderControl(); return; } + function actuallySend(text) { if (text.startsWith('/')) { addUserMessage(text); // commands aren't broadcast; echo locally send({ type: 'command', command: text }); // slash commands (e.g. /status) - } else { - send({ type: 'chat', text }); // echoed to all via 'user_message' + // Most commands reply with a single 'command_result' and no stream — + // do NOT mark the composer busy, or the queue would stick forever. + // Commands that DO stream (e.g. /wizard) set busy via stream_start. + return; } + send({ type: 'chat', text }); // echoed to all via 'user_message' // Instant feedback before the first chunk arrives. - setBusy(true); + setBusy(true, 'user'); setActivity('Working…'); + } + + function submit() { + hideCompletions(); + const text = input.value.trim(); + if (!text) return; + if (!hasControl) { renderControl(); return; } input.value = ''; autosize(); + // While the agent is busy (a user OR autonomous turn), queue instead of + // cancelling — Send no longer doubles as Stop. 
+ if (agentBusy) { enqueue(text); return; } + actuallySend(text); } function autosize() { @@ -613,6 +722,19 @@ const AgentChat = (() => { acComplete = document.createElement('div'); acComplete.className = 'ac-complete hidden'; inputWrap.insertBefore(acComplete, inputWrap.firstChild); + + // Queued-message panel (above the composer) for type-while-busy. + queuePanel = document.createElement('div'); + queuePanel.className = 'ac-queue hidden'; + if (inputWrap.parentNode) inputWrap.parentNode.insertBefore(queuePanel, inputWrap); + + // Explicit Stop button — shown only during a cancellable user turn. + stopBtn = document.createElement('button'); + stopBtn.className = 'ac-stop hidden'; + stopBtn.textContent = 'Stop'; + stopBtn.title = 'Stop the current turn'; + stopBtn.addEventListener('click', () => { send({ type: 'cancel' }); setBusy(false); }); + inputWrap.appendChild(stopBtn); } sendBtn.addEventListener('click', submit); @@ -629,7 +751,11 @@ const AgentChat = (() => { if (e.key === 'Escape') { e.preventDefault(); hideCompletions(); return; } } if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); submit(); } - if (e.key === 'Escape' && streaming) { e.preventDefault(); send({ type: 'cancel' }); } + // Escape mirrors Stop: cancel a cancellable (user) turn and clear busy + // (a cancelled turn emits no stream_end, so clear optimistically). 
+ if (e.key === 'Escape' && agentBusy && busySource === 'user') { + e.preventDefault(); send({ type: 'cancel' }); setBusy(false); + } }); } From f8f977159a83fb42491573439dca06972a622bdc Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 20:17:47 -0400 Subject: [PATCH 56/71] Redesign agent chat as a docked, sliding side panel (overlay + pin-to-dock) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the floating popup bubble with a professional VSCode-style side panel, per a UI/UX + biologist-cognition consult that converged on overlay-by-default with an opt-in pin-to-dock. - App shell: header/navbar stay full-width; a new .app-shell flex row holds .app-main (content) + the chat +
From 2a36834b241d880e39ed56c8b69c8613880541e7 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 20:31:22 -0400 Subject: [PATCH 57/71] Add a Home landing tab; stop the chat auto-running the startup wizard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the auto-popping startup wizard with a proper Home landing page (the new default tab). - Home tab: a scrollable landing with a 'Start / continue an experiment' button + a thin status line, and three at-a-glance cards — recent sessions (with Resume), recent plans (with progress chips), and a recent-images strip — all fed by existing endpoints (/api/sessions, /api/campaigns, /api/snapshots, /api/images/{uid}/png). New HomeApp module (mirrors ReviewApp/CampaignsApp); self-inits on load since it's the default tab. - Wizard: no longer auto-pops in the chat on connect — gated behind server.wizard_autorun (default off). 'Start / continue an experiment' opens the agent panel and runs /wizard on demand; the briefing/resolution path is unchanged (wizard_ran still derives from wizard.needed). - Wiring: TABS.HOME; Home is the default-active tab (navbar + panel + state.tab); switchTab lazy-inits HomeApp; #home in the hash-route whitelist; AgentChat now exposes runCommand() so Home can trigger /wizard. - Reuses .panel/.empty-state + theme vars; namespaced .home-* styles, responsive grid, scroll on an inner wrapper (the panel is overflow:hidden). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/routes/agent_ws.py | 9 +- gently/ui/web/static/css/main.css | 108 ++++++++++++++++++ gently/ui/web/static/js/agent-chat.js | 10 +- gently/ui/web/static/js/app.js | 7 +- gently/ui/web/static/js/home.js | 156 ++++++++++++++++++++++++++ gently/ui/web/static/js/utils.js | 2 +- gently/ui/web/templates/_navbar.html | 4 +- gently/ui/web/templates/index.html | 45 +++++++- 8 files changed, 333 insertions(+), 8 deletions(-) create mode 100644 gently/ui/web/static/js/home.js diff --git a/gently/ui/web/routes/agent_ws.py b/gently/ui/web/routes/agent_ws.py index 36ca6ee5..8f3a3262 100644 --- a/gently/ui/web/routes/agent_ws.py +++ b/gently/ui/web/routes/agent_ws.py @@ -617,9 +617,14 @@ def _discard_choice(request_id: str) -> None: try: # ── Wizard phase ────────────────────────────────────── - # Run startup wizard (if needed) before entering the REPL. + # The startup wizard no longer auto-pops in the chat — setup is now + # launched on demand from the Home page (which sends /wizard) or via + # the /wizard command. Re-enable auto-run by setting + # server.wizard_autorun = True. NOTE: wizard_ran below is still + # derived from wizard.needed, so the briefing/resolution path is + # unaffected by this gate. wizard = getattr(bridge, "_wizard", None) - if wizard is not None and wizard.needed: + if wizard is not None and wizard.needed and getattr(server, "wizard_autorun", False): wizard_task = await _run_wizard( wizard, websocket, send_fn, _choice_futures, bridge, log_transcript=_log_transcript, diff --git a/gently/ui/web/static/css/main.css b/gently/ui/web/static/css/main.css index bf36b569..fdafcc1d 100644 --- a/gently/ui/web/static/css/main.css +++ b/gently/ui/web/static/css/main.css @@ -652,6 +652,114 @@ a.tab-link.active { flex-direction: column; } +/* ── Home (landing) tab ───────────────────────────────────── + #home-content is a flex column with overflow:hidden, so the scroll lives on + .home-scroll. 
*/ +.home-scroll { + flex: 1 1 auto; + min-height: 0; + overflow-y: auto; + padding: 24px; + display: flex; + flex-direction: column; + gap: 20px; +} +.home-hero { + display: flex; + align-items: center; + justify-content: space-between; + gap: 16px; + padding: 20px 22px; + border: 1px solid var(--border); + border-radius: 14px; + background: var(--bg-card); +} +.home-hero-title { font-size: 1.35rem; font-weight: 700; color: var(--text); margin: 0; } +.home-hero-status { font-size: 12.5px; color: var(--text-muted); margin-top: 4px; } +.home-start-btn { + flex: 0 0 auto; + padding: 10px 18px; + border: none; border-radius: 10px; + background: var(--gradient-primary, var(--accent)); + color: #fff; font-weight: 600; font-size: 13.5px; cursor: pointer; + box-shadow: var(--shadow-glow); + transition: transform 0.12s ease, box-shadow 0.12s ease; +} +.home-start-btn:hover { transform: translateY(-1px); box-shadow: var(--shadow-glow-strong); } + +.home-grid { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 16px; +} +.home-card-wide { grid-column: 1 / -1; } +@media (max-width: 820px) { + .home-grid { grid-template-columns: 1fr; } + .home-card-wide { grid-column: auto; } +} + +.home-card { + display: flex; flex-direction: column; + padding: 14px 16px; + border: 1px solid var(--border); + border-radius: 12px; + background: var(--bg-card); + min-height: 120px; +} +.home-card-head { + display: flex; align-items: center; justify-content: space-between; + margin-bottom: 10px; +} +.home-card-title { + font-size: 11px; font-weight: 600; letter-spacing: 0.06em; + text-transform: uppercase; color: var(--text-muted); +} +.home-card-link { font-size: 11.5px; color: var(--accent); text-decoration: none; } +.home-card-link:hover { text-decoration: underline; } +.home-card-body { display: flex; flex-direction: column; gap: 6px; } + +.home-item { + display: flex; align-items: center; justify-content: space-between; gap: 10px; + padding: 8px 10px; border-radius: 
8px; + background: rgba(127, 127, 127, 0.05); + border: 1px solid transparent; +} +.home-item-clickable { cursor: pointer; } +.home-item-clickable:hover { border-color: var(--accent); background: var(--bg-hover); } +.home-item-main { display: flex; flex-direction: column; gap: 2px; min-width: 0; } +.home-item-row { display: flex; align-items: center; gap: 7px; } +.home-item-name { + font-size: 13px; color: var(--text); font-weight: 500; + white-space: nowrap; overflow: hidden; text-overflow: ellipsis; +} +.home-item-meta { font-size: 11.5px; color: var(--text-muted); } +.home-tag { + font-size: 9.5px; font-weight: 700; letter-spacing: 0.04em; text-transform: uppercase; + padding: 1px 6px; border-radius: 999px; +} +.home-tag-live { color: var(--accent-green); border: 1px solid rgba(74, 222, 128, 0.4); } +.home-resume { + flex: 0 0 auto; + padding: 4px 11px; border-radius: 7px; + border: 1px solid var(--accent); background: transparent; color: var(--accent); + font-size: 12px; font-weight: 600; cursor: pointer; +} +.home-resume:hover { background: var(--accent); color: #fff; } +.home-resume:disabled { opacity: 0.6; cursor: default; } +.home-chip { + flex: 0 0 auto; + font-size: 11px; font-weight: 600; + padding: 2px 8px; border-radius: 999px; + background: var(--bg-hover); color: var(--text-muted); +} + +.home-image-strip { display: flex; gap: 8px; flex-wrap: wrap; } +.home-image { + width: 84px; height: 84px; border-radius: 8px; overflow: hidden; + border: 1px solid var(--border); background: var(--img-bg); flex: 0 0 auto; +} +.home-image img { width: 100%; height: 100%; object-fit: cover; display: block; } + /* Live View - Clean full-width layout */ .live-view { display: flex; diff --git a/gently/ui/web/static/js/agent-chat.js b/gently/ui/web/static/js/agent-chat.js index 6b456955..170353c3 100644 --- a/gently/ui/web/static/js/agent-chat.js +++ b/gently/ui/web/static/js/agent-chat.js @@ -952,5 +952,13 @@ const AgentChat = (() => { 
document.addEventListener('DOMContentLoaded', init); - return { togglePanel }; + // Public: programmatically send a message/command (e.g. the Home page's + // "Start / continue an experiment" button sends '/wizard'). + function runCommand(text) { + if (!text) return; + if (!hasControl) { renderControl(); return; } + actuallySend(text); + } + + return { togglePanel, runCommand }; })(); diff --git a/gently/ui/web/static/js/app.js b/gently/ui/web/static/js/app.js index 203cea04..d1e75a72 100644 --- a/gently/ui/web/static/js/app.js +++ b/gently/ui/web/static/js/app.js @@ -6,7 +6,7 @@ const state = { ws: null, connected: false, - tab: TABS.EMBRYOS, // Default to Embryos tab + tab: TABS.HOME, // Default to the Home landing tab snapshots: [], calibration: [], embryos: [], @@ -71,6 +71,9 @@ function switchTab(tabName) { const content = document.getElementById(`${tabName}-content`); if (content) content.classList.add('active'); + // Lazy-init Home landing tab + if (tabName === TABS.HOME && typeof HomeApp !== 'undefined') HomeApp.init(); + // Render galleries if (tabName === TABS.CALIBRATION) renderCalibrationGallery(); if (tabName === TABS.EVENTS) renderEventsTable(); @@ -631,7 +634,7 @@ document.addEventListener('DOMContentLoaded', () => { const hash = window.location.hash.slice(1); // remove # if (hash) { const [tab, param] = hash.split(':'); - if (tab === TABS.PLANS || tab === TABS.SESSIONS || tab === TABS.EMBRYOS || tab === TABS.CALIBRATION || tab === TABS.EVENTS || tab === TABS.EXPERIMENT) { + if (tab === TABS.HOME || tab === TABS.PLANS || tab === TABS.SESSIONS || tab === TABS.EMBRYOS || tab === TABS.CALIBRATION || tab === TABS.EVENTS || tab === TABS.EXPERIMENT) { switchTab(tab); if (tab === TABS.PLANS && param && typeof openCampaign === 'function') { setTimeout(() => openCampaign(param), 200); diff --git a/gently/ui/web/static/js/home.js b/gently/ui/web/static/js/home.js new file mode 100644 index 00000000..54adadd9 --- /dev/null +++ b/gently/ui/web/static/js/home.js 
@@ -0,0 +1,156 @@ +/** + * HomeApp — the landing tab. + * + * A light at-a-glance landing surface: recent sessions, recent plans, recent + * images, a thin status line, and a "Start / continue an experiment" button + * that launches the setup flow (the wizard, which no longer auto-pops in chat). + * + * Read-only fetches against existing endpoints (/api/sessions, /api/campaigns, + * /api/snapshots); mirrors the ReviewApp/CampaignsApp module pattern. + */ +const HomeApp = (() => { + let _inited = false; + const SESSIONS_N = 5; + const CAMPAIGNS_N = 5; + const IMAGES_N = 8; + + function relTime(iso) { + if (!iso) return ''; + const t = Date.parse(iso); + if (isNaN(t)) return ''; + const s = Math.max(0, (Date.now() - t) / 1000); + if (s < 60) return 'just now'; + if (s < 3600) return `${Math.floor(s / 60)}m ago`; + if (s < 86400) return `${Math.floor(s / 3600)}h ago`; + const d = Math.floor(s / 86400); + return d < 30 ? `${d}d ago` : new Date(t).toLocaleDateString(); + } + + function empty(el, msg) { + el.innerHTML = `
${escapeHtml(msg)}
`; + } + + function wireGoTab(scope) { + (scope || document).querySelectorAll('[data-go-tab]').forEach(el => { + if (el._goWired) return; + el._goWired = true; + el.addEventListener('click', (e) => { + e.preventDefault(); + if (typeof switchTab === 'function') switchTab(el.dataset.goTab); + }); + }); + } + + async function loadSessions() { + const el = document.getElementById('home-recent-sessions'); + if (!el) return; + try { + const data = await (await fetch('/api/sessions')).json(); + const sessions = (data.sessions || []).slice(0, SESSIONS_N); + if (!sessions.length) { empty(el, 'No sessions yet.'); return; } + el.innerHTML = sessions.map(s => { + const live = s.active ? 'live' : ''; + const resume = s.active ? '' : + ``; + return `
+
+
${escapeHtml(s.name || s.session_id)}${live}
+ ${escapeHtml(relTime(s.last_active))} · ${s.embryo_count || 0} embryos +
${resume} +
`; + }).join(''); + el.querySelectorAll('[data-resume]').forEach(b => b.addEventListener('click', async () => { + b.disabled = true; + b.textContent = 'Resuming…'; + try { + await fetch(`/api/sessions/${encodeURIComponent(b.dataset.resume)}/resume`, { method: 'POST' }); + } catch (_) { b.disabled = false; b.textContent = 'Resume'; } + })); + } catch (e) { empty(el, 'Could not load sessions.'); } + } + + async function loadCampaigns() { + const el = document.getElementById('home-recent-campaigns'); + if (!el) return; + try { + const data = await (await fetch('/api/campaigns')).json(); + const items = (data.campaigns || []).slice(0, CAMPAIGNS_N); + if (!items.length) { empty(el, 'No plans yet.'); return; } + el.innerHTML = items.map(t => { + const c = t.campaign || {}; + const st = t.status || {}; + const name = c.shorthand || c.description || 'Untitled plan'; + const total = st.total || 0; + const chip = total ? `${st.completed || 0}/${total} done` : ''; + return `
+ ${escapeHtml(name)}${chip} +
`; + }).join(''); + wireGoTab(el); + } catch (e) { empty(el, 'Could not load plans.'); } + } + + async function loadImages() { + const el = document.getElementById('home-recent-images'); + if (!el) return; + try { + const data = await (await fetch('/api/snapshots')).json(); + // /api/snapshots is timestamp-ASCENDING; take the tail for "recent". + const recent = (data.snapshots || []).slice(-IMAGES_N).reverse(); + if (!recent.length) { + empty(el, 'No images yet — they appear once a session is active.'); + return; + } + el.innerHTML = '
' + recent.map(s => { + const m = s.metadata || {}; + const label = m.embryo_id + ? `${m.embryo_id}${m.timepoint != null ? ' · t' + m.timepoint : ''}` : ''; + return `
+ ${escapeHtml(label)} +
`; + }).join('') + '
'; + } catch (e) { empty(el, 'Could not load images.'); } + } + + function updateStatus() { + const el = document.getElementById('home-status'); + if (!el) return; + const connected = (typeof state !== 'undefined' && state.connected); + const n = (typeof state !== 'undefined' && Array.isArray(state.embryos)) ? state.embryos.length : 0; + el.textContent = connected + ? `Connected · ${n} embryo${n === 1 ? '' : 's'} in view` + : 'Offline — start the agent to connect.'; + } + + function refresh() { + updateStatus(); + loadSessions(); + loadCampaigns(); + loadImages(); + } + + function init() { + if (!_inited) { + _inited = true; + wireGoTab(document.getElementById('home-content')); + const start = document.getElementById('home-start-btn'); + if (start) start.addEventListener('click', () => { + if (typeof AgentChat !== 'undefined' && AgentChat.togglePanel) { + AgentChat.togglePanel(true); + // Let the panel's WS connect before sending the command. + if (AgentChat.runCommand) setTimeout(() => AgentChat.runCommand('/wizard'), 250); + } + }); + } + refresh(); // re-fetch on every entry to the tab + } + + // Self-initialise on load when Home is the default-active tab (switchTab's + // lazy-init hook only fires on a tab click / hash route, not initial paint). 
+ document.addEventListener('DOMContentLoaded', () => { + const home = document.getElementById('home-content'); + if (home && home.classList.contains('active')) init(); + }); + + return { init, refresh }; +})(); diff --git a/gently/ui/web/static/js/utils.js b/gently/ui/web/static/js/utils.js index b0e6d2ac..4b8ff62b 100644 --- a/gently/ui/web/static/js/utils.js +++ b/gently/ui/web/static/js/utils.js @@ -3,7 +3,7 @@ // ══════════════════════════════════════════════════════════ // Tab and view name constants -const TABS = { EMBRYOS: 'embryos', CALIBRATION: 'calibration', EVENTS: 'events', PLANS: 'plans', SESSIONS: 'sessions', DEVICES: 'devices', EXPERIMENT: 'experiment' }; +const TABS = { HOME: 'home', EMBRYOS: 'embryos', CALIBRATION: 'calibration', EVENTS: 'events', PLANS: 'plans', SESSIONS: 'sessions', DEVICES: 'devices', EXPERIMENT: 'experiment' }; /** * HTML-escape a string (safe for insertion into innerHTML). diff --git a/gently/ui/web/templates/_navbar.html b/gently/ui/web/templates/_navbar.html index 821c8710..33d5f675 100644 --- a/gently/ui/web/templates/_navbar.html +++ b/gently/ui/web/templates/_navbar.html @@ -2,7 +2,8 @@
{% if is_live %} {# SPA tabs — JS-driven via switchTab() in app.js #} -
+
Home
+
Embryos 0
@@ -24,6 +25,7 @@
Sessions
{% else %} {# Standalone pages — all tabs link back to the SPA #} +
Home Embryos Calibration System diff --git a/gently/ui/web/templates/index.html b/gently/ui/web/templates/index.html index 31013b1f..e8164eca 100644 --- a/gently/ui/web/templates/index.html +++ b/gently/ui/web/templates/index.html @@ -25,6 +25,48 @@
+ +
+
+
+
+

Welcome to Gently

+
Connecting…
+
+ +
+
+
+
+ Recent sessions + All +
+
+
Loading…
+
+
+
+
+ Recent plans + All +
+
+
Loading…
+
+
+
+
+ Recent images + All +
+
+
No images yet — they appear once a session is active.
+
+
+
+
+
+

Calibration

@@ -127,7 +169,7 @@

Calibration

-
+
Monitoring
@@ -576,6 +618,7 @@

Properties

+ From befe98e2045678b5205cc7ffabf988ef9131f0f1 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 20:38:10 -0400 Subject: [PATCH 58/71] =?UTF-8?q?Cache=20YAML=20parses=20in=20FileContextS?= =?UTF-8?q?tore=20=E2=80=94=20fixes=20slow=20Plans/campaign=20loading?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _build_campaign_tree calls get_subcampaigns (reads every campaign.yaml) and get_plan_status -> get_plan_items(include_children) -> _get_campaign_tree_ids (re-scans every campaign.yaml) per node, so listing/opening campaigns re-parsed the same YAML files O(N^2)+ times per request. The new Home tab made /api/campaigns fire on every page load, compounding it. Add a parse cache to _read_yaml keyed by (mtime, size): repeated reads of the same file return a deepcopy of the cached parse instead of re-opening + re-parsing. Auto-invalidated when a file's mtime/size changes (incl. external writes) and explicitly on _write_yaml. Every return is a deepcopy, so callers that mutate raw plan-item lists (update/delete) can't corrupt the cache. Collapses the tree build from O(N^2) YAML parses to O(N) parses + cheap stat/deepcopy. Verified: deepcopy isolation, write- and mtime-invalidation, missing-file. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/harness/memory/file_store.py | 32 +++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/gently/harness/memory/file_store.py b/gently/harness/memory/file_store.py index 84beebf2..f8f797bb 100644 --- a/gently/harness/memory/file_store.py +++ b/gently/harness/memory/file_store.py @@ -28,6 +28,7 @@ assessments/{id}.yaml """ +import copy import dataclasses import json import logging @@ -106,6 +107,11 @@ class FileContextStore: def __init__(self, agent_dir: Path): self.agent_dir = Path(agent_dir) self._ensure_dirs() + # YAML parse cache: str(path) -> ((mtime, size), parsed). 
Collapses the + # O(N^2) re-parsing in campaign-tree builds; auto-invalidated by file + # mtime/size changes and explicitly on _write_yaml. Set BEFORE the index + # rebuild below, which reads YAML through the cache. + self._yaml_cache: Dict[str, tuple] = {} # In-memory index: campaign_id -> folder Path self._campaign_index: Dict[str, Path] = {} self._rebuild_campaign_index() @@ -177,22 +183,32 @@ def _write_yaml(self, path: Path, data): yaml.dump(data, fh, Dumper=_ISODumper, default_flow_style=False, allow_unicode=True, sort_keys=False) # Atomic rename (on Windows this replaces the target). - if os.name == "nt": - # os.replace is atomic on Windows when on same volume. - os.replace(str(tmp), str(path)) - else: - os.replace(str(tmp), str(path)) + os.replace(str(tmp), str(path)) + # Invalidate the parse cache so the next read reloads (new mtime anyway). + self._yaml_cache.pop(str(path), None) def _read_yaml(self, path: Path): - """Read a YAML file; return None if missing or empty.""" - if not path.exists(): + """Read a YAML file, parse-cached by (mtime, size). Returns None if + missing or empty. 
The cached object is never handed out directly — every + return is a deepcopy — so callers may freely mutate the result without + corrupting the cache.""" + try: + st = path.stat() + except OSError: return None + key = str(path) + sig = (st.st_mtime, st.st_size) + cached = self._yaml_cache.get(key) + if cached is not None and cached[0] == sig: + return copy.deepcopy(cached[1]) try: with open(path, "r", encoding="utf-8") as fh: - return yaml.safe_load(fh) + data = yaml.safe_load(fh) except Exception: logger.warning(f"Failed to read {path}", exc_info=True) return None + self._yaml_cache[key] = (sig, data) + return copy.deepcopy(data) def _append_jsonl(self, path: Path, record: dict): """Append one JSON line to a file.""" From 9369407619d1291e143cf8b646544bc95e9d2613 Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 20:56:08 -0400 Subject: [PATCH 59/71] Home: recent images aggregated across previous sessions The home "recent images" card previously read /api/snapshots, which is the in-memory ImageStore for the *current* session only -- so a freshly opened UI showed nothing until the live session captured volumes. Pull from the FileStore on disk instead so it reflects imagery from previous sessions. New routes (gently/ui/web/routes/sessions.py): - GET /api/home/recent-images -- latest projection per embryo across the most-recent sessions. Cheap by construction: session IDs from folder names (no session.yaml parse), embryo IDs from directory names (no embryo.yaml parse), timepoints from a filename glob (no pixel decode), and the walk stops as soon as `limit` images are collected. limit/sessions are clamped on both ends so a crafted query cannot turn this unauthenticated read into a full-disk scan. - GET /api/sessions/{id}/projection -- serves any saved session's JPEG projection, with a component-wise path-traversal guard (the resolved file must be a child of the session dir; not str.startswith, which a sibling like `_evil` would slip through). 
Cheap FileStore helpers (gently/core/file_store.py): - recent_session_ids(limit) -- newest-first by folder date prefix, no YAML. - list_embryo_ids(session) -- IDs from directory names, no embryo.yaml read. home.js loadImages() now fetches the aggregator and builds encoded thumbnail URLs, with an in-flight + 15s TTL guard so re-entering the Home tab does not re-walk the disk on every visit. Verified with a synthetic on-disk store (helpers, aggregation ordering + short-circuit + clamps, and the traversal-guard predicate including the sibling-prefix case). Findings from an adversarial review (missing upper-bound clamp, no early short-circuit, per-embryo YAML parse, redundant fetches) are all addressed here. Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/core/file_store.py | 31 +++++++++++ gently/ui/web/routes/sessions.py | 92 ++++++++++++++++++++++++++++++++ gently/ui/web/static/js/home.js | 45 +++++++++++----- 3 files changed, 156 insertions(+), 12 deletions(-) diff --git a/gently/core/file_store.py b/gently/core/file_store.py index 4b33dedd..e4f14009 100644 --- a/gently/core/file_store.py +++ b/gently/core/file_store.py @@ -388,6 +388,21 @@ def list_sessions(self) -> List[SessionInfo]: sessions.sort(key=lambda s: s.get("last_active", ""), reverse=True) return sessions + def recent_session_ids(self, limit: int = 8) -> List[str]: + """Most-recent session IDs by folder-name date prefix, *cheaply*. + + Folder names are ``{YYYYMMDD}_{HHMM}_{slug}_{id8}`` so a reverse lexical + sort of the index orders them newest-first by creation time — no + ``session.yaml`` parse required. This is a creation-recency proxy (a + long-dormant session that was just resumed sorts by its original date), + which is fine for at-a-glance landing views; use ``list_sessions`` when + exact ``last_active`` ordering matters. 
+ """ + items = sorted(self._index.items(), key=lambda kv: kv[1], reverse=True) + if limit and limit > 0: + items = items[:limit] + return [sid for sid, _ in items] + def touch_session(self, session_id: str) -> None: """Update last_active timestamp.""" sd = self._session_dir(session_id) @@ -562,6 +577,22 @@ def list_embryos(self, session_id: str) -> List[EmbryoInfo]: result.append(_normalize_embryo_record(data)) return result + def list_embryo_ids(self, session_id: str) -> List[str]: + """Embryo IDs from directory names only — no ``embryo.yaml`` parse. + + The directory name *is* the embryo_id in this layout (see + ``_embryo_dir`` / ``put_embryo``), so callers that only need the ids + (e.g. enumerating projections) can skip the per-embryo YAML read that + ``list_embryos`` pays. + """ + sd = self._session_dir(session_id) + if sd is None: + return [] + embryos_dir = sd / "embryos" + if not embryos_dir.exists(): + return [] + return [e.name for e in sorted(embryos_dir.iterdir()) if e.is_dir()] + # ================================================================== # Volumes # ================================================================== diff --git a/gently/ui/web/routes/sessions.py b/gently/ui/web/routes/sessions.py index 58f3394d..09f3bd25 100644 --- a/gently/ui/web/routes/sessions.py +++ b/gently/ui/web/routes/sessions.py @@ -1,8 +1,10 @@ """Session routes - list, retrieve, and resume saved sessions.""" import logging +from pathlib import Path from fastapi import APIRouter, Depends, HTTPException +from fastapi.responses import FileResponse from gently.ui.web.auth import require_control @@ -54,6 +56,96 @@ async def list_sessions(): logger.warning("Failed to list sessions from FileStore: %s", e) return {"sessions": sessions} + @router.get("/api/home/recent-images") + async def recent_images(limit: int = 8, sessions: int = 6): + """Latest projection per embryo, aggregated across recent sessions. 
+ + Unlike /api/snapshots (in-memory, current session only), this walks the + FileStore on disk so the home page can show imagery from *previous* + sessions. Cheap by construction: recent session IDs come from folder + names (no session.yaml parse), embryo IDs from directory names (no + embryo.yaml parse), timepoints from a filename glob (no pixel decode), + and the walk stops as soon as `limit` images are collected. Bounds are + clamped so a crafted ?sessions=/?limit= can't turn this unauthenticated + read into a full-disk scan. Returns components; the client builds the + (encoded) image URL. + """ + store = _file_store() + if store is None: + return {"images": []} + limit = max(1, min(int(limit), 48)) + sessions = max(1, min(int(sessions), 24)) + out = [] + try: + for sid in (store.recent_session_ids(sessions) or []): + try: + eids = store.list_embryo_ids(sid) + except Exception: + eids = [] + sname = None # parsed lazily, only if this session contributes + for eid in eids: + try: + tps = store.list_projection_timepoints(sid, eid) or [] + except Exception: + tps = [] + if not tps: + continue + if sname is None: + try: + info = store.get_session(sid) + except Exception: + info = None + sname = (info.get("name") if info else None) or sid + out.append({ + "session_id": sid, + "session_name": sname, + "embryo_id": eid, + "timepoint": int(max(tps)), + }) + if len(out) >= limit: + break + if len(out) >= limit: + break + except Exception as e: + logger.warning("recent_images failed: %s", e) + return {"images": out[:limit]} + + @router.get("/api/sessions/{session_id}/projection") + async def get_session_projection(session_id: str, embryo: str, t: int): + """Serve a saved JPEG projection from any session on disk. + + Path-traversal safe: the resolved file must live inside the session's + own directory, so a crafted `embryo` (e.g. '../..') can't escape. 
+ """ + store = _file_store() + if store is None: + raise HTTPException(status_code=503, detail="Store not available") + path = store.get_projection_path(session_id, embryo, t) + if path is None: + raise HTTPException(status_code=404, detail="Projection not found") + try: + sd = store._session_dir(session_id) + resolved = Path(path).resolve() + # Component-wise ancestor check (not str.startswith, which would + # let a sibling like `_evil` slip through the prefix match). + sd_resolved = Path(sd).resolve() if sd is not None else None + if sd_resolved is None or sd_resolved not in resolved.parents: + raise HTTPException(status_code=404, detail="Not found") + except HTTPException: + raise + except Exception: + raise HTTPException(status_code=404, detail="Not found") + try: + st = resolved.stat() + etag = f'"{int(st.st_mtime)}-{st.st_size}"' + except OSError: + etag = None + headers = {"Cache-Control": "private, max-age=60"} + if etag: + headers["ETag"] = etag + return FileResponse(str(resolved), media_type="image/jpeg", + headers=headers) + @router.post("/api/sessions/{session_id}/resume", dependencies=[Depends(require_control)]) async def resume_session(session_id: str): diff --git a/gently/ui/web/static/js/home.js b/gently/ui/web/static/js/home.js index 54adadd9..089d7de3 100644 --- a/gently/ui/web/static/js/home.js +++ b/gently/ui/web/static/js/home.js @@ -6,13 +6,18 @@ * that launches the setup flow (the wizard, which no longer auto-pops in chat). * * Read-only fetches against existing endpoints (/api/sessions, /api/campaigns, - * /api/snapshots); mirrors the ReviewApp/CampaignsApp module pattern. + * /api/home/recent-images); mirrors the ReviewApp/CampaignsApp module pattern. */ const HomeApp = (() => { let _inited = false; const SESSIONS_N = 5; const CAMPAIGNS_N = 5; const IMAGES_N = 8; + // Recent images are stable (latest projection per embryo). 
refresh() runs on + // every Home-tab entry, so guard against redundant disk-walking fetches: + // skip if one is in flight or the strip was loaded within IMAGES_TTL_MS. + const IMAGES_TTL_MS = 15000; + let _imgState = { at: 0, inflight: false }; function relTime(iso) { if (!iso) return ''; @@ -90,26 +95,42 @@ const HomeApp = (() => { } catch (e) { empty(el, 'Could not load plans.'); } } - async function loadImages() { + async function loadImages(force) { const el = document.getElementById('home-recent-images'); if (!el) return; + if (_imgState.inflight) return; + // _imgState.at is set only after a completed fetch (images or empty), + // never after an error — so failures still retry on the next entry. + if (!force && _imgState.at && (Date.now() - _imgState.at) < IMAGES_TTL_MS) return; + _imgState.inflight = true; try { - const data = await (await fetch('/api/snapshots')).json(); - // /api/snapshots is timestamp-ASCENDING; take the tail for "recent". - const recent = (data.snapshots || []).slice(-IMAGES_N).reverse(); + const data = await (await fetch(`/api/home/recent-images?limit=${IMAGES_N}`)).json(); + // Latest projection per embryo across recent sessions (server orders + // most-recent session first). + const recent = (data.images || []).slice(0, IMAGES_N); if (!recent.length) { - empty(el, 'No images yet — they appear once a session is active.'); + empty(el, 'No images yet — they appear once a session has captured volumes.'); + _imgState.at = Date.now(); return; } el.innerHTML = '
' + recent.map(s => { - const m = s.metadata || {}; - const label = m.embryo_id - ? `${m.embryo_id}${m.timepoint != null ? ' · t' + m.timepoint : ''}` : ''; - return `
- ${escapeHtml(label)} + const tp = (s.timepoint != null) ? ` · t${s.timepoint}` : ''; + const label = `${s.embryo_id || ''}${tp}`; + const sub = s.session_name && s.session_name !== s.session_id + ? ` (${s.session_name})` : ''; + const src = `/api/sessions/${encodeURIComponent(s.session_id)}` + + `/projection?embryo=${encodeURIComponent(s.embryo_id)}` + + `&t=${encodeURIComponent(s.timepoint)}`; + return `
+ ${escapeHtml(label)}
`; }).join('') + '
'; - } catch (e) { empty(el, 'Could not load images.'); } + _imgState.at = Date.now(); + } catch (e) { + empty(el, 'Could not load images.'); + } finally { + _imgState.inflight = false; + } } function updateStatus() { From 7d877ba51daa470791a89f0f55fdeb873314f6cb Mon Sep 17 00:00:00 2001 From: Kesavan Date: Fri, 29 May 2026 20:57:06 -0400 Subject: [PATCH 60/71] Login: non-blocking page with a "Continue in view-only" escape hatch The /login page was a dead end: once a viewer landed there (via the chat window's "Sign in" button, a bookmark, or a redirect) the only way forward was valid credentials. Viewing is already open to everyone at / (index serves the SPA in view mode; signing in is an *elevation* to control, not a gate), so the login page should offer the same choice. Add a clearly-secondary "Continue without signing in ->" action beneath the Sign in button that drops straight into view-only mode, with a one-line note that you can sign in any time to take control. The subtitle now frames both paths. Only rendered when accounts are configured (otherwise /login already redirects to /). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- gently/ui/web/templates/login.html | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/gently/ui/web/templates/login.html b/gently/ui/web/templates/login.html index 9f893995..62e5ec98 100644 --- a/gently/ui/web/templates/login.html +++ b/gently/ui/web/templates/login.html @@ -46,6 +46,25 @@ button:hover:not(:disabled) { background: var(--accent-hover); } button:disabled { opacity: 0.6; cursor: default; } .error { color: var(--danger); font-size: 13px; margin-top: 14px; min-height: 18px; } + .divider { + display: flex; align-items: center; gap: 10px; + margin: 18px 0 0; color: var(--text-muted); + font-size: 12px; text-transform: uppercase; letter-spacing: 0.06em; + } + .divider::before, .divider::after { + content: ""; flex: 1; height: 1px; background: var(--border); + } + .ghost { + display: block; width: 100%; margin-top: 14px; padding: 10px; + text-align: center; border: 1px solid var(--border); + border-radius: 9px; background: transparent; color: var(--text); + font-size: 13.5px; font-weight: 500; text-decoration: none; + } + .ghost:hover { border-color: var(--accent); color: var(--accent-hover); } + .alt-note { + margin: 12px 0 0; color: var(--text-muted); + font-size: 12px; line-height: 1.5; text-align: center; + } @@ -54,13 +73,16 @@

Gently

-

Sign in to control the microscope.

+

Sign in to control the microscope — or keep watching in view-only mode.

+
or
+ Continue without signing in → +

View-only: watch live sessions and imagery. You can sign in any time to take control.