diff --git a/docs/trajectory-debugging.md b/docs/trajectory-debugging.md
new file mode 100644
index 00000000..fb01b20d
--- /dev/null
+++ b/docs/trajectory-debugging.md
@@ -0,0 +1,66 @@
+# Trajectory Debugging
+
+Gently already captures several useful session artifacts, including
+`events.jsonl`, `decisions.jsonl`, `timeline.jsonl`, perception traces, and
+interaction logs. The debug exporter packages those artifacts into a compact
+context bundle for a coding agent.
+
+## Create a Bundle
+
+```shell
+python -m gently.debug --session abc12345 --annotate "should query embryo position before moving"
+```
+
+Options:
+
+- `--root`: storage root, defaulting to `GENTLY_STORAGE_PATH` or `D:/Gently3`.
+- `--output-dir`: explicit destination for the bundle.
+- `--max-records`: maximum number of transcript excerpt records to include (default 80).
+
+The command writes:
+
+- `debug_context.md`: prompt/context for a coding agent.
+- `artifacts.json`: artifact inventory and source-file hints.
+- `transcript_excerpt.jsonl`: compact tail records from event, decision,
+  timeline, and interaction logs.
+- `profile_summary.json`: profiler span counts, duration by component, and
+  slowest spans when `profile.jsonl` or `profile_spans.jsonl` exists.
+- `source_files.txt`: source files inferred from tool calls in the logs.
+
+## Profiler Span Format
+
+Runtime profilers can write append-only JSONL records to either `profile.jsonl`
+or `profile_spans.jsonl` in the session directory. The exporter recognizes
+records with these fields:
+
+- `timestamp` or `start_time`
+- `component`, `subsystem`, `agent`, or `tool_name`
+- `operation`, `name`, `tool_name`, or `event`
+- `duration_ms`, `elapsed_ms`, `wall_ms`, or `duration_s` (seconds, converted to milliseconds)
+- optional `status` or `outcome`
+
+The schema is deliberately permissive so LLM calls, tool calls, hardware queue
+waits, perception steps, file I/O, and UI/WebSocket events can all be summarized
+without forcing them into one runtime dependency.
+
+Gently now records tool-call spans automatically when a running agent has an
+active FileStore session. Those spans are appended to:
+
+```text
+<session_dir>/profile_spans.jsonl
+```
+
+Set `GENTLY_PROFILE_PATH` to redirect spans to a specific JSONL file during
+tests or custom launches.
+
+## Workflow
+
+1. Run or replay a Gently agent scenario until the behavior diverges from what was
+   expected.
+2. Export the debug bundle with an annotation describing the expected behavior.
+3. Give the bundle to a coding agent with access to the repo.
+4. Ask for a root-cause analysis, a targeted fix, and an offline regression
+   test.
+
+The exporter does not require live hardware and does not copy large image or
+volume payloads into the bundle.
diff --git a/gently/debug/__init__.py b/gently/debug/__init__.py
new file mode 100644
index 00000000..619404d4
--- /dev/null
+++ b/gently/debug/__init__.py
@@ -0,0 +1,9 @@
+"""Debug export helpers for trajectory-guided agent development."""
+
+from .analyzer import DebugBundle, prepare_debug_context, resolve_session_dir
+
+__all__ = [
+    "DebugBundle",
+    "prepare_debug_context",
+    "resolve_session_dir",
+]
diff --git a/gently/debug/__main__.py b/gently/debug/__main__.py
new file mode 100644
index 00000000..fb0cedaf
--- /dev/null
+++ b/gently/debug/__main__.py
@@ -0,0 +1,7 @@
+"""Command-line entry point for ``python -m gently.debug``."""
+
+from .analyzer import main
+
+
+if __name__ == "__main__":  # true only when run via ``python -m gently.debug``
+    raise SystemExit(main())  # main()'s integer return value becomes the exit status
diff --git a/gently/debug/analyzer.py b/gently/debug/analyzer.py
new file mode 100644
index 00000000..4a0795dc
--- /dev/null
+++ b/gently/debug/analyzer.py
@@ -0,0 +1,477 @@
+"""Prepare trajectory-debugging context for coding agents."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple
+
+
+PROMPT_TEMPLATE = Path(__file__).parent / "prompts" / "debugging_prompt.md"  # first section of debug_context.md
+_TOOL_DECORATOR_RE = re.compile(r"name\s*=\s*['\"]([^'\"]+)['\"]")  # any quoted name=... pair, not only in decorators
+
+
+@dataclass(frozen=True)
+class ArtifactSummary:
+    """Immutable inventory entry describing one candidate session artifact file."""
+
+    kind: str  # logical label such as "events", "profile" or "perception_trace"
+    path: str  # stringified filesystem path of the candidate file
+    exists: bool  # False when the candidate file was absent at collection time
+    bytes: int = 0  # size taken from ``stat().st_size``; stays 0 for missing files
+    lines: int = 0  # line count, computed only for known text suffixes
+
+
+@dataclass(frozen=True)
+class DebugBundle:
+    """Immutable record of one debug export: where it came from and what it holds."""
+
+    session_id: str
+    session_dir: str
+    output_dir: str
+    annotation: Optional[str]
+    artifacts: List[ArtifactSummary] = field(default_factory=list)
+    source_files: List[str] = field(default_factory=list)
+    profile_summary: Mapping[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        payload = asdict(self)  # recursive: nested dataclasses become dicts
+        payload["artifacts"] = list(map(asdict, self.artifacts))
+        payload["profile_summary"] = dict(self.profile_summary)  # plain dict for any Mapping
+        return payload
+
+
+def resolve_session_dir(session: str, root: Optional[Path] = None) -> Tuple[str, Path]:
+    """Resolve a session directory, exact id, or unique id prefix to ``(id, dir)``."""
+    direct = Path(session)
+    if direct.is_dir():  # is_dir() is already False for paths that do not exist
+        return direct.name, direct
+
+    from gently.core.file_store import FileStore
+
+    root_path = Path(root or os.environ.get("GENTLY_STORAGE_PATH", "D:/Gently3"))
+    store = FileStore(root_path)
+    ids = [str(item.get("session_id", "")) for item in store.list_sessions()]
+    # An exact id wins even when it is also a prefix of longer ids; otherwise
+    # "abc" could never be selected while "abc123" exists.
+    matches = [sid for sid in ids if sid == session] or [
+        sid for sid in ids if sid.startswith(session)
+    ]
+    if not matches:
+        raise FileNotFoundError(f"No session matching {session!r} under {root_path}")
+    if len(matches) > 1:
+        raise ValueError(f"Multiple sessions match {session!r}: {', '.join(matches)}")
+
+    session_id = matches[0]
+    session_dir = store._session_dir(session_id)
+    if session_dir is None or not session_dir.exists():
+        raise FileNotFoundError(f"Session directory not found for {session_id}")
+    return session_id, session_dir
+
+
+def prepare_debug_context(
+    session: str,
+    *,
+    root: Optional[Path] = None,
+    output_dir: Optional[Path] = None,
+    annotation: Optional[str] = None,
+    max_records: int = 80,
+) -> DebugBundle:
+    """Write the five bundle files for ``session`` and return the bundle metadata."""
+    session_id, session_dir = resolve_session_dir(session, root=root)
+    if root is not None:
+        root_path = Path(root)
+    elif len(session_dir.parents) > 1:
+        root_path = session_dir.parents[1]  # presumably <root>/<group>/<session>; TODO confirm
+    else:
+        root_path = session_dir.parent
+    if output_dir is None:
+        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")  # local time, one-second resolution
+        output_dir = session_dir / "debug_exports" / stamp
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    artifacts = collect_artifacts(session_dir, root_path=root_path, session_id=session_id)
+    source_files = infer_relevant_source_files(session_dir, artifacts)
+    transcript_records = collect_transcript_excerpt(artifacts, max_records=max_records)
+    profile_summary = summarize_profile_records(artifacts)
+
+    bundle = DebugBundle(
+        session_id=session_id,
+        session_dir=str(session_dir),
+        output_dir=str(output_dir),
+        annotation=annotation,
+        artifacts=artifacts,
+        source_files=source_files,
+        profile_summary=profile_summary,
+    )
+
+    (output_dir / "artifacts.json").write_text(
+        json.dumps(bundle.to_dict(), indent=2),  # whole bundle, not only the artifact list
+        encoding="utf-8",
+    )
+    (output_dir / "source_files.txt").write_text(
+        "\n".join(source_files) + ("\n" if source_files else ""),  # empty file when nothing inferred
+        encoding="utf-8",
+    )
+    _write_jsonl(output_dir / "transcript_excerpt.jsonl", transcript_records)
+    (output_dir / "profile_summary.json").write_text(
+        json.dumps(profile_summary, indent=2),  # written even when no profile log exists
+        encoding="utf-8",
+    )
+    (output_dir / "debug_context.md").write_text(
+        build_debug_prompt(bundle, transcript_records),
+        encoding="utf-8",
+    )
+    return bundle
+
+
+def collect_artifacts(
+    session_dir: Path,
+    *,
+    root_path: Optional[Path] = None,
+    session_id: Optional[str] = None,
+) -> List[ArtifactSummary]:
+    """Summarize every known artifact candidate, present or missing, for a session."""
+    candidates = [
+        ("session", session_dir / "session.yaml"),
+        ("events", session_dir / "events.jsonl"),
+        ("decisions", session_dir / "decisions.jsonl"),
+        ("timeline", session_dir / "timeline.jsonl"),
+        ("profile", session_dir / "profile.jsonl"),
+        ("profile_spans", session_dir / "profile_spans.jsonl"),
+        ("interaction_log", session_dir / "interaction_log.jsonl"),
+    ]
+    if root_path is not None and session_id:  # this log is looked up under the root, not the session
+        candidates.append(
+            ("interaction_logger", root_path / "interaction_logs" / f"{session_id}.jsonl")
+        )
+
+    for trace in sorted(session_dir.glob("embryos/*/traces/*.json"))[:25]:  # first 25 in sorted order
+        candidates.append(("perception_trace", trace))
+    for predictions in sorted(session_dir.glob("embryos/*/predictions.jsonl"))[:25]:  # same cap
+        candidates.append(("predictions", predictions))
+
+    return [_summarize_artifact(kind, path) for kind, path in candidates]
+
+
+def collect_transcript_excerpt(
+    artifacts: Sequence[ArtifactSummary],
+    *,
+    max_records: int = 80,
+) -> List[Dict[str, Any]]:
+    """Read tail records from the JSONL logs that actually exist, up to ``max_records``."""
+    text_kinds = {"events", "decisions", "timeline", "interaction_log", "interaction_logger"}
+    # Split the budget across logs that exist; counting missing ones shrank every share.
+    present = [a for a in artifacts if a.exists and a.kind in text_kinds]
+    per_file = max(1, max_records // max(1, len(present)))
+    records: List[Dict[str, Any]] = []
+    for artifact in present:
+        for record in _read_jsonl_tail(Path(artifact.path), per_file):
+            records.append({"artifact": artifact.kind, "record": record})
+    return records[-max_records:] if max_records > 0 else []  # [-0:] would keep everything
+
+
+def summarize_profile_records(
+    artifacts: Sequence[ArtifactSummary],
+    *,
+    max_records: int = 1000,
+    max_slowest: int = 10,
+) -> Dict[str, Any]:
+    """Summarize span count, per-component duration, and slowest spans from profile logs."""
+    profile_kinds = {"profile", "profile_spans"}
+    spans: List[Dict[str, Any]] = []
+    duration_by_component: Dict[str, float] = {}
+
+    for artifact in artifacts:
+        if not artifact.exists or artifact.kind not in profile_kinds:
+            continue
+        for record in _read_jsonl_tail(Path(artifact.path), max_records):  # limit applies per file
+            if not isinstance(record, Mapping):  # JSON scalars and arrays are not spans
+                continue
+            component = str(
+                record.get("component")
+                or record.get("subsystem")
+                or record.get("agent")
+                or record.get("tool_name")
+                or "unknown"
+            )
+            operation = str(
+                record.get("operation")
+                or record.get("name")
+                or record.get("tool_name")
+                or record.get("event")
+                or "unknown"
+            )
+            duration_ms = _duration_ms(record)
+            span = {
+                "artifact": artifact.kind,
+                "timestamp": record.get("timestamp") or record.get("start_time"),
+                "component": component,
+                "operation": operation,
+                "duration_ms": duration_ms,
+                "status": record.get("status") or record.get("outcome"),
+            }
+            spans.append(span)  # unparsable lines arrive as {"raw": ...} and count as "unknown"
+            if duration_ms is not None:
+                duration_by_component[component] = (
+                    duration_by_component.get(component, 0.0) + duration_ms
+                )
+
+    slowest = sorted(
+        [span for span in spans if span["duration_ms"] is not None],
+        key=lambda span: span["duration_ms"],
+        reverse=True,  # longest duration first
+    )[:max_slowest]
+    return {
+        "span_count": len(spans),  # includes spans that carry no duration
+        "duration_by_component_ms": {
+            component: round(duration, 3)
+            for component, duration in sorted(duration_by_component.items())
+        },
+        "slowest_spans": slowest,
+    }
+
+
+def infer_relevant_source_files(
+    session_dir: Path,
+    artifacts: Sequence[ArtifactSummary],
+    *,
+    repo_root: Optional[Path] = None,
+) -> List[str]:
+    """Return sorted repo-relative source paths suggested by tool names in the logs."""
+    repo_root = repo_root or Path(__file__).resolve().parents[2]  # two levels above this package
+    tool_names = extract_tool_names(artifacts)
+    source_index = build_tool_source_index(repo_root)
+    files: Set[str] = set()
+    for name in tool_names:
+        path = source_index.get(name)
+        if path:
+            files.add(path.relative_to(repo_root).as_posix())
+
+    if tool_names:  # fixed hints added whenever any tool call was logged
+        files.update(
+            [
+                "gently/app/agent.py",
+                "gently/harness/conversation.py",
+                "gently/eval/decision_log.py",
+            ]
+        )
+    if (session_dir / "events.jsonl").exists():  # fixed hints; these paths are not checked on disk
+        files.add("gently/eval/event_capture.py")
+        files.add("gently/eval/event_replay.py")
+
+    return sorted(files)
+
+
+def extract_tool_names(artifacts: Sequence[ArtifactSummary]) -> Set[str]:
+    """Gather every tool name mentioned in the decision and interaction logs.
+
+    Only the last 200 lines of each existing log are scanned.
+    """
+    tool_log_kinds = {"decisions", "interaction_log", "interaction_logger"}
+    found: Set[str] = set()
+    for artifact in artifacts:
+        # Missing files and unrelated artifact kinds contribute nothing.
+        if artifact.exists and artifact.kind in tool_log_kinds:
+            for entry in _read_jsonl_tail(Path(artifact.path), 200):
+                found |= _find_tool_names(entry)
+    return found
+
+
+def build_tool_source_index(repo_root: Path) -> Dict[str, Path]:
+    """Map each quoted ``name=...`` value in ``gently/app/tools/*.py`` to its file."""
+    index: Dict[str, Path] = {}
+    tools_dir = repo_root / "gently" / "app" / "tools"
+    if not tools_dir.exists():
+        return index
+    for path in sorted(tools_dir.glob("*.py")):  # sorted: glob order is filesystem-dependent
+        text = path.read_text(encoding="utf-8", errors="replace")
+        for match in _TOOL_DECORATOR_RE.finditer(text):
+            index.setdefault(match.group(1), path)  # first file in sorted order wins
+    return index
+
+
+def build_debug_prompt(
+    bundle: DebugBundle,
+    transcript_records: Sequence[Mapping[str, Any]],
+) -> str:
+    """Return the ``debug_context.md`` text: prompt template plus bundle-specific sections."""
+    template = PROMPT_TEMPLATE.read_text(encoding="utf-8")
+    artifact_lines = [
+        f"- {artifact.kind}: `{artifact.path}` ({artifact.lines} lines, {artifact.bytes} bytes)"
+        for artifact in bundle.artifacts
+        if artifact.exists
+    ]
+    missing_lines = [
+        f"- {artifact.kind}: `{artifact.path}`"
+        for artifact in bundle.artifacts
+        if not artifact.exists
+    ]
+    source_lines = [f"- `{path}`" for path in bundle.source_files]
+
+    return "\n".join(
+        [
+            template,
+            "",
+            "## Session",
+            "",
+            f"- Session id: `{bundle.session_id}`",
+            f"- Session directory: `{bundle.session_dir}`",
+            f"- Annotation: {bundle.annotation or '(none supplied)'}",
+            "",
+            "## Included Artifacts",
+            "",
+            *(artifact_lines or ["- (none found)"]),  # placeholder keeps the section non-empty
+            "",
+            "## Missing Expected Artifacts",
+            "",
+            *(missing_lines or ["- (none)"]),
+            "",
+            "## Relevant Source Files",
+            "",
+            *(source_lines or ["- (no tool-specific source files inferred)"]),
+            "",
+            "## Transcript Excerpt",
+            "",
+            f"`transcript_excerpt.jsonl` contains {len(transcript_records)} compact records.",
+            "",
+            "## Profile Summary",
+            "",
+            _format_profile_summary(bundle.profile_summary),
+            "",
+            "## Suggested Debugging Output",
+            "",
+            "1. Root cause.",
+            "2. Smallest code or prompt fix.",
+            "3. Offline regression test.",
+            "4. Any live-hardware validation that remains necessary.",
+            "",  # final empty item makes the joined text end with a newline
+        ]
+    )
+
+
+def _summarize_artifact(kind: str, path: Path) -> ArtifactSummary:
+    """Stat ``path`` and count its lines (text suffixes only) into a summary."""
+    if not path.exists():
+        return ArtifactSummary(kind=kind, path=str(path), exists=False)
+    size = path.stat().st_size
+    line_count = 0
+    # Lines are counted only for these suffixes; anything else keeps a count of 0.
+    text_suffixes = {".jsonl", ".yaml", ".yml", ".json", ".md", ".txt"}
+    if path.suffix.lower() in text_suffixes:
+        # An unreadable file still gets a summary, just without a line count.
+        try:
+            with path.open("r", encoding="utf-8", errors="replace") as handle:
+                line_count = sum(1 for _ in handle)
+        except OSError:
+            line_count = 0
+    return ArtifactSummary(
+        kind=kind, path=str(path), exists=True, bytes=size, lines=line_count
+    )
+
+
+def _read_jsonl_tail(path: Path, max_lines: int) -> List[Any]:
+    """Parse the last ``max_lines`` lines of ``path``; bad lines become ``{"raw": line}``."""
+    try:
+        lines = path.read_text(encoding="utf-8", errors="replace").splitlines()
+    except OSError:
+        return []
+    # ``lines[-0:]`` is the whole list, so a zero or negative limit must yield nothing.
+    tail = lines[-max_lines:] if max_lines > 0 else []
+    out: List[Any] = []
+    for raw in filter(str.strip, tail):  # blank lines are skipped
+        try:
+            out.append(json.loads(raw))
+        except json.JSONDecodeError:
+            out.append({"raw": raw})
+    return out
+
+
+def _find_tool_names(value: Any) -> Set[str]:
+    """Recursively collect tool names from nested JSON-like mappings and lists."""
+    found: Set[str] = set()
+    if isinstance(value, list):
+        children: Iterable[Any] = value
+    elif isinstance(value, Mapping):
+        children = value.values()
+        if isinstance(value.get("tool_name"), str):
+            found.add(value["tool_name"])
+        # A bare "name" only counts when the mapping also carries call arguments.
+        if isinstance(value.get("name"), str) and {"input", "arguments", "params"} & set(value):
+            found.add(value["name"])
+    else:
+        return found
+    return found.union(*map(_find_tool_names, children))
+
+
+def _duration_ms(record: Mapping[str, Any]) -> Optional[float]:
+    """Return the span duration in milliseconds, or ``None`` if no alias holds a finite number."""
+    # (field, factor to milliseconds), tried in priority order.
+    aliases = (("duration_ms", 1.0), ("elapsed_ms", 1.0), ("wall_ms", 1.0), ("duration_s", 1000.0))
+    for key, factor in aliases:
+        try:
+            value = float(record.get(key)) * factor
+        except (TypeError, ValueError, OverflowError):
+            continue  # absent, None or non-numeric: fall through to the next alias
+        # NaN/inf (which json.loads accepts) would break sorting and emit invalid JSON.
+        if value == value and abs(value) != float("inf"):
+            return round(value, 3)
+    return None
+
+
+def _format_profile_summary(summary: Mapping[str, Any]) -> str:
+    """Render the profile summary as markdown text for ``debug_context.md``."""
+    span_count = summary.get("span_count") if summary else None
+    if not span_count:
+        return "No profiler spans were found."
+
+    out = [f"Profiler spans: {span_count}"]
+    durations = summary.get("duration_by_component_ms") or {}
+    if durations:
+        out += ["", "Duration by component:"]
+        out.extend(f"- {name}: {total} ms" for name, total in durations.items())
+
+    slowest = summary.get("slowest_spans") or []
+    if slowest:
+        out += ["", "Slowest spans:"]
+        # At most five spans are printed, however many the summary holds.
+        out.extend(
+            f"- {span.get('component')}.{span.get('operation')}: "
+            f"{span.get('duration_ms')} ms"
+            for span in slowest[:5]
+        )
+    return "\n".join(out)
+
+
+def _write_jsonl(path: Path, records: Iterable[Mapping[str, Any]]) -> None:
+    """Write ``records`` to ``path`` as UTF-8 JSON Lines, stringifying non-JSON values."""
+    with path.open("w", encoding="utf-8") as sink:
+        sink.writelines(json.dumps(record, default=str) + "\n" for record in records)
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:  # CLI entry point; returns the exit code
+    parser = argparse.ArgumentParser(
+        description="Prepare a trajectory-debugging context bundle for a Gently session."
+    )
+    parser.add_argument("--session", required=True, help="Session id/prefix or session directory")
+    parser.add_argument("--root", default=None, help="Gently storage root")
+    parser.add_argument("--output-dir", default=None, help="Debug bundle output directory")
+    parser.add_argument("--annotate", default=None, help="Expected behavior or failure note")
+    parser.add_argument("--max-records", type=int, default=80, help="Transcript records to include")
+    args = parser.parse_args(argv)  # argv=None makes argparse read sys.argv[1:]
+
+    bundle = prepare_debug_context(  # lookup errors propagate to the caller as exceptions
+        args.session,
+        root=Path(args.root) if args.root else None,  # an empty string counts as "not given"
+        output_dir=Path(args.output_dir) if args.output_dir else None,
+        annotation=args.annotate,
+        max_records=args.max_records,
+    )
+    print(bundle.output_dir)  # the bundle directory is the only thing printed
+    return 0
diff --git a/gently/debug/profiler.py b/gently/debug/profiler.py
new file mode 100644
index 00000000..99379598
--- /dev/null
+++ b/gently/debug/profiler.py
@@ -0,0 +1,80 @@
+"""Lightweight JSONL profiler spans for debug exports."""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import socket
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Mapping, Optional
+
+logger = logging.getLogger(__name__)
+
+
+def resolve_profile_path(context: Optional[Mapping[str, Any]]) -> Optional[Path]:
+    """Resolve where runtime profile spans should be written.
+
+    Precedence: the ``GENTLY_PROFILE_PATH`` env var, then ``agent.profile_path``,
+    then ``profile_spans.jsonl`` inside the agent's FileStore session directory.
+    Returns ``None`` when none of these is available.
+    """
+    explicit = os.environ.get("GENTLY_PROFILE_PATH", "").strip()  # whitespace-only counts as unset
+    if explicit:
+        return Path(explicit)
+
+    if not isinstance(context, Mapping):  # also covers context=None
+        return None
+    agent = context.get("agent")
+    if agent is None:
+        return None
+
+    direct = getattr(agent, "profile_path", None)  # optional per-agent override
+    if direct:
+        return Path(direct)
+
+    session_id = getattr(agent, "session_id", None)
+    store = getattr(agent, "store", None)
+    if session_id and store is not None and hasattr(store, "_session_dir"):
+        try:
+            session_dir = store._session_dir(session_id)
+        except Exception:  # any lookup failure is treated as "no session directory"
+            session_dir = None
+        if session_dir is not None:
+            return Path(session_dir) / "profile_spans.jsonl"
+
+    return None
+
+
+def record_profile_span(
+    context: Optional[Mapping[str, Any]],
+    *,
+    component: str,
+    operation: str,
+    duration_ms: float,
+    status: str,
+    metadata: Optional[Mapping[str, Any]] = None,
+) -> None:
+    """Append one profile span to the resolved JSONL file; failures are logged, never raised."""
+    path = resolve_profile_path(context)
+    if path is None:
+        return
+
+    try:  # record construction is inside the try so a bad duration cannot raise either
+        record = {
+            "timestamp": datetime.now().isoformat(timespec="milliseconds"),
+            "hostname": socket.gethostname(),
+            "component": component,
+            "operation": operation,
+            "duration_ms": round(float(duration_ms), 3),
+            "status": status,
+        }
+        # setdefault keeps the core span fields authoritative over metadata keys.
+        for key, value in (metadata or {}).items():
+            record.setdefault(key, value)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("a", encoding="utf-8") as f:
+            f.write(json.dumps(record, default=str) + "\n")
+    except Exception as exc:
+        logger.debug("failed to write profile span to %s: %s", path, exc)
diff --git a/gently/debug/prompts/debugging_prompt.md b/gently/debug/prompts/debugging_prompt.md
new file mode 100644
index 00000000..b649974a
--- /dev/null
+++ b/gently/debug/prompts/debugging_prompt.md
@@ -0,0 +1,18 @@
+# Trajectory Debugging Prompt
+
+You are reviewing a Gently copilot trajectory. Use the attached session
+artifacts, transcript excerpt, expected behavior annotation, and relevant source
+files to identify the smallest code or prompt change that would make the agent
+behave correctly.
+
+Focus on:
+
+- What the user expected.
+- What tool calls or events actually happened.
+- Whether the agent had enough context to choose the expected action.
+- Which tool descriptions, prompt sections, or orchestration code shaped the
+  decision.
+- A targeted fix and a regression test that would catch the issue next time.
+
+Do not assume live hardware is available. Prefer fixes that can be verified with
+offline traces, mock clients, or deterministic unit tests.
diff --git a/gently/harness/tools/registry.py b/gently/harness/tools/registry.py
index 48e1cf4f..eb7202c6 100644
--- a/gently/harness/tools/registry.py
+++ b/gently/harness/tools/registry.py
@@ -425,6 +425,8 @@ async def execute(self, tool_name: str, tool_input: Dict, context: Dict = None)
         else:
             exec_context = self._context
 
+        start_time = time.time()
+
         # Hybrid-autonomy backstop: during an autonomous (wake) turn, a small set
         # of irreversible tools (laser-on, embryo termination, stopping the run)
         # must NEVER execute without a human — even if the model tries to call
@@ -436,6 +438,7 @@ async def execute(self, tool_name: str, tool_input: Dict, context: Dict = None)
             blocked = getattr(_agent, '_autonomous_blocked_tools', None) or ()
             if tool_name in blocked:
                 logger.info("Autonomy backstop blocked '%s' (irreversible)", tool_name)
+                _record_tool_span(exec_context, tool, start_time, "blocked")
                 return (f"'{tool_name}' is an irreversible action and cannot run "
                         f"autonomously. Ask the operator to confirm it.")
 
@@ -443,10 +446,9 @@ async def execute(self, tool_name: str, tool_input: Dict, context: Dict = None)
         if tool.requires_microscope:
             client = exec_context.get('client')
             if client is None:
+                _record_tool_span(exec_context, tool, start_time, "missing_microscope")
                 return "Error: Not connected to microscope server. Start the server and reconnect."
 
-        start_time = time.time()
-
         try:
             # Prepare arguments
             kwargs = dict(tool_input)
@@ -464,12 +466,14 @@ async def execute(self, tool_name: str, tool_input: Dict, context: Dict = None)
 
             duration = time.time() - start_time
             logger.debug(f"Tool {tool_name} executed in {duration:.2f}s")
+            _record_tool_span(exec_context, tool, start_time, "ok")
 
             return result
 
         except Exception as e:
             import traceback
             logger.error(f"Tool {tool_name} failed: {e}")
+            _record_tool_span(exec_context, tool, start_time, "error", error=str(e))
             return f"Error executing {tool_name}: {str(e)}\n{traceback.format_exc()}"
 
     def __contains__(self, name: str) -> bool:
@@ -479,6 +483,37 @@ def __len__(self) -> int:
         return len(self._tools)
 
 
+def _record_tool_span(
+    context: Dict[str, Any],
+    tool: ToolDefinition,
+    start_time: float,
+    status: str,
+    *,
+    error: Optional[str] = None,
+) -> None:
+    """Record one tool-call profiler span; any failure is logged at debug level and swallowed."""
+    try:
+        from gently.debug.profiler import record_profile_span  # inside the try: import errors are swallowed too
+
+        metadata = {
+            "tool_name": tool.name,
+            "category": tool.category.name.lower(),
+            "requires_microscope": tool.requires_microscope,
+        }
+        if error:  # an empty error string is omitted as well
+            metadata["error"] = error
+        record_profile_span(
+            context,
+            component="tool",
+            operation=tool.name,
+            duration_ms=(time.time() - start_time) * 1000.0,  # elapsed since start_time, in ms
+            status=status,
+            metadata=metadata,
+        )
+    except Exception:
+        logger.debug("failed to record tool profiler span", exc_info=True)
+
+
 # Global registry instance
 _global_registry: Optional[ToolRegistry] = None
 
diff --git a/tests/test_debug_export.py b/tests/test_debug_export.py
new file mode 100644
index 00000000..84a658b4
--- /dev/null
+++ b/tests/test_debug_export.py
@@ -0,0 +1,90 @@
+import json
+from pathlib import Path
+
+from gently.core.file_store import FileStore
+from gently.debug import prepare_debug_context, resolve_session_dir
+
+
+def _write_jsonl(path: Path, records):  # test helper: one JSON document per line
+    path.write_text(
+        "".join(json.dumps(record) + "\n" for record in records),
+        encoding="utf-8",
+    )
+
+
+def test_prepare_debug_context_exports_session_bundle(tmp_path):  # end-to-end export into tmp_path
+    store = FileStore(tmp_path)
+    store.create_session("abc12345", name="debug test")
+    session_dir = store._session_dir("abc12345")
+    assert session_dir is not None
+
+    _write_jsonl(
+        session_dir / "decisions.jsonl",
+        [
+            {
+                "timestamp": "2026-05-30T12:00:00",
+                "agent": "production",
+                "trigger": "user_message",
+                "tool_calls": [
+                    {"name": "acquire_volume", "input": {"embryo_id": "embryo_1"}}
+                ],
+            }
+        ],
+    )
+    _write_jsonl(
+        session_dir / "events.jsonl",
+        [{"event_type": "STAGE_MOVED", "data": {"x": 1}}],
+    )
+    _write_jsonl(
+        session_dir / "profile.jsonl",
+        [
+            {
+                "timestamp": "2026-05-30T12:00:01",
+                "component": "llm",
+                "operation": "agent_turn",
+                "duration_ms": 1250.0,
+                "status": "ok",
+            },
+            {
+                "timestamp": "2026-05-30T12:00:02",
+                "component": "tool",
+                "operation": "acquire_volume",
+                "duration_ms": 320.5,
+                "status": "ok",
+            },
+        ],
+    )
+
+    bundle = prepare_debug_context(
+        "abc12345",
+        root=tmp_path,
+        output_dir=tmp_path / "debug_out",
+        annotation="should check stored position before acquisition",
+    )
+
+    output_dir = Path(bundle.output_dir)
+    context = (output_dir / "debug_context.md").read_text(encoding="utf-8")
+    source_files = (output_dir / "source_files.txt").read_text(encoding="utf-8")
+    artifacts = json.loads((output_dir / "artifacts.json").read_text(encoding="utf-8"))
+    profile = json.loads((output_dir / "profile_summary.json").read_text(encoding="utf-8"))
+    transcript = (output_dir / "transcript_excerpt.jsonl").read_text(encoding="utf-8")
+
+    assert "should check stored position" in context
+    assert "Profile Summary" in context
+    assert "llm.agent_turn" in context
+    assert "gently/app/tools/acquisition_tools.py" in source_files  # presumably where acquire_volume is declared; depends on the real repo
+    assert artifacts["session_id"] == "abc12345"
+    assert artifacts["profile_summary"]["span_count"] == 2
+    assert profile["duration_by_component_ms"]["llm"] == 1250.0
+    assert profile["slowest_spans"][0]["operation"] == "agent_turn"  # 1250.0 ms beats 320.5 ms
+    assert "acquire_volume" in transcript
+
+
+def test_resolve_session_dir_accepts_prefix(tmp_path):  # a unique id prefix resolves to the full id
+    store = FileStore(tmp_path)
+    store.create_session("prefix123", name="debug test")
+
+    session_id, session_dir = resolve_session_dir("prefix", root=tmp_path)  # presumably no cwd dir is named "prefix"
+
+    assert session_id == "prefix123"
+    assert session_dir.exists()
diff --git a/tests/test_tool_registry.py b/tests/test_tool_registry.py
index 273361bb..a78cd3f9 100644
--- a/tests/test_tool_registry.py
+++ b/tests/test_tool_registry.py
@@ -14,9 +14,12 @@
 """
 
 import asyncio
+import json
+from types import SimpleNamespace
 
 import pytest
 
+from gently.core.file_store import FileStore
 from gently.harness.tools.registry import (
     ToolRegistry,
     ToolCategory,
@@ -207,3 +210,27 @@ async def async_greeter(name: str) -> str:
     async def test_execute_nonexistent_tool(self, registry):
         with pytest.raises(ValueError, match="Unknown tool"):
             await registry.execute("nonexistent", {})
+
+    @pytest.mark.asyncio
+    async def test_execute_records_profile_span_for_session_tool(self, registry, tmp_path):
+        def adder(a: int, b: int) -> str:
+            return str(a + b)
+
+        store = FileStore(tmp_path)
+        store.create_session("abc12345", name="profile")
+        agent = SimpleNamespace(store=store, session_id="abc12345")  # stand-in exposing only store and session_id
+        registry.register_function(adder, name="adder", category=ToolCategory.UTILITY)
+
+        result = await registry.execute("adder", {"a": 2, "b": 5}, {"agent": agent})
+
+        profile_path = store._session_dir("abc12345") / "profile_spans.jsonl"  # assumes GENTLY_PROFILE_PATH is unset
+        records = [
+            json.loads(line)
+            for line in profile_path.read_text(encoding="utf-8").splitlines()
+        ]
+        assert result == "7"
+        assert records[-1]["component"] == "tool"
+        assert records[-1]["operation"] == "adder"
+        assert records[-1]["tool_name"] == "adder"  # copied into the span from the metadata dict
+        assert records[-1]["status"] == "ok"
+        assert records[-1]["duration_ms"] >= 0