From 455a1f1ab49e58bab8f8700a826b61e14985eb76 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Sat, 6 Jun 2026 16:31:55 +0300
Subject: [PATCH 1/2] 0.2.1: structured handoff records + huggingface pip extra

---
 README.md                   |  4 +-
 sdk/pyproject.toml          |  2 +-
 sdk/tracecraft/__init__.py  |  2 +-
 sdk/tracecraft/cli/steps.py | 86 +++++++++++++++++++++++++++++++------
 4 files changed, 78 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 7be3945..c4576f7 100644
--- a/README.md
+++ b/README.md
@@ -172,7 +172,9 @@ tracecraft inbox                          # Read messages
 tracecraft inbox --delete                 # Read and clear
 
 tracecraft claim <step-id>                # Claim a step (atomic)
-tracecraft complete <step-id> [--note X]  # Mark done + handoff note
+tracecraft complete <step-id> [--note X] [--to AGENT] [--next-action X]
+                                          [--blocked|--needs-review]
+                                          [--changed-files-from-git]  # Structured handoff record
 tracecraft step-status <step-id>          # Check status
 tracecraft wait-for <step-ids...>         # Block until complete (default 300s timeout)
 
diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
index 4f0be08..c6446f4 100644
--- a/sdk/pyproject.toml
+++ b/sdk/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tracecraft-ai"
-version = "0.2.0"
+version = "0.2.1"
 description = "Coordination layer for multi-agent AI systems. Bring your own S3 / HuggingFace bucket; shared memory, mailbox, atomic task claims, handoffs, artifacts — no server, no database."
 readme = "README.md"
 license = {text = "MIT"}
diff --git a/sdk/tracecraft/__init__.py b/sdk/tracecraft/__init__.py
index ae1ec60..22b8802 100644
--- a/sdk/tracecraft/__init__.py
+++ b/sdk/tracecraft/__init__.py
@@ -1,3 +1,3 @@
 """Tracecraft — coordination layer for multi-agent AI systems."""
 
-__version__ = "0.2.0"
+__version__ = "0.2.1"
diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py
index 58868d3..2ef1b34 100644
--- a/sdk/tracecraft/cli/steps.py
+++ b/sdk/tracecraft/cli/steps.py
@@ -1,5 +1,6 @@
 """tracecraft steps — claim, complete, and track coordination steps."""
 
+import subprocess
 import time
 from datetime import datetime, timezone
 
@@ -9,6 +10,27 @@
 from tracecraft.store import get_store
 
 
+def _git_changed_files() -> list[str]:
+    """Return changed files from `git diff --name-only HEAD` (staged + unstaged),
+    excluding untracked files; [] if git is missing, times out or exits non-zero. Never raises.
+
+    Git is the source of truth for what changed — we never let an agent type
+    the file list by hand (self-reported change lists are wrong ~half the time
+    and go stale on the next commit).
+    """
+    try:
+        out = subprocess.run(
+            ["git", "diff", "--name-only", "HEAD"],
+            capture_output=True, text=True, timeout=10,
+        )
+        if out.returncode != 0:
+            return []
+        files = [ln.strip() for ln in out.stdout.splitlines() if ln.strip()]
+        return files
+    except (OSError, subprocess.SubprocessError):
+        return []
+
+
 @click.command()
 @click.argument("step_id")
 def claim(step_id):
@@ -39,31 +61,69 @@ def claim(step_id):
 
 @click.command()
 @click.argument("step_id")
-@click.option("--note", default="", help="Handoff note for the next agent")
-def complete(step_id, note):
-    """Mark a step as complete and write handoff."""
+@click.option("--note", default="", help="Handoff note for the next agent (free text)")
+@click.option("--to", "next_agent", default=None, help="Agent this step hands off to")
+@click.option("--next-action", default=None, help="One line: what the next agent should do first")
+@click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete")
+@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete")
+@click.option(
+    "--changed-files-from-git",
+    is_flag=True,
+    help="Record files changed (from `git diff`), so the next agent knows what moved. No-op outside a git repo.",
+)
+def complete(step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git):
+    """Mark a step complete (or blocked / needs-review) and write a handoff record.
+
+    The handoff record is what the next agent sees instead of a shared
+    conversation — so it carries machine-checkable state, not just a note.
+    Fields that can be wrong if hand-typed (changed files) are sourced from
+    git; fields that would be hallucinated if mandatory (assumptions) stay as
+    optional free text in --note.
+    """
+    if blocked and needs_review:
+        raise click.ClickException("Use at most one of --blocked / --needs-review")
+
     store, cfg = get_store()
     agent = cfg["agent_id"]
     sid = step_id.lower().replace(".", "-")
     now = datetime.now(timezone.utc).isoformat()
 
-    # Update status
+    state = "blocked" if blocked else "needs_review" if needs_review else "complete"
+
+    # Status reflects the real outcome (not always "complete").
     existing = store.get_json(f"steps/{sid}/status.json") or {}
-    store.put_json(f"steps/{sid}/status.json", {
-        "status": "complete",
+    status_doc = {
+        "status": state,
         "agent": agent,
         "started_at": existing.get("started_at", now),
-        "completed_at": now,
-    })
-
-    # Write handoff
-    store.put_json(f"steps/{sid}/handoff.json", {
+    }
+    if state == "complete":
+        status_doc["completed_at"] = now
+    store.put_json(f"steps/{sid}/status.json", status_doc)
+
+    # Handoff record — schema v2. v1 keys are unchanged and v2 keys are additive, so old
+    # readers keep working. changed_files is git-derived and only written with the git flag.
+    handoff = {
+        "schema": 2,
         "from_agent": agent,
         "from_step": step_id,
+        "next_agent": next_agent,
+        "state": state,
+        "next_action": next_action,
         "note": note,
         "created_at": now,
-    })
-    click.echo(f"Completed step {step_id}")
+    }
+    if changed_files_from_git:
+        handoff["changed_files"] = _git_changed_files()
+    store.put_json(f"steps/{sid}/handoff.json", handoff)
+
+    label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state]
+    msg = f"{label} step {step_id}"
+    if next_agent:
+        msg += f" → handed off to {next_agent}"
+    if changed_files_from_git:
+        msg += f" ({len(handoff['changed_files'])} changed file(s))"
+    click.echo(msg)
 
 
 @click.command()

From d8a9fa7c326b459175f14f1428f9131cf7f0e911 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Sat, 6 Jun 2026 17:04:55 +0300
Subject: [PATCH 2/2] ci: add ruff format check; format codebase; add handoff
 tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a 'format' job to the tests workflow that runs 'ruff format --check sdk/'
on every push/PR, so style drift fails CI instead of reaching review.

To make the gate green, ran 'ruff format' across sdk/ (15 files reformatted,
whitespace/line-wrap only — no logic changes). Verified the full suite still
passes (66 tests).

Also lands sdk/tests/test_handoff_v2.py — the 9-test suite for the 0.2.1
structured handoff (complete/blocked/needs_review state, next_action/--to,
git-derived changed_files, mutual-exclusion, no mandatory assumptions field).
The fixture chdirs to an isolated dir and writes config to both the CWD-local
and HOME paths, so a stray ./.tracecraft.json can't shadow it (this was making
the tests hit a real endpoint and fail).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/test.yml            |  20 +++
 sdk/tests/test_handoff_v2.py          | 184 ++++++++++++++++++++++++++
 sdk/tests/test_session_cli.py         |   6 +-
 sdk/tests/test_tier_0.py              | 139 ++++++++++++-------
 sdk/tracecraft/cli/init_cmd.py        |  56 +++++---
 sdk/tracecraft/cli/memory.py          |  13 +-
 sdk/tracecraft/cli/messages.py        |  15 ++-
 sdk/tracecraft/cli/session.py         |  29 ++--
 sdk/tracecraft/cli/steps.py           |  25 ++--
 sdk/tracecraft/harness/claude_code.py |   3 +-
 sdk/tracecraft/harness/codex.py       |   4 +-
 sdk/tracecraft/harness/hermes.py      |   6 +-
 sdk/tracecraft/hf.py                  |   5 +-
 sdk/tracecraft/store.py               |   2 +
 14 files changed, 399 insertions(+), 108 deletions(-)
 create mode 100644 sdk/tests/test_handoff_v2.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f6e88dc..8c4d219 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,6 +7,26 @@ on:
     branches: [main]
 
 jobs:
+  format:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+
+      - name: Install ruff
+        working-directory: sdk
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Check formatting (ruff format)
+        run: ruff format --check sdk/
+
   pytest:
     runs-on: ubuntu-latest
     strategy:
diff --git a/sdk/tests/test_handoff_v2.py b/sdk/tests/test_handoff_v2.py
new file mode 100644
index 0000000..fa20346
--- /dev/null
+++ b/sdk/tests/test_handoff_v2.py
@@ -0,0 +1,184 @@
+"""Tests for the v0.2.1 structured handoff record.
+
+Schema v2 adds: schema version, state enum (complete/blocked/needs_review), next_agent,
+next_action, git-derived changed_files. All additive + backward compatible.
+"""
+
+from __future__ import annotations
+
+import json
+
+import boto3
+import pytest
+from click.testing import CliRunner
+from moto import mock_aws
+
+from tracecraft.cli import cli
+import tracecraft.cli.steps as steps_mod
+
+
+BUCKET = "tc-handoff-test"
+PROJECT = "demo"
+
+
+@pytest.fixture
+def env(tmp_path, monkeypatch):
+    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing")
+    monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
+    monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1")
+    # Run from an isolated empty dir. load_config() is CWD-first, so without
+    # this a stray ./.tracecraft.json in the repo would shadow our test config
+    # and point the CLI at a real endpoint.
+    work = tmp_path / "work"
+    work.mkdir()
+    monkeypatch.chdir(work)
+    cfg = {
+        "backend": "s3",
+        "endpoint": None,
+        "bucket": BUCKET,
+        "project": PROJECT,
+        "agent_id": "designer",
+        "access_key": "testing",
+        "secret_key": "testing",
+    }
+    # Write to the CWD-local path load_config() checks first...
+    (work / ".tracecraft.json").write_text(json.dumps(cfg))
+    # ...and the global HOME fallback, so tests that chdir elsewhere (the git
+    # tests below) still resolve a config.
+    fake_home = tmp_path / "home"
+    (fake_home / ".tracecraft").mkdir(parents=True)
+    (fake_home / ".tracecraft" / "config.json").write_text(json.dumps(cfg))
+    monkeypatch.setenv("HOME", str(fake_home))
+    with mock_aws():
+        boto3.client("s3").create_bucket(Bucket=BUCKET)
+        yield CliRunner()
+
+
+def _handoff(sid="design"):
+    c = boto3.client("s3")
+    obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/handoff.json")
+    return json.loads(obj["Body"].read())
+
+
+def _status(sid="design"):
+    c = boto3.client("s3")
+    obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/status.json")
+    return json.loads(obj["Body"].read())
+
+
+# ---------- backward compatibility ----------
+
+
+def test_plain_complete_is_backward_compatible(env):
+    r = env.invoke(cli, ["complete", "design"])
+    assert r.exit_code == 0, r.output
+    assert r.output.startswith("Completed step design")
+    h = _handoff()
+    # v1 keys still present
+    assert h["from_agent"] == "designer"
+    assert h["from_step"] == "design"
+    assert h["note"] == ""
+    assert "created_at" in h
+    # v2 defaults
+    assert h["schema"] == 2
+    assert h["state"] == "complete"
+    assert h["next_agent"] is None
+    assert h["next_action"] is None
+    assert "changed_files" not in h  # only present with the git flag
+    # status reflects complete
+    assert _status()["status"] == "complete"
+    assert "completed_at" in _status()
+
+
+# ---------- state enum ----------
+
+
+def test_blocked_sets_state_and_status(env):
+    r = env.invoke(cli, ["complete", "design", "--blocked", "--note", "stuck on auth"])
+    assert r.exit_code == 0, r.output
+    assert "Blocked step design" in r.output
+    assert _handoff()["state"] == "blocked"
+    assert _status()["status"] == "blocked"
+    assert "completed_at" not in _status()  # not complete → no completed_at
+
+
+def test_needs_review_sets_state(env):
+    r = env.invoke(cli, ["complete", "design", "--needs-review"])
+    assert r.exit_code == 0, r.output
+    assert "Needs review on step design" in r.output
+    assert _handoff()["state"] == "needs_review"
+    assert _status()["status"] == "needs_review"
+
+
+def test_blocked_and_needs_review_mutually_exclusive(env):
+    r = env.invoke(cli, ["complete", "design", "--blocked", "--needs-review"])
+    assert r.exit_code != 0
+    assert "at most one" in r.output
+
+
+# ---------- next_action + next_agent ----------
+
+
+def test_next_action_and_to(env):
+    r = env.invoke(
+        cli,
+        ["complete", "design", "--to", "developer", "--next-action", "wire api.py into search"],
+    )
+    assert r.exit_code == 0, r.output
+    assert "handed off to developer" in r.output
+    h = _handoff()
+    assert h["next_agent"] == "developer"
+    assert h["next_action"] == "wire api.py into search"
+
+
+# ---------- changed_files from git ----------
+
+
+def test_changed_files_git_in_repo(env, tmp_path, monkeypatch):
+    # Make cwd a git repo with one modified tracked file
+    import subprocess
+
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    monkeypatch.chdir(repo)
+    subprocess.run(["git", "init", "-q"], cwd=repo, check=True)
+    subprocess.run(["git", "config", "user.email", "t@t.t"], cwd=repo, check=True)
+    subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True)
+    (repo / "a.py").write_text("x = 1\n")
+    subprocess.run(["git", "add", "a.py"], cwd=repo, check=True)
+    subprocess.run(["git", "commit", "-qm", "init"], cwd=repo, check=True)
+    (repo / "a.py").write_text("x = 2\n")  # now modified vs HEAD
+
+    r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"])
+    assert r.exit_code == 0, r.output
+    h = _handoff()
+    assert h["changed_files"] == ["a.py"]
+    assert "1 changed file(s)" in r.output
+
+
+def test_changed_files_git_outside_repo_is_empty(env, tmp_path, monkeypatch):
+    # cwd is NOT a git repo → flag still succeeds and records an empty list, never crashes
+    nonrepo = tmp_path / "plain"
+    nonrepo.mkdir()
+    monkeypatch.chdir(nonrepo)
+    r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"])
+    assert r.exit_code == 0, r.output
+    assert _handoff()["changed_files"] == []
+
+
+def test_no_assumptions_field(env):
+    # We deliberately do NOT add a mandatory unresolved_assumptions field.
+    env.invoke(cli, ["complete", "design", "--note", "assumed v2 API"])
+    h = _handoff()
+    assert "unresolved_assumptions" not in h
+    assert "assumptions" not in h
+    # open questions live in the free-text note
+    assert h["note"] == "assumed v2 API"
+
+
+# ---------- helper direct test ----------
+
+
+def test_git_changed_files_helper_never_raises(monkeypatch, tmp_path):
+    monkeypatch.chdir(tmp_path)  # not a repo
+    assert steps_mod._git_changed_files() == []
diff --git a/sdk/tests/test_session_cli.py b/sdk/tests/test_session_cli.py
index cd22230..f2b2f7b 100644
--- a/sdk/tests/test_session_cli.py
+++ b/sdk/tests/test_session_cli.py
@@ -114,9 +114,7 @@ def _bucket_keys():
     """Return all keys under PROJECT/ stripped of the project prefix."""
     client = boto3.client("s3")
     resp = client.list_objects_v2(Bucket=BUCKET, Prefix=f"{PROJECT}/")
-    return [
-        obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", [])
-    ]
+    return [obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", [])]
 
 
 def _get_meta(session_id):
@@ -256,7 +254,7 @@ def test_session_list_shows_uploaded_session(cli_env):
 
 def test_session_show_tails_concatenated_parts(cli_env):
     runner, cwd, sess, sid = cli_env
-    sess.write_bytes(b'line1\n')
+    sess.write_bytes(b"line1\n")
     runner.invoke(cli, ["session", "mirror", "--harness", "claude-code", "--cwd", str(cwd)])
     with open(sess, "ab") as f:
         f.write(b"line2\nline3\n")
diff --git a/sdk/tests/test_tier_0.py b/sdk/tests/test_tier_0.py
index ab321a5..790a62d 100644
--- a/sdk/tests/test_tier_0.py
+++ b/sdk/tests/test_tier_0.py
@@ -57,6 +57,7 @@ def store(s3_env):
 
 # ---------- Fix 1: atomic claim ----------
 
+
 def test_fix1_atomic_put_first_writer_wins(store):
     """First put_json(if_none_match=True) succeeds; second raises PreconditionFailed."""
     store.put_json("steps/foo/claim.json", {"agent": "a"}, if_none_match=True)
@@ -80,15 +81,19 @@ def test_fix1_claim_cli_blocks_second_caller(s3_env, monkeypatch, tmp_path):
     cfg_file = tmp_path / ".tracecraft.json"
 
     def write_cfg(agent_id):
-        cfg_file.write_text(json.dumps({
-            "backend": "s3",
-            "bucket": BUCKET,
-            "project": PROJECT,
-            "endpoint": None,
-            "access_key": "testing",
-            "secret_key": "testing",
-            "agent_id": agent_id,
-        }))
+        cfg_file.write_text(
+            json.dumps(
+                {
+                    "backend": "s3",
+                    "bucket": BUCKET,
+                    "project": PROJECT,
+                    "endpoint": None,
+                    "access_key": "testing",
+                    "secret_key": "testing",
+                    "agent_id": agent_id,
+                }
+            )
+        )
 
     monkeypatch.chdir(tmp_path)
     write_cfg("agent-a")
@@ -106,6 +111,7 @@ def write_cfg(agent_id):
 
 # ---------- Fix 2: paginated list_keys ----------
 
+
 def test_fix2_list_keys_returns_more_than_1000(store):
     """Write 1250 keys; ensure list_keys returns them all (not capped at 1000)."""
     for i in range(1250):
@@ -118,6 +124,7 @@ def test_fix2_list_keys_returns_more_than_1000(store):
 
 # ---------- Fix 3: no default admin/secret credentials ----------
 
+
 def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path):
     """`tracecraft init` without --access-key/--secret-key/env must error."""
     monkeypatch.chdir(tmp_path)
@@ -125,13 +132,21 @@ def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path):
     monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False)
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", "http://localhost:9000",
-        "--bucket", "x",
-        "--project", "p",
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            "http://localhost:9000",
+            "--bucket",
+            "x",
+            "--project",
+            "p",
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code != 0
     assert "credentials required" in r.output.lower()
     # Critically, must NOT have written admin/secret to disk
@@ -145,13 +160,21 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,  # moto default
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,  # moto default
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     saved = json.loads((tmp_path / ".tracecraft.json").read_text())
     assert saved["access_key"] == "testing"
@@ -163,6 +186,7 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env):
 
 # ---------- Fix 4: .gitignore handling ----------
 
+
 def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env):
     """When cwd is a git repo, init appends .tracecraft.json to .gitignore."""
     (tmp_path / ".git").mkdir()
@@ -171,13 +195,21 @@ def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     gi = (tmp_path / ".gitignore").read_text()
     assert ".tracecraft.json" in gi.splitlines()
@@ -192,13 +224,21 @@ def test_fix4_gitignore_not_duplicated(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     lines = (tmp_path / ".gitignore").read_text().splitlines()
     assert lines.count(".tracecraft.json") == 1
@@ -211,19 +251,28 @@ def test_fix4_no_gitignore_outside_repo(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     assert not (tmp_path / ".gitignore").exists()
 
 
 # ---------- Fix 5: dead scaffolding removed ----------
 
+
 def test_fix5_no_empty_namespace_packages():
     """integrations/ and transport/ packages must not be importable."""
     with pytest.raises(ImportError):
@@ -243,5 +292,5 @@ def test_fix5_pyproject_drops_dead_extras():
     """crewai/langgraph/claude-sdk/all extras must not be declared."""
     repo_root = pathlib.Path(__file__).resolve().parents[2]
     text = (repo_root / "sdk" / "pyproject.toml").read_text()
-    for forbidden in ('crewai = [', 'langgraph = [', 'claude-sdk = [', 'all = ['):
+    for forbidden in ("crewai = [", "langgraph = [", "claude-sdk = [", "all = ["):
         assert forbidden not in text, f"pyproject still declares: {forbidden}"
diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py
index 1521e86..d1a9762 100644
--- a/sdk/tracecraft/cli/init_cmd.py
+++ b/sdk/tracecraft/cli/init_cmd.py
@@ -10,14 +10,35 @@
 
 
 @click.command()
-@click.option("--backend", type=click.Choice(["s3", "hf"]), default="s3", help="Storage backend: s3 or hf (HuggingFace Buckets)")
+@click.option(
+    "--backend",
+    type=click.Choice(["s3", "hf"]),
+    default="s3",
+    help="Storage backend: s3 or hf (HuggingFace Buckets)",
+)
 @click.option("--endpoint", default=None, help="S3 endpoint URL (s3 backend only)")
-@click.option("--bucket", required=True, help="Bucket name (s3) or HF bucket handle e.g. username/my-bucket (hf)")
+@click.option(
+    "--bucket",
+    required=True,
+    help="Bucket name (s3) or HF bucket handle e.g. username/my-bucket (hf)",
+)
 @click.option("--project", required=True, help="Project namespace")
 @click.option("--agent", required=True, help="Agent ID for this session")
-@click.option("--access-key", default=None, envvar="AWS_ACCESS_KEY_ID", help="S3 access key (env: AWS_ACCESS_KEY_ID)")
-@click.option("--secret-key", default=None, envvar="AWS_SECRET_ACCESS_KEY", help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)")
-@click.option("--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)")
+@click.option(
+    "--access-key",
+    default=None,
+    envvar="AWS_ACCESS_KEY_ID",
+    help="S3 access key (env: AWS_ACCESS_KEY_ID)",
+)
+@click.option(
+    "--secret-key",
+    default=None,
+    envvar="AWS_SECRET_ACCESS_KEY",
+    help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)",
+)
+@click.option(
+    "--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)"
+)
 def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token):
     """Initialize tracecraft config, create bucket, and register agent."""
     cfg = {
@@ -49,23 +70,24 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key,
     store.ensure_bucket()
 
     now = datetime.now(timezone.utc).isoformat()
-    store.put_json(f"agents/{agent}.json", {
-        "id": agent,
-        "status": "active",
-        "step": None,
-        "started_at": now,
-        "heartbeat": now,
-        "summary": "Initialized",
-    })
+    store.put_json(
+        f"agents/{agent}.json",
+        {
+            "id": agent,
+            "status": "active",
+            "step": None,
+            "started_at": now,
+            "heartbeat": now,
+            "summary": "Initialized",
+        },
+    )
 
     click.echo(f"Initialized project '{project}' as agent '{agent}'")
     if backend == "s3":
         click.echo(f"Backend: S3  Endpoint: {endpoint}  Bucket: {bucket}")
     else:
         click.echo(f"Backend: HuggingFace Buckets  Bucket: {bucket}")
-    click.echo(
-        "Note: .tracecraft.json contains credentials. Keep it out of version control."
-    )
+    click.echo("Note: .tracecraft.json contains credentials. Keep it out of version control.")
 
 
 def _ensure_gitignore_entry():
@@ -98,9 +120,11 @@ def _get_store(cfg):
     backend = cfg.get("backend", "s3")
     if backend == "hf":
         from tracecraft.hf import HF
+
         return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token"))
     else:
         from tracecraft.s3 import S3
+
         return S3(
             endpoint=cfg["endpoint"],
             bucket=cfg["bucket"],
diff --git a/sdk/tracecraft/cli/memory.py b/sdk/tracecraft/cli/memory.py
index 17c899b..65e12b2 100644
--- a/sdk/tracecraft/cli/memory.py
+++ b/sdk/tracecraft/cli/memory.py
@@ -33,11 +33,14 @@ def memory_set(key, value):
         raise click.ClickException("Key cannot be empty")
     store, cfg = get_store()
     now = datetime.now(timezone.utc).isoformat()
-    store.put_json(_key_to_path(key), {
-        "value": value,
-        "set_by": cfg["agent_id"],
-        "set_at": now,
-    })
+    store.put_json(
+        _key_to_path(key),
+        {
+            "value": value,
+            "set_by": cfg["agent_id"],
+            "set_at": now,
+        },
+    )
     click.echo(f"Set {key} = {value}")
 
 
diff --git a/sdk/tracecraft/cli/messages.py b/sdk/tracecraft/cli/messages.py
index 9088c70..efba3c4 100644
--- a/sdk/tracecraft/cli/messages.py
+++ b/sdk/tracecraft/cli/messages.py
@@ -21,12 +21,15 @@ def send(recipient, message):
     now = datetime.now(timezone.utc).isoformat()
 
     key = f"messages/{recipient}/{ts}_{sender}.json"
-    store.put_json(key, {
-        "from": sender,
-        "to": recipient,
-        "message": message,
-        "sent_at": now,
-    })
+    store.put_json(
+        key,
+        {
+            "from": sender,
+            "to": recipient,
+            "message": message,
+            "sent_at": now,
+        },
+    )
     click.echo(f"Sent to {recipient}: {message}")
 
 
diff --git a/sdk/tracecraft/cli/session.py b/sdk/tracecraft/cli/session.py
index 3342cc9..0313cc5 100644
--- a/sdk/tracecraft/cli/session.py
+++ b/sdk/tracecraft/cli/session.py
@@ -114,7 +114,9 @@ def session():
     default=None,
     help="Project directory the session ran in (claude-code only). Defaults to $PWD.",
 )
-@click.option("--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets.")
+@click.option(
+    "--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets."
+)
 @click.option(
     "--min-bytes",
     default=1,
@@ -156,9 +158,7 @@ def mirror(harness_name, session_id, cwd_str, no_redact, min_bytes):
     # authoritative — read_new() returns the real consumed cursor below.
     cur_size = harness.size(sess)
     if cur_size - cursor < min_bytes:
-        click.echo(
-            f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}"
-        )
+        click.echo(f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}")
         return
 
     # 2. Read everything new since `cursor`, race-free: read_new returns the
@@ -286,10 +286,10 @@ def list_(harness_filter, limit, sort_by):
         sid = m.get("session_id", "?")
         short = sid[:8] + ("…" if len(sid) > 8 else "")
         click.echo(
-            f"{m.get('harness','?'):<14} {short:<16} "
-            f"{m.get('total_uploaded_bytes',0):>12,} "
+            f"{m.get('harness', '?'):<14} {short:<16} "
+            f"{m.get('total_uploaded_bytes', 0):>12,} "
             f"{len(m.get('parts', [])):>6} "
-            f"{m.get('last_uploaded_at','-')[:24]:<25}"
+            f"{m.get('last_uploaded_at', '-')[:24]:<25}"
         )
 
 
@@ -309,7 +309,9 @@ def show(session_id, tail):
     store, _ = get_store()
 
     # Find which harness this session lives under (search every harness folder).
-    all_meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")]
+    all_meta_keys = [
+        k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")
+    ]
     if not all_meta_keys:
         raise click.ClickException(f"session not found: {session_id}")
     meta_key = all_meta_keys[0]
@@ -321,9 +323,7 @@ def show(session_id, tail):
 
     # Fetch all parts (in seq order), concatenate, print last N lines.
     prefix = meta_key[: -len("meta.json")]
-    part_keys = sorted(
-        k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1])
-    )
+    part_keys = sorted(k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1]))
     body = bytearray()
     for k in part_keys:
         with tempfile.NamedTemporaryFile(delete=False) as tf:
@@ -365,9 +365,7 @@ def stop(session_id):
 
     # Best-effort: mark ended_at in meta if a meta exists.
     store, _ = get_store()
-    meta_keys = [
-        k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")
-    ]
+    meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")]
     marked = False
     if meta_keys:
         meta = store.get_json(meta_keys[0]) or {}
@@ -377,6 +375,5 @@ def stop(session_id):
             marked = True
 
     click.echo(
-        f"stopped session={session_id}  "
-        f"state_cleared={had_state}  meta_marked_ended={marked}"
+        f"stopped session={session_id}  state_cleared={had_state}  meta_marked_ended={marked}"
     )
diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py
index 2ef1b34..c0c34dd 100644
--- a/sdk/tracecraft/cli/steps.py
+++ b/sdk/tracecraft/cli/steps.py
@@ -21,7 +21,9 @@ def _git_changed_files() -> list[str]:
     try:
         out = subprocess.run(
             ["git", "diff", "--name-only", "HEAD"],
-            capture_output=True, text=True, timeout=10,
+            capture_output=True,
+            text=True,
+            timeout=10,
         )
         if out.returncode != 0:
             return []
@@ -51,11 +53,14 @@ def claim(step_id):
         owner = existing.get("agent", "unknown")
         raise click.ClickException(f"Step {step_id} already claimed by {owner}")
 
-    store.put_json(f"steps/{sid}/status.json", {
-        "status": "in_progress",
-        "agent": agent,
-        "started_at": now,
-    })
+    store.put_json(
+        f"steps/{sid}/status.json",
+        {
+            "status": "in_progress",
+            "agent": agent,
+            "started_at": now,
+        },
+    )
     click.echo(f"Claimed step {step_id} as {agent}")
 
 
@@ -65,7 +70,9 @@ def claim(step_id):
 @click.option("--to", "next_agent", default=None, help="Agent this step hands off to")
 @click.option("--next-action", default=None, help="One line: what the next agent should do first")
 @click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete")
-@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete")
+@click.option(
+    "--needs-review", is_flag=True, help="Mark the step as needing review rather than complete"
+)
 @click.option(
     "--changed-files-from-git",
     is_flag=True,
@@ -117,7 +124,9 @@ def complete(step_id, note, next_agent, next_action, blocked, needs_review, chan
         handoff["changed_files"] = _git_changed_files()
     store.put_json(f"steps/{sid}/handoff.json", handoff)
 
-    label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state]
+    label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[
+        state
+    ]
     msg = f"{label} step {step_id}"
     if next_agent:
         msg += f" → handed off to {next_agent}"
diff --git a/sdk/tracecraft/harness/claude_code.py b/sdk/tracecraft/harness/claude_code.py
index bd3bbd1..bcc936b 100644
--- a/sdk/tracecraft/harness/claude_code.py
+++ b/sdk/tracecraft/harness/claude_code.py
@@ -40,6 +40,5 @@ def discover(self, cwd: Path) -> list[Session]:
         if not pdir.is_dir():
             return []
         return [
-            Session(path=jsonl, session_id=jsonl.stem, cwd=cwd)
-            for jsonl in pdir.glob("*.jsonl")
+            Session(path=jsonl, session_id=jsonl.stem, cwd=cwd) for jsonl in pdir.glob("*.jsonl")
         ]
diff --git a/sdk/tracecraft/harness/codex.py b/sdk/tracecraft/harness/codex.py
index a5bd85e..5650dfb 100644
--- a/sdk/tracecraft/harness/codex.py
+++ b/sdk/tracecraft/harness/codex.py
@@ -16,7 +16,9 @@
 from .base import FileTailHarness, Session
 
 
-_ROLLOUT_RE = re.compile(r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P<id>[A-Za-z0-9_-]+)\.jsonl$")
+_ROLLOUT_RE = re.compile(
+    r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P<id>[A-Za-z0-9_-]+)\.jsonl$"
+)
 
 
 class CodexHarness(FileTailHarness):
diff --git a/sdk/tracecraft/harness/hermes.py b/sdk/tracecraft/harness/hermes.py
index 671d0d0..668fc5f 100644
--- a/sdk/tracecraft/harness/hermes.py
+++ b/sdk/tracecraft/harness/hermes.py
@@ -59,7 +59,7 @@ def _decode_content(value):
     """Hermes stores multimodal content as '\\x00json:<json>'; scalars as-is."""
     if isinstance(value, str) and value.startswith(_CONTENT_JSON_PREFIX):
         try:
-            return json.loads(value[len(_CONTENT_JSON_PREFIX):])
+            return json.loads(value[len(_CONTENT_JSON_PREFIX) :])
         except json.JSONDecodeError:
             return value
     return value
@@ -82,9 +82,7 @@ def discover(self, cwd: Path) -> list[Session]:
             return []
         conn = _connect_ro(self.db_path)
         try:
-            rows = conn.execute(
-                "SELECT id FROM sessions ORDER BY started_at DESC"
-            ).fetchall()
+            rows = conn.execute("SELECT id FROM sessions ORDER BY started_at DESC").fetchall()
         except sqlite3.Error:
             return []
         finally:
diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py
index aa7a8dd..ca2756b 100644
--- a/sdk/tracecraft/hf.py
+++ b/sdk/tracecraft/hf.py
@@ -10,6 +10,7 @@
 class HF:
     def __init__(self, bucket, project, token=None):
         from huggingface_hub import HfFileSystem
+
         self.fs = HfFileSystem(token=token)
         self.bucket = bucket  # e.g. "username/my-bucket"
         self.project = project
@@ -35,11 +36,13 @@ def put_json(self, key, data, if_none_match=False):
                 # This is racy, but documented; S3-compatible backends use IfNoneMatch=* for safety.
                 if self.fs.exists(path):
                     from tracecraft.s3 import PreconditionFailed
+
                     raise PreconditionFailed(key)
             with self.fs.open(path, "w") as f:
                 json.dump(data, f, indent=2)
         except Exception as e:
             from tracecraft.s3 import PreconditionFailed
+
             if isinstance(e, PreconditionFailed):
                 raise
             raise click.ClickException(f"HF put failed: {e}")
@@ -65,7 +68,7 @@ def list_keys(self, prefix=""):
             keys = []
             for entry in entries:
                 if entry.startswith(base_prefix):
-                    keys.append(entry[len(base_prefix):])
+                    keys.append(entry[len(base_prefix) :])
                 else:
                     keys.append(entry)
             return keys
diff --git a/sdk/tracecraft/store.py b/sdk/tracecraft/store.py
index 4226394..85ce840 100644
--- a/sdk/tracecraft/store.py
+++ b/sdk/tracecraft/store.py
@@ -10,9 +10,11 @@ def get_store():
 
     if backend == "hf":
         from tracecraft.hf import HF
+
         return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token")), cfg
     else:
         from tracecraft.s3 import S3
+
         return S3(
             endpoint=cfg["endpoint"],
             bucket=cfg["bucket"],