From 455a1f1ab49e58bab8f8700a826b61e14985eb76 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Sat, 6 Jun 2026 16:31:55 +0300
Subject: [PATCH 01/22] 0.2.1: structured handoff records + huggingface pip
 extra

---
 README.md                   |  4 +-
 sdk/pyproject.toml          |  2 +-
 sdk/tracecraft/__init__.py  |  2 +-
 sdk/tracecraft/cli/steps.py | 86 +++++++++++++++++++++++++++++++------
 4 files changed, 78 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 7be3945..c4576f7 100644
--- a/README.md
+++ b/README.md
@@ -172,7 +172,9 @@ tracecraft inbox                          # Read messages
 tracecraft inbox --delete                 # Read and clear
 
 tracecraft claim <step-id>                # Claim a step (atomic)
-tracecraft complete <step-id> [--note X]  # Mark done + handoff note
+tracecraft complete <step-id> [--note X] [--to AGENT] [--next-action X]
+                                          [--blocked|--needs-review]
+                                          [--changed-files-from-git]  # Structured handoff record
 tracecraft step-status <step-id>          # Check status
 tracecraft wait-for <step-ids...>         # Block until complete (default 300s timeout)
 
diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
index 4f0be08..c6446f4 100644
--- a/sdk/pyproject.toml
+++ b/sdk/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tracecraft-ai"
-version = "0.2.0"
+version = "0.2.1"
 description = "Coordination layer for multi-agent AI systems. Bring your own S3 / HuggingFace bucket; shared memory, mailbox, atomic task claims, handoffs, artifacts — no server, no database."
 readme = "README.md"
 license = {text = "MIT"}
diff --git a/sdk/tracecraft/__init__.py b/sdk/tracecraft/__init__.py
index ae1ec60..22b8802 100644
--- a/sdk/tracecraft/__init__.py
+++ b/sdk/tracecraft/__init__.py
@@ -1,3 +1,3 @@
 """Tracecraft — coordination layer for multi-agent AI systems."""
 
-__version__ = "0.2.0"
+__version__ = "0.2.1"
diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py
index 58868d3..2ef1b34 100644
--- a/sdk/tracecraft/cli/steps.py
+++ b/sdk/tracecraft/cli/steps.py
@@ -1,5 +1,6 @@
 """tracecraft steps — claim, complete, and track coordination steps."""
 
+import subprocess
 import time
 from datetime import datetime, timezone
 
@@ -9,6 +10,27 @@
 from tracecraft.store import get_store
 
 
+def _git_changed_files() -> list[str]:
+    """Return changed files from `git diff --name-only HEAD` (staged + unstaged),
+    or [] if not a git repo / git unavailable. Untracked files are not listed. Never raises.
+
+    Git is the source of truth for what changed — we never let an agent type
+    the file list by hand (self-reported change lists are wrong ~half the time
+    and go stale on the next commit).
+    """
+    try:
+        out = subprocess.run(
+            ["git", "diff", "--name-only", "HEAD"],
+            capture_output=True, text=True, timeout=10,
+        )
+        if out.returncode != 0:
+            return []
+        files = [ln.strip() for ln in out.stdout.splitlines() if ln.strip()]
+        return files
+    except (OSError, subprocess.SubprocessError):
+        return []
+
+
 @click.command()
 @click.argument("step_id")
 def claim(step_id):
@@ -39,31 +61,69 @@ def claim(step_id):
 
 @click.command()
 @click.argument("step_id")
-@click.option("--note", default="", help="Handoff note for the next agent")
-def complete(step_id, note):
-    """Mark a step as complete and write handoff."""
+@click.option("--note", default="", help="Handoff note for the next agent (free text)")
+@click.option("--to", "next_agent", default=None, help="Agent this step hands off to")
+@click.option("--next-action", default=None, help="One line: what the next agent should do first")
+@click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete")
+@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete")
+@click.option(
+    "--changed-files-from-git",
+    is_flag=True,
+    help="Record files changed (from `git diff`), so the next agent knows what moved. No-op outside a git repo.",
+)
+def complete(step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git):
+    """Mark a step complete (or blocked / needs-review) and write a handoff record.
+
+    The handoff record is what the next agent sees instead of a shared
+    conversation — so it carries machine-checkable state, not just a note.
+    Fields that can be wrong if hand-typed (changed files) are sourced from
+    git; fields that would be hallucinated if mandatory (assumptions) stay as
+    optional free text in --note.
+    """
+    if blocked and needs_review:
+        raise click.ClickException("Use at most one of --blocked / --needs-review")
+
     store, cfg = get_store()
     agent = cfg["agent_id"]
     sid = step_id.lower().replace(".", "-")
     now = datetime.now(timezone.utc).isoformat()
 
-    # Update status
+    state = "blocked" if blocked else "needs_review" if needs_review else "complete"
+
+    # Status reflects the real outcome (not always "complete").
     existing = store.get_json(f"steps/{sid}/status.json") or {}
-    store.put_json(f"steps/{sid}/status.json", {
-        "status": "complete",
+    status_doc = {
+        "status": state,
         "agent": agent,
         "started_at": existing.get("started_at", now),
-        "completed_at": now,
-    })
-
-    # Write handoff
-    store.put_json(f"steps/{sid}/handoff.json", {
+    }
+    if state == "complete":
+        status_doc["completed_at"] = now
+    store.put_json(f"steps/{sid}/status.json", status_doc)
+
+    # Handoff record — schema v2. All v2 keys optional; old readers/handoffs
+    # keep working. changed_files is git-derived (never agent-typed).
+    handoff = {
+        "schema": 2,
         "from_agent": agent,
         "from_step": step_id,
+        "next_agent": next_agent,
+        "state": state,
+        "next_action": next_action,
         "note": note,
         "created_at": now,
-    })
-    click.echo(f"Completed step {step_id}")
+    }
+    if changed_files_from_git:
+        handoff["changed_files"] = _git_changed_files()
+    store.put_json(f"steps/{sid}/handoff.json", handoff)
+
+    label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state]
+    msg = f"{label} step {step_id}"
+    if next_agent:
+        msg += f" → handed off to {next_agent}"
+    if changed_files_from_git:
+        msg += f" ({len(handoff['changed_files'])} changed file(s))"
+    click.echo(msg)
 
 
 @click.command()

From d8a9fa7c326b459175f14f1428f9131cf7f0e911 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Sat, 6 Jun 2026 17:04:55 +0300
Subject: [PATCH 02/22] ci: add ruff format check; format codebase; add handoff
 tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a 'format' job to the tests workflow that runs 'ruff format --check sdk/'
on every push/PR, so style drift fails CI instead of reaching review.

To make the gate green, ran 'ruff format' across sdk/ (15 files reformatted,
whitespace/line-wrap only — no logic changes). Verified the full suite still
passes (66 tests).

Also lands sdk/tests/test_handoff_v2.py — the 9-test suite for the 0.2.1
structured handoff (complete/blocked/needs_review state, next_action/--to,
git-derived changed_files, mutual-exclusion, no mandatory assumptions field).
The fixture chdirs to an isolated dir and writes config to both the CWD-local
and HOME paths, so a stray ./.tracecraft.json can't shadow it (this was making
the tests hit a real endpoint and fail).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/test.yml            |  20 +++
 sdk/tests/test_handoff_v2.py          | 184 ++++++++++++++++++++++++++
 sdk/tests/test_session_cli.py         |   6 +-
 sdk/tests/test_tier_0.py              | 139 ++++++++++++-------
 sdk/tracecraft/cli/init_cmd.py        |  56 +++++---
 sdk/tracecraft/cli/memory.py          |  13 +-
 sdk/tracecraft/cli/messages.py        |  15 ++-
 sdk/tracecraft/cli/session.py         |  29 ++--
 sdk/tracecraft/cli/steps.py           |  25 ++--
 sdk/tracecraft/harness/claude_code.py |   3 +-
 sdk/tracecraft/harness/codex.py       |   4 +-
 sdk/tracecraft/harness/hermes.py      |   6 +-
 sdk/tracecraft/hf.py                  |   5 +-
 sdk/tracecraft/store.py               |   2 +
 14 files changed, 399 insertions(+), 108 deletions(-)
 create mode 100644 sdk/tests/test_handoff_v2.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f6e88dc..8c4d219 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,6 +7,26 @@ on:
     branches: [main]
 
 jobs:
+  format:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+
+      - name: Install ruff
+        working-directory: sdk
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Check formatting (ruff format)
+        run: ruff format --check sdk/
+
   pytest:
     runs-on: ubuntu-latest
     strategy:
diff --git a/sdk/tests/test_handoff_v2.py b/sdk/tests/test_handoff_v2.py
new file mode 100644
index 0000000..fa20346
--- /dev/null
+++ b/sdk/tests/test_handoff_v2.py
@@ -0,0 +1,184 @@
+"""Tests for the v0.2.1 structured handoff record.
+
+Schema v2 adds: state enum (complete/blocked/needs_review), next_action,
+git-derived changed_files. All optional + backward compatible.
+"""
+
+from __future__ import annotations
+
+import json
+
+import boto3
+import pytest
+from click.testing import CliRunner
+from moto import mock_aws
+
+from tracecraft.cli import cli
+import tracecraft.cli.steps as steps_mod
+
+
+BUCKET = "tc-handoff-test"
+PROJECT = "demo"
+
+
+@pytest.fixture
+def env(tmp_path, monkeypatch):
+    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing")
+    monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
+    monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1")
+    # Run from an isolated empty dir. load_config() is CWD-first, so without
+    # this a stray ./.tracecraft.json in the repo would shadow our test config
+    # and point the CLI at a real endpoint.
+    work = tmp_path / "work"
+    work.mkdir()
+    monkeypatch.chdir(work)
+    cfg = {
+        "backend": "s3",
+        "endpoint": None,
+        "bucket": BUCKET,
+        "project": PROJECT,
+        "agent_id": "designer",
+        "access_key": "testing",
+        "secret_key": "testing",
+    }
+    # Write to the CWD-local path load_config() checks first...
+    (work / ".tracecraft.json").write_text(json.dumps(cfg))
+    # ...and the global HOME fallback, so tests that chdir elsewhere (the git
+    # tests below) still resolve a config.
+    fake_home = tmp_path / "home"
+    (fake_home / ".tracecraft").mkdir(parents=True)
+    (fake_home / ".tracecraft" / "config.json").write_text(json.dumps(cfg))
+    monkeypatch.setenv("HOME", str(fake_home))
+    with mock_aws():
+        boto3.client("s3").create_bucket(Bucket=BUCKET)
+        yield CliRunner()
+
+
+def _handoff(sid="design"):
+    c = boto3.client("s3")
+    obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/handoff.json")
+    return json.loads(obj["Body"].read())
+
+
+def _status(sid="design"):
+    c = boto3.client("s3")
+    obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/status.json")
+    return json.loads(obj["Body"].read())
+
+
+# ---------- backward compatibility ----------
+
+
+def test_plain_complete_is_backward_compatible(env):
+    r = env.invoke(cli, ["complete", "design"])
+    assert r.exit_code == 0, r.output
+    assert r.output.startswith("Completed step design")
+    h = _handoff()
+    # v1 keys still present
+    assert h["from_agent"] == "designer"
+    assert h["from_step"] == "design"
+    assert h["note"] == ""
+    assert "created_at" in h
+    # v2 defaults
+    assert h["schema"] == 2
+    assert h["state"] == "complete"
+    assert h["next_agent"] is None
+    assert h["next_action"] is None
+    assert "changed_files" not in h  # only present with the git flag
+    # status reflects complete
+    assert _status()["status"] == "complete"
+    assert "completed_at" in _status()
+
+
+# ---------- state enum ----------
+
+
+def test_blocked_sets_state_and_status(env):
+    r = env.invoke(cli, ["complete", "design", "--blocked", "--note", "stuck on auth"])
+    assert r.exit_code == 0, r.output
+    assert "Blocked step design" in r.output
+    assert _handoff()["state"] == "blocked"
+    assert _status()["status"] == "blocked"
+    assert "completed_at" not in _status()  # not complete → no completed_at
+
+
+def test_needs_review_sets_state(env):
+    r = env.invoke(cli, ["complete", "design", "--needs-review"])
+    assert r.exit_code == 0, r.output
+    assert "Needs review on step design" in r.output
+    assert _handoff()["state"] == "needs_review"
+    assert _status()["status"] == "needs_review"
+
+
+def test_blocked_and_needs_review_mutually_exclusive(env):
+    r = env.invoke(cli, ["complete", "design", "--blocked", "--needs-review"])
+    assert r.exit_code != 0
+    assert "at most one" in r.output
+
+
+# ---------- next_action + next_agent ----------
+
+
+def test_next_action_and_to(env):
+    r = env.invoke(
+        cli,
+        ["complete", "design", "--to", "developer", "--next-action", "wire api.py into search"],
+    )
+    assert r.exit_code == 0, r.output
+    assert "handed off to developer" in r.output
+    h = _handoff()
+    assert h["next_agent"] == "developer"
+    assert h["next_action"] == "wire api.py into search"
+
+
+# ---------- changed_files from git ----------
+
+
+def test_changed_files_git_in_repo(env, tmp_path, monkeypatch):
+    # Make cwd a git repo with one modified tracked file
+    import subprocess
+
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    monkeypatch.chdir(repo)
+    subprocess.run(["git", "init", "-q"], cwd=repo, check=True)
+    subprocess.run(["git", "config", "user.email", "t@t.t"], cwd=repo, check=True)
+    subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True)
+    (repo / "a.py").write_text("x = 1\n")
+    subprocess.run(["git", "add", "a.py"], cwd=repo, check=True)
+    subprocess.run(["git", "commit", "-qm", "init"], cwd=repo, check=True)
+    (repo / "a.py").write_text("x = 2\n")  # now modified vs HEAD
+
+    r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"])
+    assert r.exit_code == 0, r.output
+    h = _handoff()
+    assert h["changed_files"] == ["a.py"]
+    assert "1 changed file(s)" in r.output
+
+
+def test_changed_files_git_outside_repo_is_empty(env, tmp_path, monkeypatch):
+    # cwd is NOT a git repo → changed_files is recorded as an empty list; never crashes
+    nonrepo = tmp_path / "plain"
+    nonrepo.mkdir()
+    monkeypatch.chdir(nonrepo)
+    r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"])
+    assert r.exit_code == 0, r.output
+    assert _handoff()["changed_files"] == []
+
+
+def test_no_assumptions_field(env):
+    # We deliberately do NOT add a mandatory unresolved_assumptions field.
+    env.invoke(cli, ["complete", "design", "--note", "assumed v2 API"])
+    h = _handoff()
+    assert "unresolved_assumptions" not in h
+    assert "assumptions" not in h
+    # open questions live in the free-text note
+    assert h["note"] == "assumed v2 API"
+
+
+# ---------- helper direct test ----------
+
+
+def test_git_changed_files_helper_never_raises(monkeypatch, tmp_path):
+    monkeypatch.chdir(tmp_path)  # not a repo
+    assert steps_mod._git_changed_files() == []
diff --git a/sdk/tests/test_session_cli.py b/sdk/tests/test_session_cli.py
index cd22230..f2b2f7b 100644
--- a/sdk/tests/test_session_cli.py
+++ b/sdk/tests/test_session_cli.py
@@ -114,9 +114,7 @@ def _bucket_keys():
     """Return all keys under PROJECT/ stripped of the project prefix."""
     client = boto3.client("s3")
     resp = client.list_objects_v2(Bucket=BUCKET, Prefix=f"{PROJECT}/")
-    return [
-        obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", [])
-    ]
+    return [obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", [])]
 
 
 def _get_meta(session_id):
@@ -256,7 +254,7 @@ def test_session_list_shows_uploaded_session(cli_env):
 
 def test_session_show_tails_concatenated_parts(cli_env):
     runner, cwd, sess, sid = cli_env
-    sess.write_bytes(b'line1\n')
+    sess.write_bytes(b"line1\n")
     runner.invoke(cli, ["session", "mirror", "--harness", "claude-code", "--cwd", str(cwd)])
     with open(sess, "ab") as f:
         f.write(b"line2\nline3\n")
diff --git a/sdk/tests/test_tier_0.py b/sdk/tests/test_tier_0.py
index ab321a5..790a62d 100644
--- a/sdk/tests/test_tier_0.py
+++ b/sdk/tests/test_tier_0.py
@@ -57,6 +57,7 @@ def store(s3_env):
 
 # ---------- Fix 1: atomic claim ----------
 
+
 def test_fix1_atomic_put_first_writer_wins(store):
     """First put_json(if_none_match=True) succeeds; second raises PreconditionFailed."""
     store.put_json("steps/foo/claim.json", {"agent": "a"}, if_none_match=True)
@@ -80,15 +81,19 @@ def test_fix1_claim_cli_blocks_second_caller(s3_env, monkeypatch, tmp_path):
     cfg_file = tmp_path / ".tracecraft.json"
 
     def write_cfg(agent_id):
-        cfg_file.write_text(json.dumps({
-            "backend": "s3",
-            "bucket": BUCKET,
-            "project": PROJECT,
-            "endpoint": None,
-            "access_key": "testing",
-            "secret_key": "testing",
-            "agent_id": agent_id,
-        }))
+        cfg_file.write_text(
+            json.dumps(
+                {
+                    "backend": "s3",
+                    "bucket": BUCKET,
+                    "project": PROJECT,
+                    "endpoint": None,
+                    "access_key": "testing",
+                    "secret_key": "testing",
+                    "agent_id": agent_id,
+                }
+            )
+        )
 
     monkeypatch.chdir(tmp_path)
     write_cfg("agent-a")
@@ -106,6 +111,7 @@ def write_cfg(agent_id):
 
 # ---------- Fix 2: paginated list_keys ----------
 
+
 def test_fix2_list_keys_returns_more_than_1000(store):
     """Write 1250 keys; ensure list_keys returns them all (not capped at 1000)."""
     for i in range(1250):
@@ -118,6 +124,7 @@ def test_fix2_list_keys_returns_more_than_1000(store):
 
 # ---------- Fix 3: no default admin/secret credentials ----------
 
+
 def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path):
     """`tracecraft init` without --access-key/--secret-key/env must error."""
     monkeypatch.chdir(tmp_path)
@@ -125,13 +132,21 @@ def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path):
     monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False)
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", "http://localhost:9000",
-        "--bucket", "x",
-        "--project", "p",
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            "http://localhost:9000",
+            "--bucket",
+            "x",
+            "--project",
+            "p",
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code != 0
     assert "credentials required" in r.output.lower()
     # Critically, must NOT have written admin/secret to disk
@@ -145,13 +160,21 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,  # moto default
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,  # moto default
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     saved = json.loads((tmp_path / ".tracecraft.json").read_text())
     assert saved["access_key"] == "testing"
@@ -163,6 +186,7 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env):
 
 # ---------- Fix 4: .gitignore handling ----------
 
+
 def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env):
     """When cwd is a git repo, init appends .tracecraft.json to .gitignore."""
     (tmp_path / ".git").mkdir()
@@ -171,13 +195,21 @@ def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     gi = (tmp_path / ".gitignore").read_text()
     assert ".tracecraft.json" in gi.splitlines()
@@ -192,13 +224,21 @@ def test_fix4_gitignore_not_duplicated(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     lines = (tmp_path / ".gitignore").read_text().splitlines()
     assert lines.count(".tracecraft.json") == 1
@@ -211,19 +251,28 @@ def test_fix4_no_gitignore_outside_repo(monkeypatch, tmp_path, s3_env):
     monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
 
     runner = CliRunner()
-    r = runner.invoke(init_cmd, [
-        "--backend", "s3",
-        "--endpoint", MOTO_ENDPOINT,
-        "--bucket", BUCKET,
-        "--project", PROJECT,
-        "--agent", "a",
-    ])
+    r = runner.invoke(
+        init_cmd,
+        [
+            "--backend",
+            "s3",
+            "--endpoint",
+            MOTO_ENDPOINT,
+            "--bucket",
+            BUCKET,
+            "--project",
+            PROJECT,
+            "--agent",
+            "a",
+        ],
+    )
     assert r.exit_code == 0, r.output
     assert not (tmp_path / ".gitignore").exists()
 
 
 # ---------- Fix 5: dead scaffolding removed ----------
 
+
 def test_fix5_no_empty_namespace_packages():
     """integrations/ and transport/ packages must not be importable."""
     with pytest.raises(ImportError):
@@ -243,5 +292,5 @@ def test_fix5_pyproject_drops_dead_extras():
     """crewai/langgraph/claude-sdk/all extras must not be declared."""
     repo_root = pathlib.Path(__file__).resolve().parents[2]
     text = (repo_root / "sdk" / "pyproject.toml").read_text()
-    for forbidden in ('crewai = [', 'langgraph = [', 'claude-sdk = [', 'all = ['):
+    for forbidden in ("crewai = [", "langgraph = [", "claude-sdk = [", "all = ["):
         assert forbidden not in text, f"pyproject still declares: {forbidden}"
diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py
index 1521e86..d1a9762 100644
--- a/sdk/tracecraft/cli/init_cmd.py
+++ b/sdk/tracecraft/cli/init_cmd.py
@@ -10,14 +10,35 @@
 
 
 @click.command()
-@click.option("--backend", type=click.Choice(["s3", "hf"]), default="s3", help="Storage backend: s3 or hf (HuggingFace Buckets)")
+@click.option(
+    "--backend",
+    type=click.Choice(["s3", "hf"]),
+    default="s3",
+    help="Storage backend: s3 or hf (HuggingFace Buckets)",
+)
 @click.option("--endpoint", default=None, help="S3 endpoint URL (s3 backend only)")
-@click.option("--bucket", required=True, help="Bucket name (s3) or HF bucket handle e.g. username/my-bucket (hf)")
+@click.option(
+    "--bucket",
+    required=True,
+    help="Bucket name (s3) or HF bucket handle e.g. username/my-bucket (hf)",
+)
 @click.option("--project", required=True, help="Project namespace")
 @click.option("--agent", required=True, help="Agent ID for this session")
-@click.option("--access-key", default=None, envvar="AWS_ACCESS_KEY_ID", help="S3 access key (env: AWS_ACCESS_KEY_ID)")
-@click.option("--secret-key", default=None, envvar="AWS_SECRET_ACCESS_KEY", help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)")
-@click.option("--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)")
+@click.option(
+    "--access-key",
+    default=None,
+    envvar="AWS_ACCESS_KEY_ID",
+    help="S3 access key (env: AWS_ACCESS_KEY_ID)",
+)
+@click.option(
+    "--secret-key",
+    default=None,
+    envvar="AWS_SECRET_ACCESS_KEY",
+    help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)",
+)
+@click.option(
+    "--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)"
+)
 def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token):
     """Initialize tracecraft config, create bucket, and register agent."""
     cfg = {
@@ -49,23 +70,24 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key,
     store.ensure_bucket()
 
     now = datetime.now(timezone.utc).isoformat()
-    store.put_json(f"agents/{agent}.json", {
-        "id": agent,
-        "status": "active",
-        "step": None,
-        "started_at": now,
-        "heartbeat": now,
-        "summary": "Initialized",
-    })
+    store.put_json(
+        f"agents/{agent}.json",
+        {
+            "id": agent,
+            "status": "active",
+            "step": None,
+            "started_at": now,
+            "heartbeat": now,
+            "summary": "Initialized",
+        },
+    )
 
     click.echo(f"Initialized project '{project}' as agent '{agent}'")
     if backend == "s3":
         click.echo(f"Backend: S3  Endpoint: {endpoint}  Bucket: {bucket}")
     else:
         click.echo(f"Backend: HuggingFace Buckets  Bucket: {bucket}")
-    click.echo(
-        "Note: .tracecraft.json contains credentials. Keep it out of version control."
-    )
+    click.echo("Note: .tracecraft.json contains credentials. Keep it out of version control.")
 
 
 def _ensure_gitignore_entry():
@@ -98,9 +120,11 @@ def _get_store(cfg):
     backend = cfg.get("backend", "s3")
     if backend == "hf":
         from tracecraft.hf import HF
+
         return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token"))
     else:
         from tracecraft.s3 import S3
+
         return S3(
             endpoint=cfg["endpoint"],
             bucket=cfg["bucket"],
diff --git a/sdk/tracecraft/cli/memory.py b/sdk/tracecraft/cli/memory.py
index 17c899b..65e12b2 100644
--- a/sdk/tracecraft/cli/memory.py
+++ b/sdk/tracecraft/cli/memory.py
@@ -33,11 +33,14 @@ def memory_set(key, value):
         raise click.ClickException("Key cannot be empty")
     store, cfg = get_store()
     now = datetime.now(timezone.utc).isoformat()
-    store.put_json(_key_to_path(key), {
-        "value": value,
-        "set_by": cfg["agent_id"],
-        "set_at": now,
-    })
+    store.put_json(
+        _key_to_path(key),
+        {
+            "value": value,
+            "set_by": cfg["agent_id"],
+            "set_at": now,
+        },
+    )
     click.echo(f"Set {key} = {value}")
 
 
diff --git a/sdk/tracecraft/cli/messages.py b/sdk/tracecraft/cli/messages.py
index 9088c70..efba3c4 100644
--- a/sdk/tracecraft/cli/messages.py
+++ b/sdk/tracecraft/cli/messages.py
@@ -21,12 +21,15 @@ def send(recipient, message):
     now = datetime.now(timezone.utc).isoformat()
 
     key = f"messages/{recipient}/{ts}_{sender}.json"
-    store.put_json(key, {
-        "from": sender,
-        "to": recipient,
-        "message": message,
-        "sent_at": now,
-    })
+    store.put_json(
+        key,
+        {
+            "from": sender,
+            "to": recipient,
+            "message": message,
+            "sent_at": now,
+        },
+    )
     click.echo(f"Sent to {recipient}: {message}")
 
 
diff --git a/sdk/tracecraft/cli/session.py b/sdk/tracecraft/cli/session.py
index 3342cc9..0313cc5 100644
--- a/sdk/tracecraft/cli/session.py
+++ b/sdk/tracecraft/cli/session.py
@@ -114,7 +114,9 @@ def session():
     default=None,
     help="Project directory the session ran in (claude-code only). Defaults to $PWD.",
 )
-@click.option("--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets.")
+@click.option(
+    "--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets."
+)
 @click.option(
     "--min-bytes",
     default=1,
@@ -156,9 +158,7 @@ def mirror(harness_name, session_id, cwd_str, no_redact, min_bytes):
     # authoritative — read_new() returns the real consumed cursor below.
     cur_size = harness.size(sess)
     if cur_size - cursor < min_bytes:
-        click.echo(
-            f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}"
-        )
+        click.echo(f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}")
         return
 
     # 2. Read everything new since `cursor`, race-free: read_new returns the
@@ -286,10 +286,10 @@ def list_(harness_filter, limit, sort_by):
         sid = m.get("session_id", "?")
         short = sid[:8] + ("…" if len(sid) > 8 else "")
         click.echo(
-            f"{m.get('harness','?'):<14} {short:<16} "
-            f"{m.get('total_uploaded_bytes',0):>12,} "
+            f"{m.get('harness', '?'):<14} {short:<16} "
+            f"{m.get('total_uploaded_bytes', 0):>12,} "
             f"{len(m.get('parts', [])):>6} "
-            f"{m.get('last_uploaded_at','-')[:24]:<25}"
+            f"{m.get('last_uploaded_at', '-')[:24]:<25}"
         )
 
 
@@ -309,7 +309,9 @@ def show(session_id, tail):
     store, _ = get_store()
 
     # Find which harness this session lives under (search every harness folder).
-    all_meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")]
+    all_meta_keys = [
+        k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")
+    ]
     if not all_meta_keys:
         raise click.ClickException(f"session not found: {session_id}")
     meta_key = all_meta_keys[0]
@@ -321,9 +323,7 @@ def show(session_id, tail):
 
     # Fetch all parts (in seq order), concatenate, print last N lines.
     prefix = meta_key[: -len("meta.json")]
-    part_keys = sorted(
-        k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1])
-    )
+    part_keys = sorted(k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1]))
     body = bytearray()
     for k in part_keys:
         with tempfile.NamedTemporaryFile(delete=False) as tf:
@@ -365,9 +365,7 @@ def stop(session_id):
 
     # Best-effort: mark ended_at in meta if a meta exists.
     store, _ = get_store()
-    meta_keys = [
-        k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")
-    ]
+    meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")]
     marked = False
     if meta_keys:
         meta = store.get_json(meta_keys[0]) or {}
@@ -377,6 +375,5 @@ def stop(session_id):
             marked = True
 
     click.echo(
-        f"stopped session={session_id}  "
-        f"state_cleared={had_state}  meta_marked_ended={marked}"
+        f"stopped session={session_id}  state_cleared={had_state}  meta_marked_ended={marked}"
     )
diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py
index 2ef1b34..c0c34dd 100644
--- a/sdk/tracecraft/cli/steps.py
+++ b/sdk/tracecraft/cli/steps.py
@@ -21,7 +21,9 @@ def _git_changed_files() -> list[str]:
     try:
         out = subprocess.run(
             ["git", "diff", "--name-only", "HEAD"],
-            capture_output=True, text=True, timeout=10,
+            capture_output=True,
+            text=True,
+            timeout=10,
         )
         if out.returncode != 0:
             return []
@@ -51,11 +53,14 @@ def claim(step_id):
         owner = existing.get("agent", "unknown")
         raise click.ClickException(f"Step {step_id} already claimed by {owner}")
 
-    store.put_json(f"steps/{sid}/status.json", {
-        "status": "in_progress",
-        "agent": agent,
-        "started_at": now,
-    })
+    store.put_json(
+        f"steps/{sid}/status.json",
+        {
+            "status": "in_progress",
+            "agent": agent,
+            "started_at": now,
+        },
+    )
     click.echo(f"Claimed step {step_id} as {agent}")
 
 
@@ -65,7 +70,9 @@ def claim(step_id):
 @click.option("--to", "next_agent", default=None, help="Agent this step hands off to")
 @click.option("--next-action", default=None, help="One line: what the next agent should do first")
 @click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete")
-@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete")
+@click.option(
+    "--needs-review", is_flag=True, help="Mark the step as needing review rather than complete"
+)
 @click.option(
     "--changed-files-from-git",
     is_flag=True,
@@ -117,7 +124,9 @@ def complete(step_id, note, next_agent, next_action, blocked, needs_review, chan
         handoff["changed_files"] = _git_changed_files()
     store.put_json(f"steps/{sid}/handoff.json", handoff)
 
-    label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state]
+    label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[
+        state
+    ]
     msg = f"{label} step {step_id}"
     if next_agent:
         msg += f" → handed off to {next_agent}"
diff --git a/sdk/tracecraft/harness/claude_code.py b/sdk/tracecraft/harness/claude_code.py
index bd3bbd1..bcc936b 100644
--- a/sdk/tracecraft/harness/claude_code.py
+++ b/sdk/tracecraft/harness/claude_code.py
@@ -40,6 +40,5 @@ def discover(self, cwd: Path) -> list[Session]:
         if not pdir.is_dir():
             return []
         return [
-            Session(path=jsonl, session_id=jsonl.stem, cwd=cwd)
-            for jsonl in pdir.glob("*.jsonl")
+            Session(path=jsonl, session_id=jsonl.stem, cwd=cwd) for jsonl in pdir.glob("*.jsonl")
         ]
diff --git a/sdk/tracecraft/harness/codex.py b/sdk/tracecraft/harness/codex.py
index a5bd85e..5650dfb 100644
--- a/sdk/tracecraft/harness/codex.py
+++ b/sdk/tracecraft/harness/codex.py
@@ -16,7 +16,9 @@
 from .base import FileTailHarness, Session
 
 
-_ROLLOUT_RE = re.compile(r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P<id>[A-Za-z0-9_-]+)\.jsonl$")
+_ROLLOUT_RE = re.compile(
+    r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P<id>[A-Za-z0-9_-]+)\.jsonl$"
+)
 
 
 class CodexHarness(FileTailHarness):
diff --git a/sdk/tracecraft/harness/hermes.py b/sdk/tracecraft/harness/hermes.py
index 671d0d0..668fc5f 100644
--- a/sdk/tracecraft/harness/hermes.py
+++ b/sdk/tracecraft/harness/hermes.py
@@ -59,7 +59,7 @@ def _decode_content(value):
     """Hermes stores multimodal content as '\\x00json:<json>'; scalars as-is."""
     if isinstance(value, str) and value.startswith(_CONTENT_JSON_PREFIX):
         try:
-            return json.loads(value[len(_CONTENT_JSON_PREFIX):])
+            return json.loads(value[len(_CONTENT_JSON_PREFIX) :])
         except json.JSONDecodeError:
             return value
     return value
@@ -82,9 +82,7 @@ def discover(self, cwd: Path) -> list[Session]:
             return []
         conn = _connect_ro(self.db_path)
         try:
-            rows = conn.execute(
-                "SELECT id FROM sessions ORDER BY started_at DESC"
-            ).fetchall()
+            rows = conn.execute("SELECT id FROM sessions ORDER BY started_at DESC").fetchall()
         except sqlite3.Error:
             return []
         finally:
diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py
index aa7a8dd..ca2756b 100644
--- a/sdk/tracecraft/hf.py
+++ b/sdk/tracecraft/hf.py
@@ -10,6 +10,7 @@
 class HF:
     def __init__(self, bucket, project, token=None):
         from huggingface_hub import HfFileSystem
+
         self.fs = HfFileSystem(token=token)
         self.bucket = bucket  # e.g. "username/my-bucket"
         self.project = project
@@ -35,11 +36,13 @@ def put_json(self, key, data, if_none_match=False):
                 # This is racy, but documented; S3-compatible backends use IfNoneMatch=* for safety.
                 if self.fs.exists(path):
                     from tracecraft.s3 import PreconditionFailed
+
                     raise PreconditionFailed(key)
             with self.fs.open(path, "w") as f:
                 json.dump(data, f, indent=2)
         except Exception as e:
             from tracecraft.s3 import PreconditionFailed
+
             if isinstance(e, PreconditionFailed):
                 raise
             raise click.ClickException(f"HF put failed: {e}")
@@ -65,7 +68,7 @@ def list_keys(self, prefix=""):
             keys = []
             for entry in entries:
                 if entry.startswith(base_prefix):
-                    keys.append(entry[len(base_prefix):])
+                    keys.append(entry[len(base_prefix) :])
                 else:
                     keys.append(entry)
             return keys
diff --git a/sdk/tracecraft/store.py b/sdk/tracecraft/store.py
index 4226394..85ce840 100644
--- a/sdk/tracecraft/store.py
+++ b/sdk/tracecraft/store.py
@@ -10,9 +10,11 @@ def get_store():
 
     if backend == "hf":
         from tracecraft.hf import HF
+
         return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token")), cfg
     else:
         from tracecraft.s3 import S3
+
         return S3(
             endpoint=cfg["endpoint"],
             bucket=cfg["bucket"],

From 315cc555d25060cfdf68fda6f66e7955120d7640 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:14:10 +0300
Subject: [PATCH 03/22] hf: implement ensure_bucket() via HfApi.create_bucket,
 private by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ensure_bucket was a no-op, so 'init --backend hf' against a brand-new
bucket failed cryptically on the first write. Now creates the bucket
with HfApi.create_bucket(exist_ok=True), private unless the caller
opts out — HF buckets default to public upstream, which is the wrong
default for internal coordination data and mirrored transcripts.

Fixes #7. Refs #8 (creation half).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/hf.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py
index ca2756b..318edbd 100644
--- a/sdk/tracecraft/hf.py
+++ b/sdk/tracecraft/hf.py
@@ -8,12 +8,14 @@
 
 
 class HF:
-    def __init__(self, bucket, project, token=None):
+    def __init__(self, bucket, project, token=None, private=True):
         from huggingface_hub import HfFileSystem
 
         self.fs = HfFileSystem(token=token)
         self.bucket = bucket  # e.g. "username/my-bucket"
         self.project = project
+        self.token = token
+        self.private = private  # safe default: private (these hold internal traces)
         self.base = f"hf://buckets/{bucket}"
 
     @classmethod
@@ -104,6 +106,23 @@ def get_file(self, key, local_path):
             raise click.ClickException(f"HF download failed: {e}")
 
     def ensure_bucket(self):
-        # HF buckets are created via CLI or web — verify by checking exists or listing
-        # Empty buckets fail on ls(), so we just pass and let first write validate access
-        pass
+        """Create the HF bucket if it doesn't exist (private by default).
+
+        Previously a no-op, which made `init` against a brand-new bucket fail with a
+        cryptic error on the first write (issue #7). HF buckets default to *public*
+        on creation, which is a privacy footgun for a tool that stores internal
+        memory/transcripts (issue #8) — so we create them private unless the caller
+        opts out via `private=False`.
+        """
+        try:
+            from huggingface_hub import HfApi
+
+            HfApi(token=self.token).create_bucket(self.bucket, private=self.private, exist_ok=True)
+        except Exception as e:
+            # Creation failed (or the token lacks access): abort now with an
+            # actionable hint rather than letting the first write fail cryptically.
+            raise click.ClickException(
+                f"Could not ensure HF bucket '{self.bucket}' exists: {e}\n"
+                f"Create it first at https://huggingface.co/new-bucket (set it Private), "
+                f"or check your --hf-token has write access."
+            )

From 9f0659eb59928f21e09d0cf2229eb67b43f6d2d9 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:14:31 +0300
Subject: [PATCH 04/22] init: add --private/--public flag (default: private)

Passed through to the backend's ensure_bucket via the store factory.
Plain S3 ignores it (bucket ACLs are out of scope there); the HF
backend uses it to decide bucket visibility at creation time.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/cli/init_cmd.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py
index d1a9762..52b6195 100644
--- a/sdk/tracecraft/cli/init_cmd.py
+++ b/sdk/tracecraft/cli/init_cmd.py
@@ -1,6 +1,5 @@
 """tracecraft init — configure and register agent."""
 
-import os
 from datetime import datetime, timezone
 from pathlib import Path
 
@@ -39,7 +38,14 @@
 @click.option(
     "--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)"
 )
-def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token):
+@click.option(
+    "--private/--public",
+    "private",
+    default=True,
+    help="Create the bucket private (default) or public. HF only. "
+    "Internal memory/transcripts should stay private.",
+)
+def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token, private):
     """Initialize tracecraft config, create bucket, and register agent."""
     cfg = {
         "backend": backend,
@@ -66,7 +72,7 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key,
     save_config(cfg)
     _ensure_gitignore_entry()
 
-    store = _get_store(cfg)
+    store = _get_store(cfg, private=private)
     store.ensure_bucket()
 
     now = datetime.now(timezone.utc).isoformat()
@@ -115,13 +121,18 @@ def _ensure_gitignore_entry():
         )
 
 
-def _get_store(cfg):
+def _get_store(cfg, private=True):
     """Create the right storage backend from config."""
     backend = cfg.get("backend", "s3")
     if backend == "hf":
         from tracecraft.hf import HF
 
-        return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token"))
+        return HF(
+            bucket=cfg["bucket"],
+            project=cfg["project"],
+            token=cfg.get("hf_token"),
+            private=private,
+        )
     else:
         from tracecraft.s3 import S3
 

From 9f581e932874ef363fd69cc4830077552472a986 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:15:04 +0300
Subject: [PATCH 05/22] init: report real HF bucket visibility from
 bucket_info()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The init line now reads 'Backend: HuggingFace Buckets  Bucket: user/x
(private)' with the state read back from the Hub, not assumed from the
flag — create_bucket(exist_ok=True) keeps a pre-existing bucket's
visibility, so flag and reality can disagree. Also surface the
best-effort-claims caveat for the HF backend at init time.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/cli/init_cmd.py | 20 +++++++++++++++++++-
 sdk/tracecraft/hf.py           | 15 +++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py
index 52b6195..2a57b10 100644
--- a/sdk/tracecraft/cli/init_cmd.py
+++ b/sdk/tracecraft/cli/init_cmd.py
@@ -92,7 +92,25 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key,
     if backend == "s3":
         click.echo(f"Backend: S3  Endpoint: {endpoint}  Bucket: {bucket}")
     else:
-        click.echo(f"Backend: HuggingFace Buckets  Bucket: {bucket}")
+        # Report the bucket's *actual* visibility, read back from the Hub —
+        # create_bucket(exist_ok=True) keeps a pre-existing bucket's setting,
+        # so the --private/--public flag and reality can disagree.
+        actual_private = store.bucket_privacy()
+        if actual_private is None:
+            visibility = "visibility unknown"
+        else:
+            visibility = "private" if actual_private else "PUBLIC"
+        click.echo(f"Backend: HuggingFace Buckets  Bucket: {bucket} ({visibility})")
+        # Be honest about the core-promise gap on this backend (see hf.py put_json):
+        # HF has no conditional-write, so atomic claims are best-effort there.
+        click.echo(
+            "Note: HuggingFace buckets have no conditional-write primitive, so "
+            "`tracecraft claim` is best-effort (racy) here — two agents can both think "
+            "they won. For safe atomic claims, use an S3-compatible backend (AWS, R2, "
+            "MinIO, B2, Wasabi). Memory, messaging, handoffs, and session mirroring are "
+            "unaffected.",
+            err=True,
+        )
     click.echo("Note: .tracecraft.json contains credentials. Keep it out of version control.")
 
 
diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py
index 318edbd..626a2fa 100644
--- a/sdk/tracecraft/hf.py
+++ b/sdk/tracecraft/hf.py
@@ -126,3 +126,18 @@ def ensure_bucket(self):
                 f"Create it first at https://huggingface.co/new-bucket (set it Private), "
                 f"or check your --hf-token has write access."
             )
+
+    def bucket_privacy(self):
+        """Return the bucket's *actual* visibility: True=private, False=public,
+        None if it can't be determined (network error, no permission).
+
+        Read back from bucket_info() rather than assumed from the flag we passed —
+        create_bucket(exist_ok=True) silently keeps a pre-existing bucket's
+        visibility, so the flag and reality can disagree.
+        """
+        try:
+            from huggingface_hub import HfApi
+
+            return bool(HfApi(token=self.token).bucket_info(self.bucket).private)
+        except Exception:
+            return None

From c6ec0df8cd8fc645fdd2b7fdfc8279ade73ebd49 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:15:24 +0300
Subject: [PATCH 06/22] init: warn prominently when an existing HF bucket is
 public
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the bucket pre-exists as PUBLIC and the user did not pass --public,
init now spells out that coordination data and mirrored transcripts
will be publicly visible, and that huggingface_hub has no
update_bucket — delete + recreate as private is the only remedy.

Fixes #8.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/cli/init_cmd.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py
index 2a57b10..7514a36 100644
--- a/sdk/tracecraft/cli/init_cmd.py
+++ b/sdk/tracecraft/cli/init_cmd.py
@@ -101,6 +101,20 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key,
         else:
             visibility = "private" if actual_private else "PUBLIC"
         click.echo(f"Backend: HuggingFace Buckets  Bucket: {bucket} ({visibility})")
+        if actual_private is False and private:
+            # The bucket pre-existed as public; we asked for private but
+            # create_bucket(exist_ok=True) never changes an existing bucket.
+            click.echo(
+                "\n"
+                "  WARNING: bucket already exists and is PUBLIC.\n"
+                f"  Everything tracecraft writes to '{bucket}' — shared memory, messages,\n"
+                "  handoffs, and mirrored session transcripts — will be publicly visible\n"
+                "  on the Hub. huggingface_hub has no update_bucket, so visibility cannot\n"
+                "  be flipped in place: the only remedy is to delete the bucket and\n"
+                "  re-run init so tracecraft recreates it private.\n"
+                "  If public was intentional, pass --public to silence this warning.\n",
+                err=True,
+            )
         # Be honest about the core-promise gap on this backend (see hf.py put_json):
         # HF has no conditional-write, so atomic claims are best-effort there.
         click.echo(

From 976224f8a198b06849996234b1ac51417782581e Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:15:57 +0300
Subject: [PATCH 07/22] hf: actionable error when a write hits a missing bucket

A put against a nonexistent bucket used to surface HfFileSystem's raw
'repository and revision' resolution error. Now the error names the
bucket, points at 'tracecraft init', and suggests checking the
'username/bucket-name' handle. Applies to put_json and put_file.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/hf.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py
index 626a2fa..079b88f 100644
--- a/sdk/tracecraft/hf.py
+++ b/sdk/tracecraft/hf.py
@@ -30,6 +30,24 @@ def from_config(cls):
     def _path(self, key):
         return f"{self.base}/{self.project}/{key}"
 
+    def _raise_write_error(self, e):
+        """Translate raw HfFileSystem write errors into actionable ones.
+
+        A put against a bucket that doesn't exist surfaces as a cryptic
+        'repository and revision' / 404 resolution error from HfFileSystem —
+        name the bucket and say what to do instead.
+        """
+        msg = str(e)
+        if isinstance(e, FileNotFoundError) or (
+            "Repository Not Found" in msg or "repository and revision" in msg or "404" in msg
+        ):
+            raise click.ClickException(
+                f"HF write failed: bucket '{self.bucket}' was not found.\n"
+                f"Run `tracecraft init --backend hf --bucket {self.bucket} ...` to create it, "
+                f"and check the bucket handle is 'username/bucket-name'."
+            )
+        raise click.ClickException(f"HF write failed: {e}")
+
     def put_json(self, key, data, if_none_match=False):
         try:
             path = self._path(key)
@@ -47,7 +65,7 @@ def put_json(self, key, data, if_none_match=False):
 
             if isinstance(e, PreconditionFailed):
                 raise
-            raise click.ClickException(f"HF put failed: {e}")
+            self._raise_write_error(e)
 
     def get_json(self, key):
         try:
@@ -97,7 +115,7 @@ def put_file(self, key, local_path):
         try:
             self.fs.put(local_path, self._path(key))
         except Exception as e:
-            raise click.ClickException(f"HF upload failed: {e}")
+            self._raise_write_error(e)
 
     def get_file(self, key, local_path):
         try:

From a11559e9b2c54e90ac664e56085f2421fae46b29 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:16:18 +0300
Subject: [PATCH 08/22] hf: don't swallow auth errors as False in exists()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

exists() caught every exception and returned False, so a bad or
under-scoped token looked identical to an empty bucket — and let the
best-effort claim path sail past its check-then-write guard. 401/403
now raise with a pointer at the token; genuine not-found stays False.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/hf.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py
index 079b88f..7c67969 100644
--- a/sdk/tracecraft/hf.py
+++ b/sdk/tracecraft/hf.py
@@ -100,7 +100,19 @@ def list_keys(self, prefix=""):
     def exists(self, key):
         try:
             return self.fs.exists(self._path(key))
-        except Exception:
+        except FileNotFoundError:
+            return False
+        except Exception as e:
+            # "Not found" is a legitimate False; "unauthorized" is not — swallowing
+            # it makes a bad token look like an empty bucket (and lets a best-effort
+            # claim race past its check-then-write guard).
+            status = getattr(getattr(e, "response", None), "status_code", None)
+            if status in (401, 403) or "unauthorized" in str(e).lower():
+                raise click.ClickException(
+                    f"HF auth error while checking '{key}': {e}\n"
+                    f"Check that your token (--hf-token / HF_TOKEN) has read access "
+                    f"to '{self.bucket}'."
+                )
             return False
 
     def delete(self, key):

From 55095d2e81ca0def5d2e0936987d33b63559603e Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:16:40 +0300
Subject: [PATCH 09/22] messages: unique keys per send; inbox sorted by sent_at

Message keys were messages/<recipient>/<int_seconds>_<sender>.json, so
two messages from one sender inside the same second collided and the
later silently overwrote the earlier. Keys now carry nanosecond
resolution plus a uuid4 suffix, so every send is a distinct object.

inbox now merges direct + broadcast messages and prints them in
sent_at order instead of raw list order, which interleaved the two
prefixes arbitrarily.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/cli/messages.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/sdk/tracecraft/cli/messages.py b/sdk/tracecraft/cli/messages.py
index efba3c4..fef94fd 100644
--- a/sdk/tracecraft/cli/messages.py
+++ b/sdk/tracecraft/cli/messages.py
@@ -1,6 +1,7 @@
 """tracecraft send/inbox — agent-to-agent messaging via S3."""
 
 import time
+import uuid
 from datetime import datetime, timezone
 
 import click
@@ -17,10 +18,17 @@ def send(recipient, message):
         raise click.ClickException("Recipient cannot be empty")
     store, cfg = get_store()
     sender = cfg["agent_id"]
-    ts = int(time.time())
     now = datetime.now(timezone.utc).isoformat()
 
-    key = f"messages/{recipient}/{ts}_{sender}.json"
+    # Message keys MUST be unique per send. A whole-second timestamp collides when
+    # one sender fires two messages to the same recipient in the same second — the
+    # second silently overwrites the first (measured: a 5-message burst kept only 1).
+    # Use nanosecond resolution for rough chronological ordering PLUS a random 8-hex-char
+    # uuid4 suffix that makes collisions vanishingly unlikely, even on clock ties.
+    # (Same approach the session mirror uses for its part keys.)
+    ts_ns = time.time_ns()
+    uniq = uuid.uuid4().hex[:8]
+    key = f"messages/{recipient}/{ts_ns}_{sender}_{uniq}.json"
     store.put_json(
         key,
         {
@@ -48,25 +56,30 @@ def inbox(delete):
         click.echo("No messages.")
         return
 
-    count = 0
+    # Merge direct + broadcast and sort by sent_at — raw list order interleaves
+    # the two prefixes, so a broadcast could print before the direct message
+    # that preceded it.
+    messages = []
     for key in all_keys:
         data = store.get_json(key)
         if data is None:
             continue
-        sender = data.get("from", "?")
         # Skip own broadcasts
-        if "_broadcast/" in key and sender == my_id:
+        if "_broadcast/" in key and data.get("from", "?") == my_id:
             continue
+        messages.append((key, data))
+    messages.sort(key=lambda kd: kd[1].get("sent_at", ""))
+
+    for key, data in messages:
+        sender = data.get("from", "?")
         msg = data.get("message", "")
         sent_at = data.get("sent_at", "?")
         target = "broadcast" if "_broadcast/" in key else "direct"
         click.echo(f"[{sent_at}] ({target}) {sender}: {msg}")
-        count += 1
-
         if delete:
             store.delete(key)
 
-    if count == 0:
+    if not messages:
         click.echo("No messages.")
     elif delete:
-        click.echo(f"Deleted {count} message(s).")
+        click.echo(f"Deleted {len(messages)} message(s).")

From 2c572b078620cdd707e9e2847380c52537bcb0f8 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:17:17 +0300
Subject: [PATCH 10/22] steps: complete enforces claim ownership; --force to
 override

Any agent could previously complete any step, including one that another
agent was actively working on. complete now reads claim.json first and
fails with a clear error when the claim belongs to a different agent,
unless --force is passed (escape hatch for crashed claim-holders).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/cli/steps.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py
index c0c34dd..75a57f7 100644
--- a/sdk/tracecraft/cli/steps.py
+++ b/sdk/tracecraft/cli/steps.py
@@ -78,7 +78,14 @@ def claim(step_id):
     is_flag=True,
     help="Record files changed (from `git diff`), so the next agent knows what moved. No-op outside a git repo.",
 )
-def complete(step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git):
+@click.option(
+    "--force",
+    is_flag=True,
+    help="Complete a step claimed by a different agent (e.g. the claim-holder crashed).",
+)
+def complete(
+    step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git, force
+):
     """Mark a step complete (or blocked / needs-review) and write a handoff record.
 
     The handoff record is what the next agent sees instead of a shared
@@ -95,6 +102,15 @@ def complete(step_id, note, next_agent, next_action, blocked, needs_review, chan
     sid = step_id.lower().replace(".", "-")
     now = datetime.now(timezone.utc).isoformat()
 
+    # A step belongs to whoever claimed it — without this check any agent
+    # could mark any step complete and silently steal/clobber someone's work.
+    claim_doc = store.get_json(f"steps/{sid}/claim.json")
+    if claim_doc and claim_doc.get("agent") not in (None, agent) and not force:
+        raise click.ClickException(
+            f"Step {step_id} is claimed by '{claim_doc['agent']}', not '{agent}'. "
+            f"Pass --force to complete it anyway (e.g. if the claim-holder crashed)."
+        )
+
     state = "blocked" if blocked else "needs_review" if needs_review else "complete"
 
     # Status reflects the real outcome (not always "complete").

From de0cb612d323a18fc1e3a03b174cdca5af7aab65 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:18:10 +0300
Subject: [PATCH 11/22] steps: tolerate the claim/status crash window in
 readers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

claim.json (atomic) and status.json are two separate writes; a crash
between them leaves a claim with no status. step-status and wait-for
now resolve that state as in_progress by the claiming agent via a
shared _effective_status helper — the claim is the authoritative
write. Invariant documented in CLAUDE.md.

Also warn at claim time on the HF backend that claims are best-effort
(no conditional-write upstream), matching the note init prints.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 CLAUDE.md                   |  5 +++++
 sdk/tracecraft/cli/steps.py | 42 +++++++++++++++++++++++++++++--------
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index ea7b340..370f35c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -56,6 +56,11 @@ pivot lives in `plans/server-archive/` for reference only — nothing in the SDK
   many isolated projects.
 - **No server, no daemon**: each CLI call is stateless; state lives on the bucket.
 - **No vendor lock-in**: AWS, R2, MinIO, B2, Wasabi, HuggingFace all work today.
+- **Claim/status crash-window invariant**: `claim.json` (atomic) and `status.json` are
+  two separate writes; a crash between them leaves a claim with no status. Readers MUST
+  treat "claim.json exists, status.json missing" as `in_progress` by the claiming agent —
+  the claim is the authoritative write (`step-status` and `wait-for` implement this via
+  `_effective_status` in `cli/steps.py`).
 
 ## Known gaps (May 2026)
 
diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py
index 75a57f7..5ecdacc 100644
--- a/sdk/tracecraft/cli/steps.py
+++ b/sdk/tracecraft/cli/steps.py
@@ -62,6 +62,15 @@ def claim(step_id):
         },
     )
     click.echo(f"Claimed step {step_id} as {agent}")
+    if cfg.get("backend") == "hf":
+        # The claim is best-effort on HF (no conditional-write); don't let the
+        # success message imply the race was atomically arbitrated.
+        click.echo(
+            "warning: claims on the HuggingFace backend are best-effort (racy) — "
+            "another agent may also believe it won this step. Use an S3-compatible "
+            "backend for atomic claims.",
+            err=True,
+        )
 
 
 @click.command()
@@ -151,19 +160,34 @@ def complete(
     click.echo(msg)
 
 
+def _effective_status(store, sid):
+    """Resolve a step's status, tolerating the claim/status crash window.
+
+    claim.json (atomic) and status.json are two separate writes; a crash
+    between them leaves a claim with no status. Readers treat that state as
+    in_progress by the claiming agent — the claim is the authoritative write.
+    Returns (status, agent); status is 'pending' when neither file exists.
+    """
+    data = store.get_json(f"steps/{sid}/status.json")
+    if data is not None:
+        return data.get("status", "unknown"), data.get("agent", "?")
+    claim_doc = store.get_json(f"steps/{sid}/claim.json")
+    if claim_doc is not None:
+        return "in_progress", claim_doc.get("agent", "?")
+    return "pending", None
+
+
 @click.command()
 @click.argument("step_id")
 def step_status(step_id):
     """Check the status of a step."""
     store, _ = get_store()
     sid = step_id.lower().replace(".", "-")
-    data = store.get_json(f"steps/{sid}/status.json")
-    if data is None:
-        click.echo(f"{step_id}: pending")
-        return
-    status = data.get("status", "unknown")
-    agent = data.get("agent", "?")
-    click.echo(f"{step_id}: {status} (agent: {agent})")
+    status, agent = _effective_status(store, sid)
+    if agent is None:
+        click.echo(f"{step_id}: {status}")
+    else:
+        click.echo(f"{step_id}: {status} (agent: {agent})")
 
 
 @click.command()
@@ -178,8 +202,8 @@ def wait_for(step_ids, timeout):
         all_done = True
         for step_id in step_ids:
             sid = step_id.lower().replace(".", "-")
-            data = store.get_json(f"steps/{sid}/status.json")
-            if data is None or data.get("status") != "complete":
+            status, _ = _effective_status(store, sid)
+            if status != "complete":
                 all_done = False
                 break
 

From b6d9d6d19e29ed4a45917f248349a159bd142041 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:18:31 +0300
Subject: [PATCH 12/22] steps: wait-for fails fast on blocked steps

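A blocked step never completes on its own, so waiters used to spin
until the full timeout. wait-for now exits non-zero immediately with
a clear message naming the blocked step. needs_review still counts as
waiting but is called out in the progress line so a human can step in.

Each poll now checks every step instead of stopping at the first
incomplete one, so a blocked or needs_review step is noticed even when
an earlier step in the list is still pending.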

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tracecraft/cli/steps.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py
index 5ecdacc..0d42ac9 100644
--- a/sdk/tracecraft/cli/steps.py
+++ b/sdk/tracecraft/cli/steps.py
@@ -200,19 +200,31 @@ def wait_for(step_ids, timeout):
 
     while time.time() < deadline:
         all_done = True
+        needs_review = []
         for step_id in step_ids:
             sid = step_id.lower().replace(".", "-")
-            status, _ = _effective_status(store, sid)
+            status, agent = _effective_status(store, sid)
+            if status == "blocked":
+                # A blocked step won't complete on its own — failing fast beats
+                # spinning until the full timeout.
+                raise click.ClickException(
+                    f"Step {step_id} is blocked (agent: {agent}) — it will not "
+                    f"complete without intervention. Resolve it and re-run wait-for."
+                )
+            if status == "needs_review":
+                needs_review.append(step_id)
             if status != "complete":
                 all_done = False
-                break
 
         if all_done:
             click.echo(f"All steps complete: {', '.join(step_ids)}")
             return
 
         remaining = int(deadline - time.time())
-        click.echo(f"Waiting... ({remaining}s remaining)", err=True)
+        progress = f"Waiting... ({remaining}s remaining)"
+        if needs_review:
+            progress += f" — needs review: {', '.join(needs_review)}"
+        click.echo(progress, err=True)
         time.sleep(5)
 
     raise click.ClickException(

From 453d8d24e038e5543e721747d34361a91637d0b6 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:18:57 +0300
Subject: [PATCH 13/22] deps: drop unused httpx and pydantic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Zero imports of either anywhere in the package or tests — leftovers
from the pre-pivot FastAPI scaffolding. Runtime deps are now just
click + boto3. Also remove two unused imports in test_session_cli.py
(pre-existing ruff check failures).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/pyproject.toml            | 2 --
 sdk/tests/test_session_cli.py | 2 --
 2 files changed, 4 deletions(-)

diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
index c6446f4..db4310f 100644
--- a/sdk/pyproject.toml
+++ b/sdk/pyproject.toml
@@ -25,8 +25,6 @@ classifiers = [
 ]
 dependencies = [
     "click>=8.1.0",
-    "httpx>=0.25.0",
-    "pydantic>=2.5.0",
     "boto3>=1.28.0",
 ]
 
diff --git a/sdk/tests/test_session_cli.py b/sdk/tests/test_session_cli.py
index f2b2f7b..bc9ce73 100644
--- a/sdk/tests/test_session_cli.py
+++ b/sdk/tests/test_session_cli.py
@@ -20,8 +20,6 @@
 from __future__ import annotations
 
 import json
-import os
-from pathlib import Path
 
 import boto3
 import pytest

From 7e82b5f0e1bb46e7cd70dfda29b6c006dcb54ca3 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:24:17 +0300
Subject: [PATCH 14/22] tests: coordination correctness + messaging + HF
 onboarding

moto-backed (no network):
- claim contention: two back-to-back claimers, exactly one wins, holder preserved
- same-second message bursts keep every message (uuid-suffixed keys)
- inbox merges direct + broadcast chronologically by sent_at
- complete rejects a non-owner without --force, allows with it
- wait-for fails fast on blocked, names needs_review while waiting
- claim.json-without-status.json reads as in_progress (crash window)

HF backend tests mock HfApi/HfFileSystem in-memory: private-by-default
creation, --public opt-out, real visibility readback, the
existing-public-bucket warning, actionable missing-bucket write errors,
and exists() raising on 401/403 instead of returning False.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 sdk/tests/test_coordination.py  | 143 ++++++++++++++++++
 sdk/tests/test_hf_onboarding.py | 250 ++++++++++++++++++++++++++++++++
 sdk/tests/test_messaging.py     | 127 ++++++++++++++++
 3 files changed, 520 insertions(+)
 create mode 100644 sdk/tests/test_coordination.py
 create mode 100644 sdk/tests/test_hf_onboarding.py
 create mode 100644 sdk/tests/test_messaging.py

diff --git a/sdk/tests/test_coordination.py b/sdk/tests/test_coordination.py
new file mode 100644
index 0000000..ce6ff8b
--- /dev/null
+++ b/sdk/tests/test_coordination.py
@@ -0,0 +1,143 @@
+"""Tests for coordination correctness: claim races, complete ownership,
+the claim/status crash window, and wait-for's blocked fast-fail.
+
+All run against moto's in-process S3 — no network.
+"""
+
+from __future__ import annotations
+
+import json
+import time
+
+import boto3
+import pytest
+from click.testing import CliRunner
+from moto import mock_aws
+
+from tracecraft.cli import cli
+
+BUCKET = "tc-coord-test"
+PROJECT = "demo"
+
+
+@pytest.fixture
+def env(tmp_path, monkeypatch):
+    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing")
+    monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
+    monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1")
+    work = tmp_path / "work"
+    work.mkdir()
+    monkeypatch.chdir(work)
+    cfg = {
+        "backend": "s3",
+        "endpoint": None,
+        "bucket": BUCKET,
+        "project": PROJECT,
+        "agent_id": "agent-a",
+        "access_key": "testing",
+        "secret_key": "testing",
+    }
+    (work / ".tracecraft.json").write_text(json.dumps(cfg))
+    with mock_aws():
+        boto3.client("s3").create_bucket(Bucket=BUCKET)
+        yield CliRunner()
+
+
+def _as(agent):
+    return {"TRACECRAFT_AGENT": agent}
+
+
+def _get(key):
+    c = boto3.client("s3")
+    return json.loads(c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/{key}")["Body"].read())
+
+
+# ---------- atomic claim: two claimers, exactly one wins ----------
+
+
+def test_claim_race_exactly_one_winner(env):
+    r1 = env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    r2 = env.invoke(cli, ["claim", "build"], env=_as("agent-b"))
+    outcomes = [r.exit_code == 0 for r in (r1, r2)]
+    assert outcomes.count(True) == 1, f"exactly one claimer must win: {r1.output} / {r2.output}"
+    assert "already claimed by agent-a" in r2.output
+    assert _get("steps/build/claim.json")["agent"] == "agent-a"
+
+
+# ---------- complete: ownership enforced, --force overrides ----------
+
+
+def test_complete_rejects_non_owner(env):
+    env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    r = env.invoke(cli, ["complete", "build"], env=_as("agent-b"))
+    assert r.exit_code != 0
+    assert "claimed by 'agent-a'" in r.output
+    assert "--force" in r.output
+    # the step's status must be untouched
+    assert _get("steps/build/status.json")["status"] == "in_progress"
+
+
+def test_complete_owner_succeeds(env):
+    env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    r = env.invoke(cli, ["complete", "build"], env=_as("agent-a"))
+    assert r.exit_code == 0, r.output
+    assert _get("steps/build/status.json")["status"] == "complete"
+
+
+def test_complete_force_overrides_ownership(env):
+    env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    r = env.invoke(cli, ["complete", "build", "--force"], env=_as("agent-b"))
+    assert r.exit_code == 0, r.output
+    doc = _get("steps/build/status.json")
+    assert doc["status"] == "complete"
+    assert doc["agent"] == "agent-b"
+
+
+def test_complete_unclaimed_step_is_allowed(env):
+    """No claim.json at all — nothing to own, complete goes through."""
+    r = env.invoke(cli, ["complete", "adhoc"], env=_as("agent-a"))
+    assert r.exit_code == 0, r.output
+
+
+# ---------- crash window: claim.json exists, status.json missing ----------
+
+
+def test_step_status_treats_claim_without_status_as_in_progress(env):
+    env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    # simulate a crash between the two writes: claim landed, status didn't
+    boto3.client("s3").delete_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/build/status.json")
+    r = env.invoke(cli, ["step-status", "build"])
+    assert r.exit_code == 0, r.output
+    assert "in_progress" in r.output
+    assert "agent-a" in r.output
+
+
+def test_wait_for_treats_claim_without_status_as_waiting(env):
+    env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    boto3.client("s3").delete_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/build/status.json")
+    r = env.invoke(cli, ["wait-for", "build", "--timeout", "1"])
+    # not complete, not blocked → waits, then times out (no crash, no false success)
+    assert r.exit_code != 0
+    assert "Timeout" in r.output
+
+
+# ---------- wait-for: blocked fails fast, needs_review keeps waiting ----------
+
+
+def test_wait_for_fast_fails_on_blocked(env):
+    env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    env.invoke(cli, ["complete", "build", "--blocked"], env=_as("agent-a"))
+    start = time.monotonic()
+    r = env.invoke(cli, ["wait-for", "build", "--timeout", "300"])
+    elapsed = time.monotonic() - start
+    assert r.exit_code != 0
+    assert "blocked" in r.output
+    assert elapsed < 10, f"must fail fast, not spin toward the timeout (took {elapsed:.1f}s)"
+
+
+def test_wait_for_mentions_needs_review_while_waiting(env):
+    env.invoke(cli, ["claim", "build"], env=_as("agent-a"))
+    env.invoke(cli, ["complete", "build", "--needs-review"], env=_as("agent-a"))
+    r = env.invoke(cli, ["wait-for", "build", "--timeout", "1"])
+    assert r.exit_code != 0  # still waiting → times out
+    assert "needs review: build" in r.output
diff --git a/sdk/tests/test_hf_onboarding.py b/sdk/tests/test_hf_onboarding.py
new file mode 100644
index 0000000..48c9192
--- /dev/null
+++ b/sdk/tests/test_hf_onboarding.py
@@ -0,0 +1,250 @@
+"""Tests for the HF onboarding + correctness-honesty fixes.
+
+Covers three real, externally-reported issues:
+  - #7: `init --backend hf` against a non-existent bucket must auto-create it
+        (HF ensure_bucket() was a no-op; first write failed cryptically).
+  - #8: HF buckets are public-by-default; init must create them PRIVATE by default,
+        with an explicit --public opt-out.
+  - correctness honesty: claims on HF are best-effort (no conditional-write), so both
+        `init --backend hf` and `claim` must SAY SO rather than imply atomicity.
+
+These mock the HuggingFace SDK (no network) — they verify the wiring (private flag
+reaches create_bucket; the warnings are emitted), not HF's servers.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import types
+
+import click
+import pytest
+from click.testing import CliRunner
+
+from tracecraft.cli.init_cmd import init_cmd
+from tracecraft.cli.steps import claim
+
+
+class FakeBucketState:
+    """Records create_bucket calls and stores written JSON in-memory."""
+
+    def __init__(self):
+        self.create_calls = []  # list of (bucket, private, exist_ok)
+        self.objects = {}  # path -> data
+        self.buckets = {}  # bucket_id -> private (bool); pre-seed to simulate existing
+
+
+@pytest.fixture
+def hf_stub(monkeypatch):
+    """Stub huggingface_hub so init/claim run against an in-memory fake HF backend."""
+    state = FakeBucketState()
+
+    # --- fake huggingface_hub module surface used by tracecraft.hf ---
+    class FakeApi:
+        """Mimics HfApi: create_bucket(exist_ok=True) never changes an existing
+        bucket's visibility; bucket_info returns the actual state."""
+
+        def __init__(self, token=None):
+            self.token = token
+
+        def create_bucket(self, bucket_id, *, private=None, exist_ok=False, **kw):
+            state.create_calls.append((bucket_id, private, exist_ok))
+            if bucket_id in state.buckets:
+                if not exist_ok:
+                    raise ValueError(f"Bucket {bucket_id} already exists")
+            else:
+                state.buckets[bucket_id] = bool(private)
+            return f"hf://buckets/{bucket_id}"
+
+        def bucket_info(self, bucket_id, **kw):
+            if bucket_id not in state.buckets:
+                raise FileNotFoundError(bucket_id)
+            return types.SimpleNamespace(private=state.buckets[bucket_id])
+
+    class FakeFS:
+        def __init__(self, *a, **k):
+            pass
+
+        def exists(self, path):
+            return path in state.objects
+
+        def open(self, path, mode="r"):
+            store = state.objects
+
+            class _F:
+                def __enter__(self_):
+                    if "r" in mode:
+                        self_._buf = store.get(path, "")
+                    return self_
+
+                def __exit__(self_, *exc):
+                    return False
+
+                def write(self_, s):
+                    store[path] = store.get(path, "") + s
+
+                def read(self_):
+                    return self_._buf
+
+            return _F()
+
+        def find(self, path, detail=False):
+            return [p for p in state.objects if p.startswith(path)]
+
+    fake_hf = types.ModuleType("huggingface_hub")
+    fake_hf.HfFileSystem = FakeFS
+    fake_hf.HfApi = FakeApi
+    monkeypatch.setitem(sys.modules, "huggingface_hub", fake_hf)
+    return state
+
+
+def _init(runner, tmp_path, monkeypatch, *extra):
+    monkeypatch.chdir(tmp_path)
+    args = [
+        "--backend",
+        "hf",
+        "--bucket",
+        "user/tc-test",
+        "--project",
+        "demo",
+        "--agent",
+        "tester",
+        "--hf-token",
+        "hf_faketoken",
+        *extra,
+    ]
+    return runner.invoke(init_cmd, args)
+
+
+# ---------- #7: auto-create ----------
+
+
+def test_init_hf_creates_bucket(hf_stub, tmp_path, monkeypatch):
+    r = _init(CliRunner(), tmp_path, monkeypatch)
+    assert r.exit_code == 0, r.output
+    # ensure_bucket() actually called create_bucket (was a no-op before)
+    assert len(hf_stub.create_calls) == 1
+    bucket, private, exist_ok = hf_stub.create_calls[0]
+    assert bucket == "user/tc-test"
+    assert exist_ok is True  # idempotent: don't fail if it already exists
+    # the agent record was written (the first write that used to fail cryptically)
+    assert any("agents/tester.json" in p for p in hf_stub.objects)
+
+
+# ---------- #8: private by default, --public opt-out ----------
+
+
+def test_init_hf_private_by_default(hf_stub, tmp_path, monkeypatch):
+    r = _init(CliRunner(), tmp_path, monkeypatch)
+    assert r.exit_code == 0, r.output
+    _, private, _ = hf_stub.create_calls[0]
+    assert private is True
+    assert "(private)" in r.output
+
+
+def test_init_hf_public_when_asked(hf_stub, tmp_path, monkeypatch):
+    r = _init(CliRunner(), tmp_path, monkeypatch, "--public")
+    assert r.exit_code == 0, r.output
+    _, private, _ = hf_stub.create_calls[0]
+    assert private is False
+    assert "(PUBLIC)" in r.output
+
+
+# ---------- correctness honesty ----------
+
+
+def test_init_hf_warns_claims_are_best_effort(hf_stub, tmp_path, monkeypatch):
+    r = _init(CliRunner(), tmp_path, monkeypatch)
+    assert r.exit_code == 0, r.output
+    # the racy-claim caveat must be surfaced at init (output includes stderr via CliRunner)
+    assert "best-effort" in r.output.lower()
+    assert "S3-compatible" in r.output
+
+
+def test_claim_on_hf_warns_best_effort(hf_stub, tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    # write an hf config the CWD-first loader will pick up
+    cfg = {
+        "backend": "hf",
+        "bucket": "user/tc-test",
+        "project": "demo",
+        "agent_id": "tester",
+        "hf_token": "hf_faketoken",
+    }
+    (tmp_path / ".tracecraft.json").write_text(json.dumps(cfg))
+    r = CliRunner().invoke(claim, ["build"])
+    assert r.exit_code == 0, r.output
+    assert "Claimed step build" in r.output
+    assert "best-effort" in r.output.lower()
+
+
+# ---------- #8: pre-existing PUBLIC bucket triggers a prominent warning ----------
+
+
+def test_init_hf_existing_public_bucket_warns(hf_stub, tmp_path, monkeypatch):
+    """Bucket pre-exists as public; user asked for private (default) — init must
+    say the data will be publicly visible and that delete+recreate is the only fix."""
+    hf_stub.buckets["user/tc-test"] = False  # exists, public
+    r = _init(CliRunner(), tmp_path, monkeypatch)
+    assert r.exit_code == 0, r.output
+    assert "(PUBLIC)" in r.output  # real state, not the requested flag
+    assert "WARNING" in r.output
+    assert "publicly visible" in r.output
+    assert "delete" in r.output.lower()
+
+
+def test_init_hf_existing_public_bucket_no_warning_with_public_flag(hf_stub, tmp_path, monkeypatch):
+    hf_stub.buckets["user/tc-test"] = False
+    r = _init(CliRunner(), tmp_path, monkeypatch, "--public")
+    assert r.exit_code == 0, r.output
+    assert "WARNING" not in r.output
+
+
+# ---------- write errors name the bucket and point at init ----------
+
+
+def test_put_against_missing_bucket_is_actionable(hf_stub, monkeypatch):
+    from tracecraft.hf import HF
+
+    store = HF(bucket="user/tc-test", project="demo", token="hf_faketoken")
+
+    def boom(*a, **k):
+        raise OSError("unable to resolve path: invalid repository and revision")
+
+    monkeypatch.setattr(store.fs, "open", boom)
+    with pytest.raises(click.ClickException) as ei:
+        store.put_json("memory/x.json", {"v": 1})
+    msg = str(ei.value)
+    assert "user/tc-test" in msg
+    assert "tracecraft init" in msg
+    assert "repository and revision" not in msg  # raw error replaced, not echoed
+
+
+# ---------- exists(): not-found is False, unauthorized raises ----------
+
+
+def test_exists_not_found_is_false(hf_stub, monkeypatch):
+    from tracecraft.hf import HF
+
+    store = HF(bucket="user/tc-test", project="demo", token="hf_faketoken")
+    monkeypatch.setattr(store.fs, "exists", lambda p: (_ for _ in ()).throw(FileNotFoundError(p)))
+    assert store.exists("memory/x.json") is False
+
+
+def test_exists_surfaces_auth_errors(hf_stub, monkeypatch):
+    from tracecraft.hf import HF
+
+    store = HF(bucket="user/tc-test", project="demo", token="hf_badtoken")
+
+    def boom(path):
+        e = Exception("401 Client Error: Unauthorized for url")
+        e.response = types.SimpleNamespace(status_code=401)
+        raise e
+
+    monkeypatch.setattr(store.fs, "exists", boom)
+    with pytest.raises(click.ClickException) as ei:
+        store.exists("memory/x.json")
+    msg = str(ei.value)
+    assert "auth" in msg.lower()
+    assert "HF_TOKEN" in msg
diff --git a/sdk/tests/test_messaging.py b/sdk/tests/test_messaging.py
new file mode 100644
index 0000000..e3ed7f1
--- /dev/null
+++ b/sdk/tests/test_messaging.py
@@ -0,0 +1,127 @@
+"""Tests for agent-to-agent messaging — especially the same-instant key collision.
+
+The bug these guard against: message keys were `messages/<recip>/<int_seconds>_<sender>.json`,
+so two messages from one sender to one recipient in the same wall-clock second collided on
+the same key and the later one silently overwrote the earlier (a 5-message burst kept 1).
+The fix uses nanosecond resolution + a uuid suffix, so every send is a distinct key.
+"""
+
+from __future__ import annotations
+
+import json
+
+import boto3
+import pytest
+from click.testing import CliRunner
+from moto import mock_aws
+
+from tracecraft.cli import cli
+
+
+BUCKET = "tc-msg-test"
+PROJECT = "demo"
+
+
+@pytest.fixture
+def env(tmp_path, monkeypatch):
+    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing")
+    monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing")
+    monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1")
+    work = tmp_path / "work"
+    work.mkdir()
+    monkeypatch.chdir(work)
+    cfg = {
+        "backend": "s3",
+        "endpoint": None,
+        "bucket": BUCKET,
+        "project": PROJECT,
+        "agent_id": "designer",
+        "access_key": "testing",
+        "secret_key": "testing",
+    }
+    (work / ".tracecraft.json").write_text(json.dumps(cfg))
+    fake_home = tmp_path / "home"
+    (fake_home / ".tracecraft").mkdir(parents=True)
+    (fake_home / ".tracecraft" / "config.json").write_text(json.dumps(cfg))
+    monkeypatch.setenv("HOME", str(fake_home))
+    with mock_aws():
+        boto3.client("s3").create_bucket(Bucket=BUCKET)
+        yield CliRunner()
+
+
+def _keys(prefix):
+    c = boto3.client("s3")
+    out = c.list_objects_v2(Bucket=BUCKET, Prefix=f"{PROJECT}/{prefix}")
+    return [o["Key"] for o in out.get("Contents", [])]
+
+
+def test_burst_to_same_recipient_keeps_every_message(env):
+    """The regression: many messages from one sender to one recipient, sent back to
+    back (same second), must ALL survive — not collapse onto one overwritten key."""
+    n = 8
+    for i in range(n):
+        r = env.invoke(cli, ["send", "reviewer", f"update {i}"])
+        assert r.exit_code == 0, r.output
+    keys = _keys("messages/reviewer/")
+    assert len(keys) == n, f"expected {n} distinct message keys, got {len(keys)}: {keys}"
+    # and the bodies are all distinct (no overwrite)
+    c = boto3.client("s3")
+    bodies = {
+        json.loads(c.get_object(Bucket=BUCKET, Key=k)["Body"].read())["message"] for k in keys
+    }
+    assert bodies == {f"update {i}" for i in range(n)}
+
+
+def test_inbox_reads_the_whole_burst(env):
+    """End-to-end: a burst sent by one agent is fully readable by the recipient."""
+    for i in range(5):
+        env.invoke(cli, ["send", "reviewer", f"msg {i}"])
+    r = env.invoke(cli, ["inbox"], env={"TRACECRAFT_AGENT": "reviewer"})
+    assert r.exit_code == 0, r.output
+    for i in range(5):
+        assert f"msg {i}" in r.output
+
+
+def test_key_shape_is_unique_per_send(env):
+    """Two sends to the same recipient produce two different keys even with no delay."""
+    env.invoke(cli, ["send", "reviewer", "a"])
+    env.invoke(cli, ["send", "reviewer", "b"])
+    keys = _keys("messages/reviewer/")
+    assert len(set(keys)) == 2
+
+
+def test_broadcast_and_direct_are_separate(env):
+    """A broadcast lands under _broadcast, a direct message under the recipient."""
+    env.invoke(cli, ["send", "_broadcast", "hello all"])
+    env.invoke(cli, ["send", "reviewer", "hello you"])
+    assert len(_keys("messages/_broadcast/")) == 1
+    assert len(_keys("messages/reviewer/")) == 1
+
+
+def test_inbox_merges_direct_and_broadcast_chronologically(env):
+    """inbox must interleave direct + broadcast messages by sent_at, not print
+    one prefix's raw list order after the other."""
+    import time as _time
+
+    env.invoke(cli, ["send", "reviewer", "first-direct"])
+    _time.sleep(0.01)
+    env.invoke(cli, ["send", "_broadcast", "second-broadcast"])
+    _time.sleep(0.01)
+    env.invoke(cli, ["send", "reviewer", "third-direct"])
+    r = env.invoke(cli, ["inbox"], env={"TRACECRAFT_AGENT": "reviewer"})
+    assert r.exit_code == 0, r.output
+    out = r.output
+    assert out.index("first-direct") < out.index("second-broadcast") < out.index("third-direct")
+
+
+def test_message_body_carries_sender_and_recipient(env):
+    """The body (not the filename) is the source of truth for from/to — readers parse
+    the body, so the key shape can change freely without breaking inbox or replay."""
+    env.invoke(cli, ["send", "reviewer", "check"])
+    c = boto3.client("s3")
+    k = _keys("messages/reviewer/")[0]
+    doc = json.loads(c.get_object(Bucket=BUCKET, Key=k)["Body"].read())
+    assert doc["from"] == "designer"
+    assert doc["to"] == "reviewer"
+    assert doc["message"] == "check"
+    assert "sent_at" in doc

From 8c3a4641ebcb4b15256f684137ae5a1d926687bf Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:24:43 +0300
Subject: [PATCH 15/22] ci: consolidate workflows into ci.yml (ruff check +
 format + pytest)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Renames test.yml to ci.yml. The format job becomes lint and runs ruff
check (new — lint errors were previously uncaught) plus the existing
ruff format --check. pytest runs on Python 3.10 and 3.12 (the matrix
previously also covered 3.11 and 3.13) with dev+huggingface extras, on
push and PR. README badge updated to point at the new workflow.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .github/workflows/{test.yml => ci.yml} | 11 +++++++----
 README.md                              |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)
 rename .github/workflows/{test.yml => ci.yml} (86%)

diff --git a/.github/workflows/test.yml b/.github/workflows/ci.yml
similarity index 86%
rename from .github/workflows/test.yml
rename to .github/workflows/ci.yml
index 8c4d219..975c5a3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: tests
+name: ci
 
 on:
   push:
@@ -7,7 +7,7 @@ on:
     branches: [main]
 
 jobs:
-  format:
+  lint:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -18,12 +18,15 @@ jobs:
           python-version: "3.12"
           cache: pip
 
-      - name: Install ruff
+      - name: Install package + dev extras
         working-directory: sdk
         run: |
           python -m pip install --upgrade pip
           pip install -e ".[dev]"
 
+      - name: Lint (ruff check)
+        run: ruff check sdk/
+
       - name: Check formatting (ruff format)
         run: ruff format --check sdk/
 
@@ -32,7 +35,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.12"]
 
     steps:
       - uses: actions/checkout@v4
diff --git a/README.md b/README.md
index c4576f7..a9e638c 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 [![PyPI](https://img.shields.io/pypi/v/tracecraft-ai)](https://pypi.org/project/tracecraft-ai/)
 [![Python](https://img.shields.io/pypi/pyversions/tracecraft-ai)](https://pypi.org/project/tracecraft-ai/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
-[![Tests](https://github.com/Arrmlet/tracecraft/actions/workflows/test.yml/badge.svg)](https://github.com/Arrmlet/tracecraft/actions/workflows/test.yml)
+[![CI](https://github.com/Arrmlet/tracecraft/actions/workflows/ci.yml/badge.svg)](https://github.com/Arrmlet/tracecraft/actions/workflows/ci.yml)
 
 **Tracecraft is a CLI coordination layer for multi-agent AI systems** — shared **memory**, a **mailbox**, atomic task **claims**, **handoffs**, and **artifacts**, plus mirrored **session transcripts**, all stored as plain JSON in any **S3** or **HuggingFace** bucket. No server. No database. No SDK lock-in.
 

From a6db54b3ffcd80dd083c7874bc338e71a5026fad Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:25:05 +0300
Subject: [PATCH 16/22] docs: refresh CLAUDE.md known gaps after the
 correctness pass

Same-second message collisions and the empty test suite are fixed;
claim TTL and heartbeat refresh stay open (need design decisions).
Bucket-layout sketch updated to the new message key shape.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 CLAUDE.md | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 370f35c..5668b72 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -38,8 +38,8 @@ pivot lives in `plans/server-archive/` for reference only — nothing in the SDK
 <bucket>/<project>/
   agents/<agent_id>.json                ← agent registration + heartbeat
   memory/<dotted.key>.json              ← shared key-value state
-  messages/<recipient>/<ts>_<from>.json ← per-agent mailbox
-  messages/_broadcast/<ts>_<from>.json  ← broadcast
+  messages/<recipient>/<ts_ns>_<from>_<uuid8>.json ← per-agent mailbox
+  messages/_broadcast/<ts_ns>_<from>_<uuid8>.json  ← broadcast
   steps/<step_id>/claim.json            ← atomic claim (If-None-Match=*)
   steps/<step_id>/status.json           ← pending / in_progress / complete
   steps/<step_id>/handoff.json          ← note + from_agent for next agent
@@ -62,12 +62,11 @@ pivot lives in `plans/server-archive/` for reference only — nothing in the SDK
   the claim is the authoritative write (`step-status` and `wait-for` implement this via
   `_effective_status` in `cli/steps.py`).
 
-## Known gaps (May 2026)
+## Known gaps (June 2026)
 
-- No TTL on claims (a crashed claim-holder keeps the lock forever) — Tier 1 work.
+- No TTL on claims (a crashed claim-holder keeps the lock forever; `complete --force`
+  is the manual escape hatch) — Tier 1 work.
 - Heartbeat is written at `init` only, never refreshed — Tier 1 work.
-- Messages keyed by `<ts>_<sender>.json` can collide same-second — Tier 1 work.
-- No tests in `sdk/tests/` — Tier 1 work.
 
 ## Building
 

From 89e3fa28147f6a5af61307bd41775c29baa394c4 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:25:41 +0300
Subject: [PATCH 17/22] README: document session mirroring properly

Retitle the section and spell out the four harnesses, incremental
cursor uploads (safe to re-run on a cron; seq derived from the bucket),
default secret redaction with per-pattern counts in meta.json, and
replay via 'session show --tail'. Session commands were already in the
CLI reference block.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 README.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index a9e638c..e86594c 100644
--- a/README.md
+++ b/README.md
@@ -96,16 +96,21 @@ tracecraft send _broadcast "v1 cut at 3pm, wrap your tasks"
 
 ---
 
-## Coordination + reasoning in one bucket
+## Session mirroring
 
 Most coordination tools store the *events* — who claimed what, who messaged whom. Tracecraft stores those **and** each agent's full reasoning, by mirroring coding-agent session transcripts into the same bucket. When a run goes sideways, one `tracecraft session show` gives you the handoffs **and** the chain of thought behind them — same place, same JSON, no second system to wire up.
 
 ```bash
-tracecraft session mirror --harness claude-code   # tail this session into the bucket
-tracecraft session show <id> --tail 50            # read coordination + reasoning together
+tracecraft session mirror --harness claude-code   # upload this session's new bytes
+tracecraft session list                           # browse mirrored sessions
+tracecraft session show <id> --tail 50            # replay: meta + last N transcript lines
+tracecraft session stop <id>                      # clear local cursor, mark session ended
 ```
 
-Works with **Claude Code, Codex, OpenClaw, and Hermes**. Source transcripts are never modified; secret-shape redaction (AWS / Anthropic / OpenAI / HF / GitHub / Slack token patterns) is on by default and counted in metadata.
+- **Four harnesses** — `claude-code`, `codex`, `openclaw`, `hermes`. Anything else can mirror by writing JSONL to the same layout.
+- **Incremental cursor uploads** — `mirror` keeps a per-session byte offset and uploads only what's new as numbered parts, so re-running it from a cron or hook is safe and cheap; a run with nothing new is a no-op. The part sequence is derived from the bucket, so part numbering stays correct even if the local state file is lost.
+- **Redaction on by default** — AWS / Anthropic / OpenAI / HF / GitHub / Slack token shapes are scrubbed before upload, with per-pattern match counts recorded in the session's `meta.json` (pass `--no-redact` to opt out). Source transcripts are never modified.
+- **Replay** — `session show <id> --tail N` concatenates the uploaded parts and prints the last N transcript lines next to the session metadata.
 
 Harness matrix, storage formats, and redaction details → **[docs/session-mirror.md](docs/session-mirror.md)**
 

From 41a257573a2c3e453b72e6ba635bb1a6d725380f Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:25:57 +0300
Subject: [PATCH 18/22] README: quick start takes credentials from AWS env vars

Inline --access-key/--secret-key flags leak into shell history; the
init command already reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY,
so show that path. Note that .tracecraft.json is written chmod 600 and
auto-added to .gitignore.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 README.md | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index e86594c..3d708a0 100644
--- a/README.md
+++ b/README.md
@@ -29,20 +29,23 @@ docker run -d -p 9000:9000 \
   minio/minio server /data
 ```
 
-Register two agents against the same project:
+Register two agents against the same project. Credentials come from the standard AWS env vars, so they never land in your shell history:
 
 ```bash
+export AWS_ACCESS_KEY_ID=admin
+export AWS_SECRET_ACCESS_KEY=admin123456
+
 # Terminal 1
 tracecraft init --project demo --agent designer \
-  --endpoint http://localhost:9000 --bucket tracecraft \
-  --access-key admin --secret-key admin123456
+  --endpoint http://localhost:9000 --bucket tracecraft
 
 # Terminal 2 — same flags, --agent developer
 tracecraft init --project demo --agent developer \
-  --endpoint http://localhost:9000 --bucket tracecraft \
-  --access-key admin --secret-key admin123456
+  --endpoint http://localhost:9000 --bucket tracecraft
 ```
 
+`init` writes the config to `.tracecraft.json` with mode `600` and auto-adds it to `.gitignore` when you're in a git repo.
+
 Now the core move — **two agents cannot grab the same work**, with no lock service and no server to run:
 
 ```console

From 0f5734d92c6070e836ac1f07638891cb8ee48ebf Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:26:13 +0300
Subject: [PATCH 19/22] README: document HF bucket privacy behavior

Private by default at creation, --public opt-out, real visibility
shown in init output, and the delete+recreate caveat (no update_bucket
upstream).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 3d708a0..0a5c276 100644
--- a/README.md
+++ b/README.md
@@ -153,6 +153,8 @@ Bring your own bucket — no vendor lock-in:
 | Backblaze B2 / Wasabi | S3-compatible endpoint | |
 | HuggingFace Buckets | `--backend hf --bucket user/name` | browsable on the Hub; `pip install tracecraft-ai[huggingface]` |
 
+**HuggingFace privacy:** `init` creates the bucket **private by default** (pass `--public` to opt out) and prints the bucket's *actual* visibility, read back from the Hub — e.g. `Backend: HuggingFace Buckets  Bucket: user/x (private)`. If the bucket already exists as public and you didn't ask for that, init warns loudly: coordination data and mirrored transcripts would be publicly visible. Visibility can't be flipped after creation (`huggingface_hub` has no `update_bucket`) — the only way to change it is delete + recreate.
+
 ---
 
 ## Use cases

From 70499ca7584d04ea0f8178f7c26b560573b5b9b4 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:26:36 +0300
Subject: [PATCH 20/22] README: add 'Why not X?' and 'Status & limitations'
 sections

Positions tracecraft against in-process frameworks, server-backed
stores, and live wire protocols, and is upfront about its pre-alpha
status: no claim TTL, heartbeat not refreshed after init, HF claims
best-effort. Links to open issues.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/README.md b/README.md
index 0a5c276..14eae6c 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,24 @@ tracecraft send _broadcast "v1 cut at 3pm, wrap your tasks"
 
 ---
 
+## Why not LangGraph / Redis / message queues?
+
+- **Frameworks (LangGraph, CrewAI, AutoGen)** orchestrate agents *inside one process*. Tracecraft coordinates *any* processes across machines — different harnesses, different clouds, different teams — through storage they already have.
+- **Redis / Postgres / a queue** means operating a server: provisioning, auth, uptime, backups. A bucket is zero infra, and every state change is a browsable JSON file — you get an audit trail for free just by opening the bucket.
+- **A2A / MCP** are live wire protocols between *running* agents. Tracecraft is durable state for agents that aren't running at the same time — one agent finishes Tuesday, the next picks up the handoff Wednesday.
+
+## Status & limitations
+
+Tracecraft is **pre-alpha**. Known sharp edges, as of now:
+
+- **No TTL on claims** — a crashed claim-holder keeps the lock until someone runs `complete --force`.
+- **Heartbeat isn't refreshed** — `agents` shows who registered, not who's alive right now.
+- **HF claims are best-effort** — HuggingFace Buckets have no conditional write, so atomic claims need an S3-compatible backend.
+
+Open issues and roadmap → [github.com/Arrmlet/tracecraft/issues](https://github.com/Arrmlet/tracecraft/issues)
+
+---
+
 ## Session mirroring
 
 Most coordination tools store the *events* — who claimed what, who messaged whom. Tracecraft stores those **and** each agent's full reasoning, by mirroring coding-agent session transcripts into the same bucket. When a run goes sideways, one `tracecraft session show` gives you the handoffs **and** the chain of thought behind them — same place, same JSON, no second system to wire up.

From 1aa52ea0bdbf8b7b82da04f88f37121f610a8c16 Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:26:51 +0300
Subject: [PATCH 21/22] README: add minimal Python API snippet

The CLI is the stable interface; get_store() is the documented escape
hatch for direct bucket access from Python.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/README.md b/README.md
index 14eae6c..54eb9e3 100644
--- a/README.md
+++ b/README.md
@@ -227,6 +227,19 @@ TRACECRAFT_AGENT=developer tracecraft inbox
 
 ---
 
+## Python API
+
+The CLI is the stable interface; for code that wants direct bucket access, the store factory is the escape hatch:
+
+```python
+from tracecraft.store import get_store
+
+store, cfg = get_store()  # reads .tracecraft.json like the CLI does
+store.put_json("memory/build/status.json", {"value": "passing", "set_by": cfg["agent_id"]})
+```
+
+---
+
 ## More
 
 - [docs/session-mirror.md](docs/session-mirror.md) — session mirroring: harnesses, formats, redaction

From 83cc0e534b5edf9986c0a2b3a40124322e915f6a Mon Sep 17 00:00:00 2001
From: arrmlet <trubavolodymyr@gmail.com>
Date: Tue, 9 Jun 2026 22:27:23 +0300
Subject: [PATCH 22/22] dev env: MinIO-only compose; .env.example matches
 reality
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

docker-compose.dev.yml drops postgres, redis, and seaweedfs — leftovers
from the pre-pivot server design; the shipped CLI needs exactly one
S3-compatible bucket. .env.example rewritten to the variables the code
actually reads (AWS creds, HF_TOKEN, TRACECRAFT_AGENT, harness path
overrides) instead of JWT/UI/database/monitoring leftovers. Quick start
references the compose file.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .env.example           | 44 ++++++++++++-------------------------
 README.md              |  2 ++
 docker-compose.dev.yml | 50 ++++++++++--------------------------------
 3 files changed, 27 insertions(+), 69 deletions(-)

diff --git a/.env.example b/.env.example
index c767cf6..34fd1ae 100644
--- a/.env.example
+++ b/.env.example
@@ -1,34 +1,18 @@
-# Tracecraft Configuration
+# Tracecraft environment variables — only what the CLI actually reads.
 
-# SeaweedFS Configuration
-TRACECRAFT_SEAWEEDFS_S3_ENDPOINT=localhost:8333
-TRACECRAFT_SEAWEEDFS_ACCESS_KEY=admin
-TRACECRAFT_SEAWEEDFS_SECRET_KEY=admin_secret_key
-TRACECRAFT_SEAWEEDFS_USE_SSL=false
+# S3 backend credentials (read by `tracecraft init`; these defaults match docker-compose.dev.yml)
+AWS_ACCESS_KEY_ID=admin
+AWS_SECRET_ACCESS_KEY=admin123456
 
-# Security Configuration
-TRACECRAFT_SECURITY_ENCRYPTION_ENABLED=true
-TRACECRAFT_SECURITY_JWT_SECRET=your-jwt-secret-here
+# HuggingFace backend token (read by `tracecraft init --backend hf`)
+# HF_TOKEN=hf_...
 
-# Storage Configuration
-TRACECRAFT_STORAGE_BUCKET_NAME=tracecraft-data
-TRACECRAFT_STORAGE_RETENTION_DAYS=90
+# Override the agent identity per shell/process (lets several agents share one
+# directory and .tracecraft.json)
+# TRACECRAFT_AGENT=designer
 
-# UI Configuration
-TRACECRAFT_UI_HOST=0.0.0.0
-TRACECRAFT_UI_PORT=8000
-TRACECRAFT_UI_AUTH_REQUIRED=false
-
-# Monitoring Configuration
-TRACECRAFT_MONITORING_ENABLED=true
-
-# Database Configuration
-TRACECRAFT_DATABASE_HOST=localhost
-TRACECRAFT_DATABASE_PORT=5432
-TRACECRAFT_DATABASE_DATABASE=tracecraft
-TRACECRAFT_DATABASE_USER=tracecraft
-TRACECRAFT_DATABASE_PASSWORD=tracecraft
-
-# Redis Configuration
-TRACECRAFT_REDIS_HOST=localhost
-TRACECRAFT_REDIS_PORT=6379
+# Session-mirror harness location overrides (only if your harness lives in a
+# non-default path)
+# OPENCLAW_STATE_DIR=
+# OPENCLAW_HOME=
+# HERMES_HOME=
diff --git a/README.md b/README.md
index 54eb9e3..386274a 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,8 @@ docker run -d -p 9000:9000 \
   minio/minio server /data
 ```
 
+(From a checkout, `docker compose -f docker-compose.dev.yml up -d` does the same and adds the MinIO console on `:9001`.)
+
 Register two agents against the same project. Credentials come from the standard AWS env vars, so they never land in your shell history:
 
 ```bash
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index bdadcbd..c43ca8d 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -1,45 +1,17 @@
-version: "3.8"
-
+# Local dev backend: a single MinIO bucket is all tracecraft needs.
+# Console at http://localhost:9001 to watch agents coordinate live.
 services:
-  postgres:
-    image: postgres:16
+  minio:
+    image: minio/minio
+    command: server /data --console-address ":9001"
     ports:
-      - "5432:5432"
+      - "9000:9000"
+      - "9001:9001"
     environment:
-      POSTGRES_DB: tracecraft
-      POSTGRES_USER: tracecraft
-      POSTGRES_PASSWORD: tracecraft
+      MINIO_ROOT_USER: admin
+      MINIO_ROOT_PASSWORD: admin123456
     volumes:
-      - postgres_data:/var/lib/postgresql/data
-
-  redis:
-    image: redis:7-alpine
-    ports:
-      - "6379:6379"
-
-  seaweed-master:
-    image: chrislusf/seaweedfs
-    command: master -ip=seaweed-master -port=9333
-    ports:
-      - "9333:9333"
-
-  seaweed-volume:
-    image: chrislusf/seaweedfs
-    command: volume -mserver=seaweed-master:9333 -port=8080 -ip=seaweed-volume
-    ports:
-      - "8080:8080"
-    depends_on:
-      - seaweed-master
-
-  seaweed-filer:
-    image: chrislusf/seaweedfs
-    command: filer -master=seaweed-master:9333 -port=8888 -s3 -s3.port=8333
-    ports:
-      - "8888:8888"
-      - "8333:8333"
-    depends_on:
-      - seaweed-master
-      - seaweed-volume
+      - minio_data:/data
 
 volumes:
-  postgres_data:
+  minio_data: