From 455a1f1ab49e58bab8f8700a826b61e14985eb76 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Sat, 6 Jun 2026 16:31:55 +0300 Subject: [PATCH 01/22] 0.2.1: structured handoff records + huggingface pip extra --- README.md | 4 +- sdk/pyproject.toml | 2 +- sdk/tracecraft/__init__.py | 2 +- sdk/tracecraft/cli/steps.py | 86 +++++++++++++++++++++++++++++++------ 4 files changed, 78 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 7be3945..c4576f7 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,9 @@ tracecraft inbox # Read messages tracecraft inbox --delete # Read and clear tracecraft claim # Claim a step (atomic) -tracecraft complete [--note X] # Mark done + handoff note +tracecraft complete [--note X] [--to AGENT] [--next-action X] + [--blocked|--needs-review] + [--changed-files-from-git] # Structured handoff record tracecraft step-status # Check status tracecraft wait-for # Block until complete (default 300s timeout) diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 4f0be08..c6446f4 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "tracecraft-ai" -version = "0.2.0" +version = "0.2.1" description = "Coordination layer for multi-agent AI systems. Bring your own S3 / HuggingFace bucket; shared memory, mailbox, atomic task claims, handoffs, artifacts — no server, no database." 
readme = "README.md" license = {text = "MIT"} diff --git a/sdk/tracecraft/__init__.py b/sdk/tracecraft/__init__.py index ae1ec60..22b8802 100644 --- a/sdk/tracecraft/__init__.py +++ b/sdk/tracecraft/__init__.py @@ -1,3 +1,3 @@ """Tracecraft — coordination layer for multi-agent AI systems.""" -__version__ = "0.2.0" +__version__ = "0.2.1" diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py index 58868d3..2ef1b34 100644 --- a/sdk/tracecraft/cli/steps.py +++ b/sdk/tracecraft/cli/steps.py @@ -1,5 +1,6 @@ """tracecraft steps — claim, complete, and track coordination steps.""" +import subprocess import time from datetime import datetime, timezone @@ -9,6 +10,27 @@ from tracecraft.store import get_store +def _git_changed_files() -> list[str]: + """Return changed files from `git diff --name-only HEAD` (staged + unstaged), + or [] if not a git repo / git unavailable. Never raises. + + Git is the source of truth for what changed — we never let an agent type + the file list by hand (self-reported change lists are wrong ~half the time + and go stale on the next commit). 
+ """ + try: + out = subprocess.run( + ["git", "diff", "--name-only", "HEAD"], + capture_output=True, text=True, timeout=10, + ) + if out.returncode != 0: + return [] + files = [ln.strip() for ln in out.stdout.splitlines() if ln.strip()] + return files + except (OSError, subprocess.SubprocessError): + return [] + + @click.command() @click.argument("step_id") def claim(step_id): @@ -39,31 +61,69 @@ def claim(step_id): @click.command() @click.argument("step_id") -@click.option("--note", default="", help="Handoff note for the next agent") -def complete(step_id, note): - """Mark a step as complete and write handoff.""" +@click.option("--note", default="", help="Handoff note for the next agent (free text)") +@click.option("--to", "next_agent", default=None, help="Agent this step hands off to") +@click.option("--next-action", default=None, help="One line: what the next agent should do first") +@click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete") +@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete") +@click.option( + "--changed-files-from-git", + is_flag=True, + help="Record files changed (from `git diff`), so the next agent knows what moved. No-op outside a git repo.", +) +def complete(step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git): + """Mark a step complete (or blocked / needs-review) and write a handoff record. + + The handoff record is what the next agent sees instead of a shared + conversation — so it carries machine-checkable state, not just a note. + Fields that can be wrong if hand-typed (changed files) are sourced from + git; fields that would be hallucinated if mandatory (assumptions) stay as + optional free text in --note. 
+ """ + if blocked and needs_review: + raise click.ClickException("Use at most one of --blocked / --needs-review") + store, cfg = get_store() agent = cfg["agent_id"] sid = step_id.lower().replace(".", "-") now = datetime.now(timezone.utc).isoformat() - # Update status + state = "blocked" if blocked else "needs_review" if needs_review else "complete" + + # Status reflects the real outcome (not always "complete"). existing = store.get_json(f"steps/{sid}/status.json") or {} - store.put_json(f"steps/{sid}/status.json", { - "status": "complete", + status_doc = { + "status": state, "agent": agent, "started_at": existing.get("started_at", now), - "completed_at": now, - }) - - # Write handoff - store.put_json(f"steps/{sid}/handoff.json", { + } + if state == "complete": + status_doc["completed_at"] = now + store.put_json(f"steps/{sid}/status.json", status_doc) + + # Handoff record — schema v2. All v2 keys optional; old readers/handoffs + # keep working. changed_files is git-derived (never agent-typed). 
+ handoff = { + "schema": 2, "from_agent": agent, "from_step": step_id, + "next_agent": next_agent, + "state": state, + "next_action": next_action, "note": note, "created_at": now, - }) - click.echo(f"Completed step {step_id}") + } + if changed_files_from_git: + handoff["changed_files"] = _git_changed_files() + store.put_json(f"steps/{sid}/handoff.json", handoff) + + label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state] + msg = f"{label} step {step_id}" + if next_agent: + msg += f" → handed off to {next_agent}" + if changed_files_from_git: + msg += f" ({len(handoff['changed_files'])} changed file(s))" + click.echo(msg) @click.command() From d8a9fa7c326b459175f14f1428f9131cf7f0e911 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Sat, 6 Jun 2026 17:04:55 +0300 Subject: [PATCH 02/22] ci: add ruff format check; format codebase; add handoff tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a 'format' job to the tests workflow that runs 'ruff format --check sdk/' on every push/PR, so style drift fails CI instead of reaching review. To make the gate green, ran 'ruff format' across sdk/ (15 files reformatted, whitespace/line-wrap only — no logic changes). Verified the full suite still passes (66 tests). Also lands sdk/tests/test_handoff_v2.py — the 9-test suite for the 0.2.1 structured handoff (complete/blocked/needs_review state, next_action/--to, git-derived changed_files, mutual-exclusion, no mandatory assumptions field). The fixture chdirs to an isolated dir and writes config to both the CWD-local and HOME paths, so a stray ./.tracecraft.json can't shadow it (this was making the tests hit a real endpoint and fail). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/test.yml | 20 +++ sdk/tests/test_handoff_v2.py | 184 ++++++++++++++++++++++++++ sdk/tests/test_session_cli.py | 6 +- sdk/tests/test_tier_0.py | 139 ++++++++++++------- sdk/tracecraft/cli/init_cmd.py | 56 +++++--- sdk/tracecraft/cli/memory.py | 13 +- sdk/tracecraft/cli/messages.py | 15 ++- sdk/tracecraft/cli/session.py | 29 ++-- sdk/tracecraft/cli/steps.py | 25 ++-- sdk/tracecraft/harness/claude_code.py | 3 +- sdk/tracecraft/harness/codex.py | 4 +- sdk/tracecraft/harness/hermes.py | 6 +- sdk/tracecraft/hf.py | 5 +- sdk/tracecraft/store.py | 2 + 14 files changed, 399 insertions(+), 108 deletions(-) create mode 100644 sdk/tests/test_handoff_v2.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f6e88dc..8c4d219 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,26 @@ on: branches: [main] jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + + - name: Install ruff + working-directory: sdk + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Check formatting (ruff format) + run: ruff format --check sdk/ + pytest: runs-on: ubuntu-latest strategy: diff --git a/sdk/tests/test_handoff_v2.py b/sdk/tests/test_handoff_v2.py new file mode 100644 index 0000000..fa20346 --- /dev/null +++ b/sdk/tests/test_handoff_v2.py @@ -0,0 +1,184 @@ +"""Tests for the v0.2.1 structured handoff record. + +Schema v2 adds: state enum (complete/blocked/needs_review), next_action, +git-derived changed_files. All optional + backward compatible. 
+""" + +from __future__ import annotations + +import json + +import boto3 +import pytest +from click.testing import CliRunner +from moto import mock_aws + +from tracecraft.cli import cli +import tracecraft.cli.steps as steps_mod + + +BUCKET = "tc-handoff-test" +PROJECT = "demo" + + +@pytest.fixture +def env(tmp_path, monkeypatch): + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") + # Run from an isolated empty dir. load_config() is CWD-first, so without + # this a stray ./.tracecraft.json in the repo would shadow our test config + # and point the CLI at a real endpoint. + work = tmp_path / "work" + work.mkdir() + monkeypatch.chdir(work) + cfg = { + "backend": "s3", + "endpoint": None, + "bucket": BUCKET, + "project": PROJECT, + "agent_id": "designer", + "access_key": "testing", + "secret_key": "testing", + } + # Write to the CWD-local path load_config() checks first... + (work / ".tracecraft.json").write_text(json.dumps(cfg)) + # ...and the global HOME fallback, so tests that chdir elsewhere (the git + # tests below) still resolve a config. 
+ fake_home = tmp_path / "home" + (fake_home / ".tracecraft").mkdir(parents=True) + (fake_home / ".tracecraft" / "config.json").write_text(json.dumps(cfg)) + monkeypatch.setenv("HOME", str(fake_home)) + with mock_aws(): + boto3.client("s3").create_bucket(Bucket=BUCKET) + yield CliRunner() + + +def _handoff(sid="design"): + c = boto3.client("s3") + obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/handoff.json") + return json.loads(obj["Body"].read()) + + +def _status(sid="design"): + c = boto3.client("s3") + obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/status.json") + return json.loads(obj["Body"].read()) + + +# ---------- backward compatibility ---------- + + +def test_plain_complete_is_backward_compatible(env): + r = env.invoke(cli, ["complete", "design"]) + assert r.exit_code == 0, r.output + assert r.output.startswith("Completed step design") + h = _handoff() + # v1 keys still present + assert h["from_agent"] == "designer" + assert h["from_step"] == "design" + assert h["note"] == "" + assert "created_at" in h + # v2 defaults + assert h["schema"] == 2 + assert h["state"] == "complete" + assert h["next_agent"] is None + assert h["next_action"] is None + assert "changed_files" not in h # only present with the git flag + # status reflects complete + assert _status()["status"] == "complete" + assert "completed_at" in _status() + + +# ---------- state enum ---------- + + +def test_blocked_sets_state_and_status(env): + r = env.invoke(cli, ["complete", "design", "--blocked", "--note", "stuck on auth"]) + assert r.exit_code == 0, r.output + assert "Blocked step design" in r.output + assert _handoff()["state"] == "blocked" + assert _status()["status"] == "blocked" + assert "completed_at" not in _status() # not complete → no completed_at + + +def test_needs_review_sets_state(env): + r = env.invoke(cli, ["complete", "design", "--needs-review"]) + assert r.exit_code == 0, r.output + assert "Needs review on step design" in r.output + assert 
_handoff()["state"] == "needs_review" + assert _status()["status"] == "needs_review" + + +def test_blocked_and_needs_review_mutually_exclusive(env): + r = env.invoke(cli, ["complete", "design", "--blocked", "--needs-review"]) + assert r.exit_code != 0 + assert "at most one" in r.output + + +# ---------- next_action + next_agent ---------- + + +def test_next_action_and_to(env): + r = env.invoke( + cli, + ["complete", "design", "--to", "developer", "--next-action", "wire api.py into search"], + ) + assert r.exit_code == 0, r.output + assert "handed off to developer" in r.output + h = _handoff() + assert h["next_agent"] == "developer" + assert h["next_action"] == "wire api.py into search" + + +# ---------- changed_files from git ---------- + + +def test_changed_files_git_in_repo(env, tmp_path, monkeypatch): + # Make cwd a git repo with one modified tracked file + import subprocess + + repo = tmp_path / "repo" + repo.mkdir() + monkeypatch.chdir(repo) + subprocess.run(["git", "init", "-q"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.email", "t@t.t"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True) + (repo / "a.py").write_text("x = 1\n") + subprocess.run(["git", "add", "a.py"], cwd=repo, check=True) + subprocess.run(["git", "commit", "-qm", "init"], cwd=repo, check=True) + (repo / "a.py").write_text("x = 2\n") # now modified vs HEAD + + r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"]) + assert r.exit_code == 0, r.output + h = _handoff() + assert h["changed_files"] == ["a.py"] + assert "1 changed file(s)" in r.output + + +def test_changed_files_git_outside_repo_is_empty(env, tmp_path, monkeypatch): + # cwd is NOT a git repo → flag is a no-op (empty list), never crashes + nonrepo = tmp_path / "plain" + nonrepo.mkdir() + monkeypatch.chdir(nonrepo) + r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"]) + assert r.exit_code == 0, r.output + assert 
_handoff()["changed_files"] == [] + + +def test_no_assumptions_field(env): + # We deliberately do NOT add a mandatory unresolved_assumptions field. + env.invoke(cli, ["complete", "design", "--note", "assumed v2 API"]) + h = _handoff() + assert "unresolved_assumptions" not in h + assert "assumptions" not in h + # open questions live in the free-text note + assert h["note"] == "assumed v2 API" + + +# ---------- helper direct test ---------- + + +def test_git_changed_files_helper_never_raises(monkeypatch, tmp_path): + monkeypatch.chdir(tmp_path) # not a repo + assert steps_mod._git_changed_files() == [] diff --git a/sdk/tests/test_session_cli.py b/sdk/tests/test_session_cli.py index cd22230..f2b2f7b 100644 --- a/sdk/tests/test_session_cli.py +++ b/sdk/tests/test_session_cli.py @@ -114,9 +114,7 @@ def _bucket_keys(): """Return all keys under PROJECT/ stripped of the project prefix.""" client = boto3.client("s3") resp = client.list_objects_v2(Bucket=BUCKET, Prefix=f"{PROJECT}/") - return [ - obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", []) - ] + return [obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", [])] def _get_meta(session_id): @@ -256,7 +254,7 @@ def test_session_list_shows_uploaded_session(cli_env): def test_session_show_tails_concatenated_parts(cli_env): runner, cwd, sess, sid = cli_env - sess.write_bytes(b'line1\n') + sess.write_bytes(b"line1\n") runner.invoke(cli, ["session", "mirror", "--harness", "claude-code", "--cwd", str(cwd)]) with open(sess, "ab") as f: f.write(b"line2\nline3\n") diff --git a/sdk/tests/test_tier_0.py b/sdk/tests/test_tier_0.py index ab321a5..790a62d 100644 --- a/sdk/tests/test_tier_0.py +++ b/sdk/tests/test_tier_0.py @@ -57,6 +57,7 @@ def store(s3_env): # ---------- Fix 1: atomic claim ---------- + def test_fix1_atomic_put_first_writer_wins(store): """First put_json(if_none_match=True) succeeds; second raises PreconditionFailed.""" store.put_json("steps/foo/claim.json", {"agent": "a"}, if_none_match=True) 
@@ -80,15 +81,19 @@ def test_fix1_claim_cli_blocks_second_caller(s3_env, monkeypatch, tmp_path): cfg_file = tmp_path / ".tracecraft.json" def write_cfg(agent_id): - cfg_file.write_text(json.dumps({ - "backend": "s3", - "bucket": BUCKET, - "project": PROJECT, - "endpoint": None, - "access_key": "testing", - "secret_key": "testing", - "agent_id": agent_id, - })) + cfg_file.write_text( + json.dumps( + { + "backend": "s3", + "bucket": BUCKET, + "project": PROJECT, + "endpoint": None, + "access_key": "testing", + "secret_key": "testing", + "agent_id": agent_id, + } + ) + ) monkeypatch.chdir(tmp_path) write_cfg("agent-a") @@ -106,6 +111,7 @@ def write_cfg(agent_id): # ---------- Fix 2: paginated list_keys ---------- + def test_fix2_list_keys_returns_more_than_1000(store): """Write 1250 keys; ensure list_keys returns them all (not capped at 1000).""" for i in range(1250): @@ -118,6 +124,7 @@ def test_fix2_list_keys_returns_more_than_1000(store): # ---------- Fix 3: no default admin/secret credentials ---------- + def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path): """`tracecraft init` without --access-key/--secret-key/env must error.""" monkeypatch.chdir(tmp_path) @@ -125,13 +132,21 @@ def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path): monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", "http://localhost:9000", - "--bucket", "x", - "--project", "p", - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + "http://localhost:9000", + "--bucket", + "x", + "--project", + "p", + "--agent", + "a", + ], + ) assert r.exit_code != 0 assert "credentials required" in r.output.lower() # Critically, must NOT have written admin/secret to disk @@ -145,13 +160,21 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = 
runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, # moto default - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, # moto default + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output saved = json.loads((tmp_path / ".tracecraft.json").read_text()) assert saved["access_key"] == "testing" @@ -163,6 +186,7 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env): # ---------- Fix 4: .gitignore handling ---------- + def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env): """When cwd is a git repo, init appends .tracecraft.json to .gitignore.""" (tmp_path / ".git").mkdir() @@ -171,13 +195,21 @@ def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output gi = (tmp_path / ".gitignore").read_text() assert ".tracecraft.json" in gi.splitlines() @@ -192,13 +224,21 @@ def test_fix4_gitignore_not_duplicated(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output lines = (tmp_path / 
".gitignore").read_text().splitlines() assert lines.count(".tracecraft.json") == 1 @@ -211,19 +251,28 @@ def test_fix4_no_gitignore_outside_repo(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output assert not (tmp_path / ".gitignore").exists() # ---------- Fix 5: dead scaffolding removed ---------- + def test_fix5_no_empty_namespace_packages(): """integrations/ and transport/ packages must not be importable.""" with pytest.raises(ImportError): @@ -243,5 +292,5 @@ def test_fix5_pyproject_drops_dead_extras(): """crewai/langgraph/claude-sdk/all extras must not be declared.""" repo_root = pathlib.Path(__file__).resolve().parents[2] text = (repo_root / "sdk" / "pyproject.toml").read_text() - for forbidden in ('crewai = [', 'langgraph = [', 'claude-sdk = [', 'all = ['): + for forbidden in ("crewai = [", "langgraph = [", "claude-sdk = [", "all = ["): assert forbidden not in text, f"pyproject still declares: {forbidden}" diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py index 1521e86..d1a9762 100644 --- a/sdk/tracecraft/cli/init_cmd.py +++ b/sdk/tracecraft/cli/init_cmd.py @@ -10,14 +10,35 @@ @click.command() -@click.option("--backend", type=click.Choice(["s3", "hf"]), default="s3", help="Storage backend: s3 or hf (HuggingFace Buckets)") +@click.option( + "--backend", + type=click.Choice(["s3", "hf"]), + default="s3", + help="Storage backend: s3 or hf (HuggingFace Buckets)", +) @click.option("--endpoint", default=None, help="S3 endpoint URL (s3 backend only)") -@click.option("--bucket", required=True, help="Bucket name (s3) or HF bucket 
handle e.g. username/my-bucket (hf)") +@click.option( + "--bucket", + required=True, + help="Bucket name (s3) or HF bucket handle e.g. username/my-bucket (hf)", +) @click.option("--project", required=True, help="Project namespace") @click.option("--agent", required=True, help="Agent ID for this session") -@click.option("--access-key", default=None, envvar="AWS_ACCESS_KEY_ID", help="S3 access key (env: AWS_ACCESS_KEY_ID)") -@click.option("--secret-key", default=None, envvar="AWS_SECRET_ACCESS_KEY", help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)") -@click.option("--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)") +@click.option( + "--access-key", + default=None, + envvar="AWS_ACCESS_KEY_ID", + help="S3 access key (env: AWS_ACCESS_KEY_ID)", +) +@click.option( + "--secret-key", + default=None, + envvar="AWS_SECRET_ACCESS_KEY", + help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)", +) +@click.option( + "--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)" +) def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token): """Initialize tracecraft config, create bucket, and register agent.""" cfg = { @@ -49,23 +70,24 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, store.ensure_bucket() now = datetime.now(timezone.utc).isoformat() - store.put_json(f"agents/{agent}.json", { - "id": agent, - "status": "active", - "step": None, - "started_at": now, - "heartbeat": now, - "summary": "Initialized", - }) + store.put_json( + f"agents/{agent}.json", + { + "id": agent, + "status": "active", + "step": None, + "started_at": now, + "heartbeat": now, + "summary": "Initialized", + }, + ) click.echo(f"Initialized project '{project}' as agent '{agent}'") if backend == "s3": click.echo(f"Backend: S3 Endpoint: {endpoint} Bucket: {bucket}") else: click.echo(f"Backend: HuggingFace Buckets Bucket: {bucket}") - click.echo( - "Note: .tracecraft.json contains 
credentials. Keep it out of version control." - ) + click.echo("Note: .tracecraft.json contains credentials. Keep it out of version control.") def _ensure_gitignore_entry(): @@ -98,9 +120,11 @@ def _get_store(cfg): backend = cfg.get("backend", "s3") if backend == "hf": from tracecraft.hf import HF + return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token")) else: from tracecraft.s3 import S3 + return S3( endpoint=cfg["endpoint"], bucket=cfg["bucket"], diff --git a/sdk/tracecraft/cli/memory.py b/sdk/tracecraft/cli/memory.py index 17c899b..65e12b2 100644 --- a/sdk/tracecraft/cli/memory.py +++ b/sdk/tracecraft/cli/memory.py @@ -33,11 +33,14 @@ def memory_set(key, value): raise click.ClickException("Key cannot be empty") store, cfg = get_store() now = datetime.now(timezone.utc).isoformat() - store.put_json(_key_to_path(key), { - "value": value, - "set_by": cfg["agent_id"], - "set_at": now, - }) + store.put_json( + _key_to_path(key), + { + "value": value, + "set_by": cfg["agent_id"], + "set_at": now, + }, + ) click.echo(f"Set {key} = {value}") diff --git a/sdk/tracecraft/cli/messages.py b/sdk/tracecraft/cli/messages.py index 9088c70..efba3c4 100644 --- a/sdk/tracecraft/cli/messages.py +++ b/sdk/tracecraft/cli/messages.py @@ -21,12 +21,15 @@ def send(recipient, message): now = datetime.now(timezone.utc).isoformat() key = f"messages/{recipient}/{ts}_{sender}.json" - store.put_json(key, { - "from": sender, - "to": recipient, - "message": message, - "sent_at": now, - }) + store.put_json( + key, + { + "from": sender, + "to": recipient, + "message": message, + "sent_at": now, + }, + ) click.echo(f"Sent to {recipient}: {message}") diff --git a/sdk/tracecraft/cli/session.py b/sdk/tracecraft/cli/session.py index 3342cc9..0313cc5 100644 --- a/sdk/tracecraft/cli/session.py +++ b/sdk/tracecraft/cli/session.py @@ -114,7 +114,9 @@ def session(): default=None, help="Project directory the session ran in (claude-code only). 
Defaults to $PWD.", ) -@click.option("--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets.") +@click.option( + "--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets." +) @click.option( "--min-bytes", default=1, @@ -156,9 +158,7 @@ def mirror(harness_name, session_id, cwd_str, no_redact, min_bytes): # authoritative — read_new() returns the real consumed cursor below. cur_size = harness.size(sess) if cur_size - cursor < min_bytes: - click.echo( - f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}" - ) + click.echo(f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}") return # 2. Read everything new since `cursor`, race-free: read_new returns the @@ -286,10 +286,10 @@ def list_(harness_filter, limit, sort_by): sid = m.get("session_id", "?") short = sid[:8] + ("…" if len(sid) > 8 else "") click.echo( - f"{m.get('harness','?'):<14} {short:<16} " - f"{m.get('total_uploaded_bytes',0):>12,} " + f"{m.get('harness', '?'):<14} {short:<16} " + f"{m.get('total_uploaded_bytes', 0):>12,} " f"{len(m.get('parts', [])):>6} " - f"{m.get('last_uploaded_at','-')[:24]:<25}" + f"{m.get('last_uploaded_at', '-')[:24]:<25}" ) @@ -309,7 +309,9 @@ def show(session_id, tail): store, _ = get_store() # Find which harness this session lives under (search every harness folder). - all_meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")] + all_meta_keys = [ + k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json") + ] if not all_meta_keys: raise click.ClickException(f"session not found: {session_id}") meta_key = all_meta_keys[0] @@ -321,9 +323,7 @@ def show(session_id, tail): # Fetch all parts (in seq order), concatenate, print last N lines. 
prefix = meta_key[: -len("meta.json")] - part_keys = sorted( - k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1]) - ) + part_keys = sorted(k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1])) body = bytearray() for k in part_keys: with tempfile.NamedTemporaryFile(delete=False) as tf: @@ -365,9 +365,7 @@ def stop(session_id): # Best-effort: mark ended_at in meta if a meta exists. store, _ = get_store() - meta_keys = [ - k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json") - ] + meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")] marked = False if meta_keys: meta = store.get_json(meta_keys[0]) or {} @@ -377,6 +375,5 @@ def stop(session_id): marked = True click.echo( - f"stopped session={session_id} " - f"state_cleared={had_state} meta_marked_ended={marked}" + f"stopped session={session_id} state_cleared={had_state} meta_marked_ended={marked}" ) diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py index 2ef1b34..c0c34dd 100644 --- a/sdk/tracecraft/cli/steps.py +++ b/sdk/tracecraft/cli/steps.py @@ -21,7 +21,9 @@ def _git_changed_files() -> list[str]: try: out = subprocess.run( ["git", "diff", "--name-only", "HEAD"], - capture_output=True, text=True, timeout=10, + capture_output=True, + text=True, + timeout=10, ) if out.returncode != 0: return [] @@ -51,11 +53,14 @@ def claim(step_id): owner = existing.get("agent", "unknown") raise click.ClickException(f"Step {step_id} already claimed by {owner}") - store.put_json(f"steps/{sid}/status.json", { - "status": "in_progress", - "agent": agent, - "started_at": now, - }) + store.put_json( + f"steps/{sid}/status.json", + { + "status": "in_progress", + "agent": agent, + "started_at": now, + }, + ) click.echo(f"Claimed step {step_id} as {agent}") @@ -65,7 +70,9 @@ def claim(step_id): @click.option("--to", "next_agent", default=None, help="Agent this step hands off to") 
@click.option("--next-action", default=None, help="One line: what the next agent should do first") @click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete") -@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete") +@click.option( + "--needs-review", is_flag=True, help="Mark the step as needing review rather than complete" +) @click.option( "--changed-files-from-git", is_flag=True, @@ -117,7 +124,9 @@ def complete(step_id, note, next_agent, next_action, blocked, needs_review, chan handoff["changed_files"] = _git_changed_files() store.put_json(f"steps/{sid}/handoff.json", handoff) - label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state] + label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[ + state + ] msg = f"{label} step {step_id}" if next_agent: msg += f" → handed off to {next_agent}" diff --git a/sdk/tracecraft/harness/claude_code.py b/sdk/tracecraft/harness/claude_code.py index bd3bbd1..bcc936b 100644 --- a/sdk/tracecraft/harness/claude_code.py +++ b/sdk/tracecraft/harness/claude_code.py @@ -40,6 +40,5 @@ def discover(self, cwd: Path) -> list[Session]: if not pdir.is_dir(): return [] return [ - Session(path=jsonl, session_id=jsonl.stem, cwd=cwd) - for jsonl in pdir.glob("*.jsonl") + Session(path=jsonl, session_id=jsonl.stem, cwd=cwd) for jsonl in pdir.glob("*.jsonl") ] diff --git a/sdk/tracecraft/harness/codex.py b/sdk/tracecraft/harness/codex.py index a5bd85e..5650dfb 100644 --- a/sdk/tracecraft/harness/codex.py +++ b/sdk/tracecraft/harness/codex.py @@ -16,7 +16,9 @@ from .base import FileTailHarness, Session -_ROLLOUT_RE = re.compile(r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P[A-Za-z0-9_-]+)\.jsonl$") +_ROLLOUT_RE = re.compile( + r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P[A-Za-z0-9_-]+)\.jsonl$" +) class CodexHarness(FileTailHarness): diff --git a/sdk/tracecraft/harness/hermes.py 
b/sdk/tracecraft/harness/hermes.py index 671d0d0..668fc5f 100644 --- a/sdk/tracecraft/harness/hermes.py +++ b/sdk/tracecraft/harness/hermes.py @@ -59,7 +59,7 @@ def _decode_content(value): """Hermes stores multimodal content as '\\x00json:'; scalars as-is.""" if isinstance(value, str) and value.startswith(_CONTENT_JSON_PREFIX): try: - return json.loads(value[len(_CONTENT_JSON_PREFIX):]) + return json.loads(value[len(_CONTENT_JSON_PREFIX) :]) except json.JSONDecodeError: return value return value @@ -82,9 +82,7 @@ def discover(self, cwd: Path) -> list[Session]: return [] conn = _connect_ro(self.db_path) try: - rows = conn.execute( - "SELECT id FROM sessions ORDER BY started_at DESC" - ).fetchall() + rows = conn.execute("SELECT id FROM sessions ORDER BY started_at DESC").fetchall() except sqlite3.Error: return [] finally: diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py index aa7a8dd..ca2756b 100644 --- a/sdk/tracecraft/hf.py +++ b/sdk/tracecraft/hf.py @@ -10,6 +10,7 @@ class HF: def __init__(self, bucket, project, token=None): from huggingface_hub import HfFileSystem + self.fs = HfFileSystem(token=token) self.bucket = bucket # e.g. "username/my-bucket" self.project = project @@ -35,11 +36,13 @@ def put_json(self, key, data, if_none_match=False): # This is racy, but documented; S3-compatible backends use IfNoneMatch=* for safety. 
if self.fs.exists(path): from tracecraft.s3 import PreconditionFailed + raise PreconditionFailed(key) with self.fs.open(path, "w") as f: json.dump(data, f, indent=2) except Exception as e: from tracecraft.s3 import PreconditionFailed + if isinstance(e, PreconditionFailed): raise raise click.ClickException(f"HF put failed: {e}") @@ -65,7 +68,7 @@ def list_keys(self, prefix=""): keys = [] for entry in entries: if entry.startswith(base_prefix): - keys.append(entry[len(base_prefix):]) + keys.append(entry[len(base_prefix) :]) else: keys.append(entry) return keys diff --git a/sdk/tracecraft/store.py b/sdk/tracecraft/store.py index 4226394..85ce840 100644 --- a/sdk/tracecraft/store.py +++ b/sdk/tracecraft/store.py @@ -10,9 +10,11 @@ def get_store(): if backend == "hf": from tracecraft.hf import HF + return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token")), cfg else: from tracecraft.s3 import S3 + return S3( endpoint=cfg["endpoint"], bucket=cfg["bucket"], From 315cc555d25060cfdf68fda6f66e7955120d7640 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:14:10 +0300 Subject: [PATCH 03/22] hf: implement ensure_bucket() via HfApi.create_bucket, private by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ensure_bucket was a no-op, so 'init --backend hf' against a brand-new bucket failed cryptically on the first write. Now creates the bucket with HfApi.create_bucket(exist_ok=True), private unless the caller opts out — HF buckets default to public upstream, which is the wrong default for internal coordination data and mirrored transcripts. Fixes #7. Refs #8 (creation half). 
Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/hf.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py index ca2756b..318edbd 100644 --- a/sdk/tracecraft/hf.py +++ b/sdk/tracecraft/hf.py @@ -8,12 +8,14 @@ class HF: - def __init__(self, bucket, project, token=None): + def __init__(self, bucket, project, token=None, private=True): from huggingface_hub import HfFileSystem self.fs = HfFileSystem(token=token) self.bucket = bucket # e.g. "username/my-bucket" self.project = project + self.token = token + self.private = private # safe default: private (these hold internal traces) self.base = f"hf://buckets/{bucket}" @classmethod @@ -104,6 +106,23 @@ def get_file(self, key, local_path): raise click.ClickException(f"HF download failed: {e}") def ensure_bucket(self): - # HF buckets are created via CLI or web — verify by checking exists or listing - # Empty buckets fail on ls(), so we just pass and let first write validate access - pass + """Create the HF bucket if it doesn't exist (private by default). + + Previously a no-op, which made `init` against a brand-new bucket fail with a + cryptic error on the first write (issue #7). HF buckets default to *public* + on creation, which is a privacy footgun for a tool that stores internal + memory/transcripts (issue #8) — so we create them private unless the caller + opts out via `private=False`. + """ + try: + from huggingface_hub import HfApi + + HfApi(token=self.token).create_bucket(self.bucket, private=self.private, exist_ok=True) + except Exception as e: + # Fall back to the old behavior: let the first write validate access, + # but surface a useful hint instead of a cryptic one. + raise click.ClickException( + f"Could not ensure HF bucket '{self.bucket}' exists: {e}\n" + f"Create it first at https://huggingface.co/new-bucket (set it Private), " + f"or check your --hf-token has write access." 
+ ) From 9f0659eb59928f21e09d0cf2229eb67b43f6d2d9 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:14:31 +0300 Subject: [PATCH 04/22] init: add --private/--public flag (default: private) Passed through to the backend's ensure_bucket via the store factory. Plain S3 ignores it (bucket ACLs are out of scope there); the HF backend uses it to decide bucket visibility at creation time. Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/cli/init_cmd.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py index d1a9762..52b6195 100644 --- a/sdk/tracecraft/cli/init_cmd.py +++ b/sdk/tracecraft/cli/init_cmd.py @@ -1,6 +1,5 @@ """tracecraft init — configure and register agent.""" -import os from datetime import datetime, timezone from pathlib import Path @@ -39,7 +38,14 @@ @click.option( "--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)" ) -def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token): +@click.option( + "--private/--public", + "private", + default=True, + help="Create the bucket private (default) or public. HF only. 
" + "Internal memory/transcripts should stay private.", +) +def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token, private): """Initialize tracecraft config, create bucket, and register agent.""" cfg = { "backend": backend, @@ -66,7 +72,7 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, save_config(cfg) _ensure_gitignore_entry() - store = _get_store(cfg) + store = _get_store(cfg, private=private) store.ensure_bucket() now = datetime.now(timezone.utc).isoformat() @@ -115,13 +121,18 @@ def _ensure_gitignore_entry(): ) -def _get_store(cfg): +def _get_store(cfg, private=True): """Create the right storage backend from config.""" backend = cfg.get("backend", "s3") if backend == "hf": from tracecraft.hf import HF - return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token")) + return HF( + bucket=cfg["bucket"], + project=cfg["project"], + token=cfg.get("hf_token"), + private=private, + ) else: from tracecraft.s3 import S3 From 9f581e932874ef363fd69cc4830077552472a986 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:15:04 +0300 Subject: [PATCH 05/22] init: report real HF bucket visibility from bucket_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The init line now reads 'Backend: HuggingFace Buckets Bucket: user/x (private)' with the state read back from the Hub, not assumed from the flag — create_bucket(exist_ok=True) keeps a pre-existing bucket's visibility, so flag and reality can disagree. Also surface the best-effort-claims caveat for the HF backend at init time. 
Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/cli/init_cmd.py | 20 +++++++++++++++++++- sdk/tracecraft/hf.py | 15 +++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py index 52b6195..2a57b10 100644 --- a/sdk/tracecraft/cli/init_cmd.py +++ b/sdk/tracecraft/cli/init_cmd.py @@ -92,7 +92,25 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, if backend == "s3": click.echo(f"Backend: S3 Endpoint: {endpoint} Bucket: {bucket}") else: - click.echo(f"Backend: HuggingFace Buckets Bucket: {bucket}") + # Report the bucket's *actual* visibility, read back from the Hub — + # create_bucket(exist_ok=True) keeps a pre-existing bucket's setting, + # so the --private/--public flag and reality can disagree. + actual_private = store.bucket_privacy() + if actual_private is None: + visibility = "visibility unknown" + else: + visibility = "private" if actual_private else "PUBLIC" + click.echo(f"Backend: HuggingFace Buckets Bucket: {bucket} ({visibility})") + # Be honest about the core-promise gap on this backend (see hf.py put_json): + # HF has no conditional-write, so atomic claims are best-effort there. + click.echo( + "Note: HuggingFace buckets have no conditional-write primitive, so " + "`tracecraft claim` is best-effort (racy) here — two agents can both think " + "they won. For safe atomic claims, use an S3-compatible backend (AWS, R2, " + "MinIO, B2, Wasabi). Memory, messaging, handoffs, and session mirroring are " + "unaffected.", + err=True, + ) click.echo("Note: .tracecraft.json contains credentials. Keep it out of version control.") diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py index 318edbd..626a2fa 100644 --- a/sdk/tracecraft/hf.py +++ b/sdk/tracecraft/hf.py @@ -126,3 +126,18 @@ def ensure_bucket(self): f"Create it first at https://huggingface.co/new-bucket (set it Private), " f"or check your --hf-token has write access." 
) + + def bucket_privacy(self): + """Return the bucket's *actual* visibility: True=private, False=public, + None if it can't be determined (network error, no permission). + + Read back from bucket_info() rather than assumed from the flag we passed — + create_bucket(exist_ok=True) silently keeps a pre-existing bucket's + visibility, so the flag and reality can disagree. + """ + try: + from huggingface_hub import HfApi + + return bool(HfApi(token=self.token).bucket_info(self.bucket).private) + except Exception: + return None From c6ec0df8cd8fc645fdd2b7fdfc8279ade73ebd49 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:15:24 +0300 Subject: [PATCH 06/22] init: warn prominently when an existing HF bucket is public MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the bucket pre-exists as PUBLIC and the user did not pass --public, init now spells out that coordination data and mirrored transcripts will be publicly visible, and that huggingface_hub has no update_bucket — delete + recreate as private is the only remedy. Fixes #8. Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/cli/init_cmd.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py index 2a57b10..7514a36 100644 --- a/sdk/tracecraft/cli/init_cmd.py +++ b/sdk/tracecraft/cli/init_cmd.py @@ -101,6 +101,20 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, else: visibility = "private" if actual_private else "PUBLIC" click.echo(f"Backend: HuggingFace Buckets Bucket: {bucket} ({visibility})") + if actual_private is False and private: + # The bucket pre-existed as public; we asked for private but + # create_bucket(exist_ok=True) never changes an existing bucket. 
+ click.echo( + "\n" + " WARNING: bucket already exists and is PUBLIC.\n" + f" Everything tracecraft writes to '{bucket}' — shared memory, messages,\n" + " handoffs, and mirrored session transcripts — will be publicly visible\n" + " on the Hub. huggingface_hub has no update_bucket, so visibility cannot\n" + " be flipped in place: the only remedy is to delete the bucket and\n" + " re-run init so tracecraft recreates it private.\n" + " If public was intentional, pass --public to silence this warning.\n", + err=True, + ) # Be honest about the core-promise gap on this backend (see hf.py put_json): # HF has no conditional-write, so atomic claims are best-effort there. click.echo( From 976224f8a198b06849996234b1ac51417782581e Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:15:57 +0300 Subject: [PATCH 07/22] hf: actionable error when a write hits a missing bucket A put against a nonexistent bucket used to surface HfFileSystem's raw 'repository and revision' resolution error. Now the error names the bucket, points at 'tracecraft init', and suggests checking the 'username/bucket-name' handle. Applies to put_json and put_file. Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/hf.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py index 626a2fa..079b88f 100644 --- a/sdk/tracecraft/hf.py +++ b/sdk/tracecraft/hf.py @@ -30,6 +30,24 @@ def from_config(cls): def _path(self, key): return f"{self.base}/{self.project}/{key}" + def _raise_write_error(self, e): + """Translate raw HfFileSystem write errors into actionable ones. + + A put against a bucket that doesn't exist surfaces as a cryptic + 'repository and revision' / 404 resolution error from HfFileSystem — + name the bucket and say what to do instead. 
+ """ + msg = str(e) + if isinstance(e, FileNotFoundError) or ( + "Repository Not Found" in msg or "repository and revision" in msg or "404" in msg + ): + raise click.ClickException( + f"HF write failed: bucket '{self.bucket}' was not found.\n" + f"Run `tracecraft init --backend hf --bucket {self.bucket} ...` to create it, " + f"and check the bucket handle is 'username/bucket-name'." + ) + raise click.ClickException(f"HF write failed: {e}") + def put_json(self, key, data, if_none_match=False): try: path = self._path(key) @@ -47,7 +65,7 @@ def put_json(self, key, data, if_none_match=False): if isinstance(e, PreconditionFailed): raise - raise click.ClickException(f"HF put failed: {e}") + self._raise_write_error(e) def get_json(self, key): try: @@ -97,7 +115,7 @@ def put_file(self, key, local_path): try: self.fs.put(local_path, self._path(key)) except Exception as e: - raise click.ClickException(f"HF upload failed: {e}") + self._raise_write_error(e) def get_file(self, key, local_path): try: From a11559e9b2c54e90ac664e56085f2421fae46b29 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:16:18 +0300 Subject: [PATCH 08/22] hf: don't swallow auth errors as False in exists() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit exists() caught every exception and returned False, so a bad or under-scoped token looked identical to an empty bucket — and let the best-effort claim path sail past its check-then-write guard. 401/403 now raise with a pointer at the token; genuine not-found stays False. 
Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/hf.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py index 079b88f..7c67969 100644 --- a/sdk/tracecraft/hf.py +++ b/sdk/tracecraft/hf.py @@ -100,7 +100,19 @@ def list_keys(self, prefix=""): def exists(self, key): try: return self.fs.exists(self._path(key)) - except Exception: + except FileNotFoundError: + return False + except Exception as e: + # "Not found" is a legitimate False; "unauthorized" is not — swallowing + # it makes a bad token look like an empty bucket (and lets a best-effort + # claim race past its check-then-write guard). + status = getattr(getattr(e, "response", None), "status_code", None) + if status in (401, 403) or "unauthorized" in str(e).lower(): + raise click.ClickException( + f"HF auth error while checking '{key}': {e}\n" + f"Check that your token (--hf-token / HF_TOKEN) has read access " + f"to '{self.bucket}'." + ) return False def delete(self, key): From 55095d2e81ca0def5d2e0936987d33b63559603e Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:16:40 +0300 Subject: [PATCH 09/22] messages: unique keys per send; inbox sorted by sent_at Message keys were messages/<recipient>/<ts>_<sender>.json, so two messages from one sender inside the same second collided and the later silently overwrote the earlier. Keys now carry nanosecond resolution plus a uuid4 suffix, so every send is a distinct object. inbox now merges direct + broadcast messages and prints them in sent_at order instead of raw list order, which interleaved the two prefixes arbitrarily.
Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/cli/messages.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/sdk/tracecraft/cli/messages.py b/sdk/tracecraft/cli/messages.py index efba3c4..fef94fd 100644 --- a/sdk/tracecraft/cli/messages.py +++ b/sdk/tracecraft/cli/messages.py @@ -1,6 +1,7 @@ """tracecraft send/inbox — agent-to-agent messaging via S3.""" import time +import uuid from datetime import datetime, timezone import click @@ -17,10 +18,17 @@ def send(recipient, message): raise click.ClickException("Recipient cannot be empty") store, cfg = get_store() sender = cfg["agent_id"] - ts = int(time.time()) now = datetime.now(timezone.utc).isoformat() - key = f"messages/{recipient}/{ts}_{sender}.json" + # Message keys MUST be unique per send. A whole-second timestamp collides when + # one sender fires two messages to the same recipient in the same second — the + # second silently overwrites the first (measured: a 5-message burst kept only 1). + # Use nanosecond resolution for rough chronological ordering PLUS a uuid suffix + # that guarantees uniqueness even at sub-nanosecond send rates or clock ties. + # (Same approach the session mirror uses for its part keys.) + ts_ns = time.time_ns() + uniq = uuid.uuid4().hex[:8] + key = f"messages/{recipient}/{ts_ns}_{sender}_{uniq}.json" store.put_json( key, { @@ -48,25 +56,30 @@ def inbox(delete): click.echo("No messages.") return - count = 0 + # Merge direct + broadcast and sort by sent_at — raw list order interleaves + # the two prefixes, so a broadcast could print before the direct message + # that preceded it. 
+ messages = [] for key in all_keys: data = store.get_json(key) if data is None: continue - sender = data.get("from", "?") # Skip own broadcasts - if "_broadcast/" in key and sender == my_id: + if "_broadcast/" in key and data.get("from", "?") == my_id: continue + messages.append((key, data)) + messages.sort(key=lambda kd: kd[1].get("sent_at", "")) + + for key, data in messages: + sender = data.get("from", "?") msg = data.get("message", "") sent_at = data.get("sent_at", "?") target = "broadcast" if "_broadcast/" in key else "direct" click.echo(f"[{sent_at}] ({target}) {sender}: {msg}") - count += 1 - if delete: store.delete(key) - if count == 0: + if not messages: click.echo("No messages.") elif delete: - click.echo(f"Deleted {count} message(s).") + click.echo(f"Deleted {len(messages)} message(s).") From 2c572b078620cdd707e9e2847380c52537bcb0f8 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:17:17 +0300 Subject: [PATCH 10/22] steps: complete enforces claim ownership; --force to override Any agent could previously complete any step, including one another agent was actively working on. complete now reads claim.json first and fails with a clear error when the claim belongs to a different agent, unless --force is passed (escape hatch for crashed claim-holders). Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/cli/steps.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py index c0c34dd..75a57f7 100644 --- a/sdk/tracecraft/cli/steps.py +++ b/sdk/tracecraft/cli/steps.py @@ -78,7 +78,14 @@ def claim(step_id): is_flag=True, help="Record files changed (from `git diff`), so the next agent knows what moved. No-op outside a git repo.", ) -def complete(step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git): +@click.option( + "--force", + is_flag=True, + help="Complete a step claimed by a different agent (e.g. 
the claim-holder crashed).", +) +def complete( + step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git, force +): """Mark a step complete (or blocked / needs-review) and write a handoff record. The handoff record is what the next agent sees instead of a shared @@ -95,6 +102,15 @@ def complete(step_id, note, next_agent, next_action, blocked, needs_review, chan sid = step_id.lower().replace(".", "-") now = datetime.now(timezone.utc).isoformat() + # A step belongs to whoever claimed it — without this check any agent + # could mark any step complete and silently steal/clobber someone's work. + claim_doc = store.get_json(f"steps/{sid}/claim.json") + if claim_doc and claim_doc.get("agent") not in (None, agent) and not force: + raise click.ClickException( + f"Step {step_id} is claimed by '{claim_doc['agent']}', not '{agent}'. " + f"Pass --force to complete it anyway (e.g. if the claim-holder crashed)." + ) + state = "blocked" if blocked else "needs_review" if needs_review else "complete" # Status reflects the real outcome (not always "complete"). From de0cb612d323a18fc1e3a03b174cdca5af7aab65 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:18:10 +0300 Subject: [PATCH 11/22] steps: tolerate the claim/status crash window in readers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit claim.json (atomic) and status.json are two separate writes; a crash between them leaves a claim with no status. step-status and wait-for now resolve that state as in_progress by the claiming agent via a shared _effective_status helper — the claim is the authoritative write. Invariant documented in CLAUDE.md. Also warn at claim time on the HF backend that claims are best-effort (no conditional-write upstream), matching the note init prints. 
Co-Authored-By: Claude Fable 5 --- CLAUDE.md | 5 +++++ sdk/tracecraft/cli/steps.py | 42 +++++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index ea7b340..370f35c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -56,6 +56,11 @@ pivot lives in `plans/server-archive/` for reference only — nothing in the SDK many isolated projects. - **No server, no daemon**: each CLI call is stateless; state lives on the bucket. - **No vendor lock-in**: AWS, R2, MinIO, B2, Wasabi, HuggingFace all work today. +- **Claim/status crash-window invariant**: `claim.json` (atomic) and `status.json` are + two separate writes; a crash between them leaves a claim with no status. Readers MUST + treat "claim.json exists, status.json missing" as `in_progress` by the claiming agent — + the claim is the authoritative write (`step-status` and `wait-for` implement this via + `_effective_status` in `cli/steps.py`). ## Known gaps (May 2026) diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py index 75a57f7..5ecdacc 100644 --- a/sdk/tracecraft/cli/steps.py +++ b/sdk/tracecraft/cli/steps.py @@ -62,6 +62,15 @@ def claim(step_id): }, ) click.echo(f"Claimed step {step_id} as {agent}") + if cfg.get("backend") == "hf": + # The claim is best-effort on HF (no conditional-write); don't let the + # success message imply the race was atomically arbitrated. + click.echo( + "warning: claims on the HuggingFace backend are best-effort (racy) — " + "another agent may also believe it won this step. Use an S3-compatible " + "backend for atomic claims.", + err=True, + ) @click.command() @@ -151,19 +160,34 @@ def complete( click.echo(msg) +def _effective_status(store, sid): + """Resolve a step's status, tolerating the claim/status crash window. + + claim.json (atomic) and status.json are two separate writes; a crash + between them leaves a claim with no status. 
Readers treat that state as + in_progress by the claiming agent — the claim is the authoritative write. + Returns (status, agent); status is 'pending' when neither file exists. + """ + data = store.get_json(f"steps/{sid}/status.json") + if data is not None: + return data.get("status", "unknown"), data.get("agent", "?") + claim_doc = store.get_json(f"steps/{sid}/claim.json") + if claim_doc is not None: + return "in_progress", claim_doc.get("agent", "?") + return "pending", None + + @click.command() @click.argument("step_id") def step_status(step_id): """Check the status of a step.""" store, _ = get_store() sid = step_id.lower().replace(".", "-") - data = store.get_json(f"steps/{sid}/status.json") - if data is None: - click.echo(f"{step_id}: pending") - return - status = data.get("status", "unknown") - agent = data.get("agent", "?") - click.echo(f"{step_id}: {status} (agent: {agent})") + status, agent = _effective_status(store, sid) + if agent is None: + click.echo(f"{step_id}: {status}") + else: + click.echo(f"{step_id}: {status} (agent: {agent})") @click.command() @@ -178,8 +202,8 @@ def wait_for(step_ids, timeout): all_done = True for step_id in step_ids: sid = step_id.lower().replace(".", "-") - data = store.get_json(f"steps/{sid}/status.json") - if data is None or data.get("status") != "complete": + status, _ = _effective_status(store, sid) + if status != "complete": all_done = False break From b6d9d6d19e29ed4a45917f248349a159bd142041 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:18:31 +0300 Subject: [PATCH 12/22] steps: wait-for fails fast on blocked steps A blocked step never completes on its own, so waiters used to spin until the full timeout. wait-for now exits non-zero immediately with a clear message naming the blocked step. needs_review still counts as waiting but is called out in the progress line so a human can step in. 
Co-Authored-By: Claude Fable 5 --- sdk/tracecraft/cli/steps.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py index 5ecdacc..0d42ac9 100644 --- a/sdk/tracecraft/cli/steps.py +++ b/sdk/tracecraft/cli/steps.py @@ -200,19 +200,31 @@ def wait_for(step_ids, timeout): while time.time() < deadline: all_done = True + needs_review = [] for step_id in step_ids: sid = step_id.lower().replace(".", "-") - status, _ = _effective_status(store, sid) + status, agent = _effective_status(store, sid) + if status == "blocked": + # A blocked step won't complete on its own — failing fast beats + # spinning until the full timeout. + raise click.ClickException( + f"Step {step_id} is blocked (agent: {agent}) — it will not " + f"complete without intervention. Resolve it and re-run wait-for." + ) + if status == "needs_review": + needs_review.append(step_id) if status != "complete": all_done = False - break if all_done: click.echo(f"All steps complete: {', '.join(step_ids)}") return remaining = int(deadline - time.time()) - click.echo(f"Waiting... ({remaining}s remaining)", err=True) + progress = f"Waiting... ({remaining}s remaining)" + if needs_review: + progress += f" — needs review: {', '.join(needs_review)}" + click.echo(progress, err=True) time.sleep(5) raise click.ClickException( From 453d8d24e038e5543e721747d34361a91637d0b6 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:18:57 +0300 Subject: [PATCH 13/22] deps: drop unused httpx and pydantic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero imports of either anywhere in the package or tests — leftovers from the pre-pivot FastAPI scaffolding. Runtime deps are now just click + boto3. Also remove two unused imports in test_session_cli.py (pre-existing ruff check failures). 
Co-Authored-By: Claude Fable 5 --- sdk/pyproject.toml | 2 -- sdk/tests/test_session_cli.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index c6446f4..db4310f 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -25,8 +25,6 @@ classifiers = [ ] dependencies = [ "click>=8.1.0", - "httpx>=0.25.0", - "pydantic>=2.5.0", "boto3>=1.28.0", ] diff --git a/sdk/tests/test_session_cli.py b/sdk/tests/test_session_cli.py index f2b2f7b..bc9ce73 100644 --- a/sdk/tests/test_session_cli.py +++ b/sdk/tests/test_session_cli.py @@ -20,8 +20,6 @@ from __future__ import annotations import json -import os -from pathlib import Path import boto3 import pytest From 7e82b5f0e1bb46e7cd70dfda29b6c006dcb54ca3 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:24:17 +0300 Subject: [PATCH 14/22] tests: coordination correctness + messaging + HF onboarding moto-backed (no network): - claim race: two claimers, exactly one wins, holder preserved - same-second message bursts keep every message (uuid-suffixed keys) - inbox merges direct + broadcast chronologically by sent_at - complete rejects a non-owner without --force, allows with it - wait-for fails fast on blocked, names needs_review while waiting - claim.json-without-status.json reads as in_progress (crash window) HF backend tests mock HfApi/HfFileSystem in-memory: private-by-default creation, --public opt-out, real visibility readback, the existing-public-bucket warning, actionable missing-bucket write errors, and exists() raising on 401/403 instead of returning False. 
Co-Authored-By: Claude Fable 5 --- sdk/tests/test_coordination.py | 143 ++++++++++++++++++ sdk/tests/test_hf_onboarding.py | 250 ++++++++++++++++++++++++++++++++ sdk/tests/test_messaging.py | 127 ++++++++++++++++ 3 files changed, 520 insertions(+) create mode 100644 sdk/tests/test_coordination.py create mode 100644 sdk/tests/test_hf_onboarding.py create mode 100644 sdk/tests/test_messaging.py diff --git a/sdk/tests/test_coordination.py b/sdk/tests/test_coordination.py new file mode 100644 index 0000000..ce6ff8b --- /dev/null +++ b/sdk/tests/test_coordination.py @@ -0,0 +1,143 @@ +"""Tests for coordination correctness: claim races, complete ownership, +the claim/status crash window, and wait-for's blocked fast-fail. + +All run against moto's in-process S3 — no network. +""" + +from __future__ import annotations + +import json +import time + +import boto3 +import pytest +from click.testing import CliRunner +from moto import mock_aws + +from tracecraft.cli import cli + +BUCKET = "tc-coord-test" +PROJECT = "demo" + + +@pytest.fixture +def env(tmp_path, monkeypatch): + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") + work = tmp_path / "work" + work.mkdir() + monkeypatch.chdir(work) + cfg = { + "backend": "s3", + "endpoint": None, + "bucket": BUCKET, + "project": PROJECT, + "agent_id": "agent-a", + "access_key": "testing", + "secret_key": "testing", + } + (work / ".tracecraft.json").write_text(json.dumps(cfg)) + with mock_aws(): + boto3.client("s3").create_bucket(Bucket=BUCKET) + yield CliRunner() + + +def _as(agent): + return {"TRACECRAFT_AGENT": agent} + + +def _get(key): + c = boto3.client("s3") + return json.loads(c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/{key}")["Body"].read()) + + +# ---------- atomic claim: two claimers, exactly one wins ---------- + + +def test_claim_race_exactly_one_winner(env): + r1 = env.invoke(cli, ["claim", "build"], 
env=_as("agent-a")) + r2 = env.invoke(cli, ["claim", "build"], env=_as("agent-b")) + outcomes = [r.exit_code == 0 for r in (r1, r2)] + assert outcomes.count(True) == 1, f"exactly one claimer must win: {r1.output} / {r2.output}" + assert "already claimed by agent-a" in r2.output + assert _get("steps/build/claim.json")["agent"] == "agent-a" + + +# ---------- complete: ownership enforced, --force overrides ---------- + + +def test_complete_rejects_non_owner(env): + env.invoke(cli, ["claim", "build"], env=_as("agent-a")) + r = env.invoke(cli, ["complete", "build"], env=_as("agent-b")) + assert r.exit_code != 0 + assert "claimed by 'agent-a'" in r.output + assert "--force" in r.output + # the step's status must be untouched + assert _get("steps/build/status.json")["status"] == "in_progress" + + +def test_complete_owner_succeeds(env): + env.invoke(cli, ["claim", "build"], env=_as("agent-a")) + r = env.invoke(cli, ["complete", "build"], env=_as("agent-a")) + assert r.exit_code == 0, r.output + assert _get("steps/build/status.json")["status"] == "complete" + + +def test_complete_force_overrides_ownership(env): + env.invoke(cli, ["claim", "build"], env=_as("agent-a")) + r = env.invoke(cli, ["complete", "build", "--force"], env=_as("agent-b")) + assert r.exit_code == 0, r.output + doc = _get("steps/build/status.json") + assert doc["status"] == "complete" + assert doc["agent"] == "agent-b" + + +def test_complete_unclaimed_step_is_allowed(env): + """No claim.json at all — nothing to own, complete goes through.""" + r = env.invoke(cli, ["complete", "adhoc"], env=_as("agent-a")) + assert r.exit_code == 0, r.output + + +# ---------- crash window: claim.json exists, status.json missing ---------- + + +def test_step_status_treats_claim_without_status_as_in_progress(env): + env.invoke(cli, ["claim", "build"], env=_as("agent-a")) + # simulate a crash between the two writes: claim landed, status didn't + boto3.client("s3").delete_object(Bucket=BUCKET, 
Key=f"{PROJECT}/steps/build/status.json") + r = env.invoke(cli, ["step-status", "build"]) + assert r.exit_code == 0, r.output + assert "in_progress" in r.output + assert "agent-a" in r.output + + +def test_wait_for_treats_claim_without_status_as_waiting(env): + env.invoke(cli, ["claim", "build"], env=_as("agent-a")) + boto3.client("s3").delete_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/build/status.json") + r = env.invoke(cli, ["wait-for", "build", "--timeout", "1"]) + # not complete, not blocked → waits, then times out (no crash, no false success) + assert r.exit_code != 0 + assert "Timeout" in r.output + + +# ---------- wait-for: blocked fails fast, needs_review keeps waiting ---------- + + +def test_wait_for_fast_fails_on_blocked(env): + env.invoke(cli, ["claim", "build"], env=_as("agent-a")) + env.invoke(cli, ["complete", "build", "--blocked"], env=_as("agent-a")) + start = time.monotonic() + r = env.invoke(cli, ["wait-for", "build", "--timeout", "300"]) + elapsed = time.monotonic() - start + assert r.exit_code != 0 + assert "blocked" in r.output + assert elapsed < 10, f"must fail fast, not spin toward the timeout (took {elapsed:.1f}s)" + + +def test_wait_for_mentions_needs_review_while_waiting(env): + env.invoke(cli, ["claim", "build"], env=_as("agent-a")) + env.invoke(cli, ["complete", "build", "--needs-review"], env=_as("agent-a")) + r = env.invoke(cli, ["wait-for", "build", "--timeout", "1"]) + assert r.exit_code != 0 # still waiting → times out + assert "needs review: build" in r.output diff --git a/sdk/tests/test_hf_onboarding.py b/sdk/tests/test_hf_onboarding.py new file mode 100644 index 0000000..48c9192 --- /dev/null +++ b/sdk/tests/test_hf_onboarding.py @@ -0,0 +1,250 @@ +"""Tests for the HF onboarding + correctness-honesty fixes. + +Covers three real, externally-reported issues: + - #7: `init --backend hf` against a non-existent bucket must auto-create it + (HF ensure_bucket() was a no-op; first write failed cryptically). 
+ - #8: HF buckets are public-by-default; init must create them PRIVATE by default, + with an explicit --public opt-out. + - correctness honesty: claims on HF are best-effort (no conditional-write), so both + `init --backend hf` and `claim` must SAY SO rather than imply atomicity. + +These mock the HuggingFace SDK (no network) — they verify the wiring (private flag +reaches create_bucket; the warnings are emitted), not HF's servers. +""" + +from __future__ import annotations + +import json +import sys +import types + +import click +import pytest +from click.testing import CliRunner + +from tracecraft.cli.init_cmd import init_cmd +from tracecraft.cli.steps import claim + + +class FakeBucketState: + """Records create_bucket calls and stores written JSON in-memory.""" + + def __init__(self): + self.create_calls = [] # list of (bucket, private, exist_ok) + self.objects = {} # path -> data + self.buckets = {} # bucket_id -> private (bool); pre-seed to simulate existing + + +@pytest.fixture +def hf_stub(monkeypatch): + """Stub huggingface_hub so init/claim run against an in-memory fake HF backend.""" + state = FakeBucketState() + + # --- fake huggingface_hub module surface used by tracecraft.hf --- + class FakeApi: + """Mimics HfApi: create_bucket(exist_ok=True) never changes an existing + bucket's visibility; bucket_info returns the actual state.""" + + def __init__(self, token=None): + self.token = token + + def create_bucket(self, bucket_id, *, private=None, exist_ok=False, **kw): + state.create_calls.append((bucket_id, private, exist_ok)) + if bucket_id in state.buckets: + if not exist_ok: + raise ValueError(f"Bucket {bucket_id} already exists") + else: + state.buckets[bucket_id] = bool(private) + return f"hf://buckets/{bucket_id}" + + def bucket_info(self, bucket_id, **kw): + if bucket_id not in state.buckets: + raise FileNotFoundError(bucket_id) + return types.SimpleNamespace(private=state.buckets[bucket_id]) + + class FakeFS: + def __init__(self, *a, **k): + pass 
+ + def exists(self, path): + return path in state.objects + + def open(self, path, mode="r"): + store = state.objects + + class _F: + def __enter__(self_): + if "r" in mode: + self_._buf = store.get(path, "") + return self_ + + def __exit__(self_, *exc): + return False + + def write(self_, s): + store[path] = store.get(path, "") + s + + def read(self_): + return self_._buf + + return _F() + + def find(self, path, detail=False): + return [p for p in state.objects if p.startswith(path)] + + fake_hf = types.ModuleType("huggingface_hub") + fake_hf.HfFileSystem = FakeFS + fake_hf.HfApi = FakeApi + monkeypatch.setitem(sys.modules, "huggingface_hub", fake_hf) + return state + + +def _init(runner, tmp_path, monkeypatch, *extra): + monkeypatch.chdir(tmp_path) + args = [ + "--backend", + "hf", + "--bucket", + "user/tc-test", + "--project", + "demo", + "--agent", + "tester", + "--hf-token", + "hf_faketoken", + *extra, + ] + return runner.invoke(init_cmd, args) + + +# ---------- #7: auto-create ---------- + + +def test_init_hf_creates_bucket(hf_stub, tmp_path, monkeypatch): + r = _init(CliRunner(), tmp_path, monkeypatch) + assert r.exit_code == 0, r.output + # ensure_bucket() actually called create_bucket (was a no-op before) + assert len(hf_stub.create_calls) == 1 + bucket, private, exist_ok = hf_stub.create_calls[0] + assert bucket == "user/tc-test" + assert exist_ok is True # idempotent: don't fail if it already exists + # the agent record was written (the first write that used to fail cryptically) + assert any("agents/tester.json" in p for p in hf_stub.objects) + + +# ---------- #8: private by default, --public opt-out ---------- + + +def test_init_hf_private_by_default(hf_stub, tmp_path, monkeypatch): + r = _init(CliRunner(), tmp_path, monkeypatch) + assert r.exit_code == 0, r.output + _, private, _ = hf_stub.create_calls[0] + assert private is True + assert "(private)" in r.output + + +def test_init_hf_public_when_asked(hf_stub, tmp_path, monkeypatch): + r = 
_init(CliRunner(), tmp_path, monkeypatch, "--public") + assert r.exit_code == 0, r.output + _, private, _ = hf_stub.create_calls[0] + assert private is False + assert "(PUBLIC)" in r.output + + +# ---------- correctness honesty ---------- + + +def test_init_hf_warns_claims_are_best_effort(hf_stub, tmp_path, monkeypatch): + r = _init(CliRunner(), tmp_path, monkeypatch) + assert r.exit_code == 0, r.output + # the racy-claim caveat must be surfaced at init (output includes stderr via CliRunner) + assert "best-effort" in r.output.lower() + assert "S3-compatible" in r.output + + +def test_claim_on_hf_warns_best_effort(hf_stub, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + # write an hf config the CWD-first loader will pick up + cfg = { + "backend": "hf", + "bucket": "user/tc-test", + "project": "demo", + "agent_id": "tester", + "hf_token": "hf_faketoken", + } + (tmp_path / ".tracecraft.json").write_text(json.dumps(cfg)) + r = CliRunner().invoke(claim, ["build"]) + assert r.exit_code == 0, r.output + assert "Claimed step build" in r.output + assert "best-effort" in r.output.lower() + + +# ---------- #8: pre-existing PUBLIC bucket triggers a prominent warning ---------- + + +def test_init_hf_existing_public_bucket_warns(hf_stub, tmp_path, monkeypatch): + """Bucket pre-exists as public; user asked for private (default) — init must + say the data will be publicly visible and that delete+recreate is the only fix.""" + hf_stub.buckets["user/tc-test"] = False # exists, public + r = _init(CliRunner(), tmp_path, monkeypatch) + assert r.exit_code == 0, r.output + assert "(PUBLIC)" in r.output # real state, not the requested flag + assert "WARNING" in r.output + assert "publicly visible" in r.output + assert "delete" in r.output.lower() + + +def test_init_hf_existing_public_bucket_no_warning_with_public_flag(hf_stub, tmp_path, monkeypatch): + hf_stub.buckets["user/tc-test"] = False + r = _init(CliRunner(), tmp_path, monkeypatch, "--public") + assert r.exit_code == 0, 
r.output + assert "WARNING" not in r.output + + +# ---------- write errors name the bucket and point at init ---------- + + +def test_put_against_missing_bucket_is_actionable(hf_stub, monkeypatch): + from tracecraft.hf import HF + + store = HF(bucket="user/tc-test", project="demo", token="hf_faketoken") + + def boom(*a, **k): + raise OSError("unable to resolve path: invalid repository and revision") + + monkeypatch.setattr(store.fs, "open", boom) + with pytest.raises(click.ClickException) as ei: + store.put_json("memory/x.json", {"v": 1}) + msg = str(ei.value) + assert "user/tc-test" in msg + assert "tracecraft init" in msg + assert "repository and revision" not in msg # raw error replaced, not echoed + + +# ---------- exists(): not-found is False, unauthorized raises ---------- + + +def test_exists_not_found_is_false(hf_stub, monkeypatch): + from tracecraft.hf import HF + + store = HF(bucket="user/tc-test", project="demo", token="hf_faketoken") + monkeypatch.setattr(store.fs, "exists", lambda p: (_ for _ in ()).throw(FileNotFoundError(p))) + assert store.exists("memory/x.json") is False + + +def test_exists_surfaces_auth_errors(hf_stub, monkeypatch): + from tracecraft.hf import HF + + store = HF(bucket="user/tc-test", project="demo", token="hf_badtoken") + + def boom(path): + e = Exception("401 Client Error: Unauthorized for url") + e.response = types.SimpleNamespace(status_code=401) + raise e + + monkeypatch.setattr(store.fs, "exists", boom) + with pytest.raises(click.ClickException) as ei: + store.exists("memory/x.json") + msg = str(ei.value) + assert "auth" in msg.lower() + assert "HF_TOKEN" in msg diff --git a/sdk/tests/test_messaging.py b/sdk/tests/test_messaging.py new file mode 100644 index 0000000..e3ed7f1 --- /dev/null +++ b/sdk/tests/test_messaging.py @@ -0,0 +1,127 @@ +"""Tests for agent-to-agent messaging — especially the same-instant key collision. 
+ +The bug these guard against: message keys were `messages//_.json`, +so two messages from one sender to one recipient in the same wall-clock second collided on +the same key and the later one silently overwrote the earlier (a 5-message burst kept 1). +The fix uses nanosecond resolution + a uuid suffix, so every send is a distinct key. +""" + +from __future__ import annotations + +import json + +import boto3 +import pytest +from click.testing import CliRunner +from moto import mock_aws + +from tracecraft.cli import cli + + +BUCKET = "tc-msg-test" +PROJECT = "demo" + + +@pytest.fixture +def env(tmp_path, monkeypatch): + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") + work = tmp_path / "work" + work.mkdir() + monkeypatch.chdir(work) + cfg = { + "backend": "s3", + "endpoint": None, + "bucket": BUCKET, + "project": PROJECT, + "agent_id": "designer", + "access_key": "testing", + "secret_key": "testing", + } + (work / ".tracecraft.json").write_text(json.dumps(cfg)) + fake_home = tmp_path / "home" + (fake_home / ".tracecraft").mkdir(parents=True) + (fake_home / ".tracecraft" / "config.json").write_text(json.dumps(cfg)) + monkeypatch.setenv("HOME", str(fake_home)) + with mock_aws(): + boto3.client("s3").create_bucket(Bucket=BUCKET) + yield CliRunner() + + +def _keys(prefix): + c = boto3.client("s3") + out = c.list_objects_v2(Bucket=BUCKET, Prefix=f"{PROJECT}/{prefix}") + return [o["Key"] for o in out.get("Contents", [])] + + +def test_burst_to_same_recipient_keeps_every_message(env): + """The regression: many messages from one sender to one recipient, sent back to + back (same second), must ALL survive — not collapse onto one overwritten key.""" + n = 8 + for i in range(n): + r = env.invoke(cli, ["send", "reviewer", f"update {i}"]) + assert r.exit_code == 0, r.output + keys = _keys("messages/reviewer/") + assert len(keys) == n, f"expected {n} distinct 
message keys, got {len(keys)}: {keys}" + # and the bodies are all distinct (no overwrite) + c = boto3.client("s3") + bodies = { + json.loads(c.get_object(Bucket=BUCKET, Key=k)["Body"].read())["message"] for k in keys + } + assert bodies == {f"update {i}" for i in range(n)} + + +def test_inbox_reads_the_whole_burst(env): + """End-to-end: a burst sent by one agent is fully readable by the recipient.""" + for i in range(5): + env.invoke(cli, ["send", "reviewer", f"msg {i}"]) + r = env.invoke(cli, ["inbox"], env={"TRACECRAFT_AGENT": "reviewer"}) + assert r.exit_code == 0, r.output + for i in range(5): + assert f"msg {i}" in r.output + + +def test_key_shape_is_unique_per_send(env): + """Two sends to the same recipient produce two different keys even with no delay.""" + env.invoke(cli, ["send", "reviewer", "a"]) + env.invoke(cli, ["send", "reviewer", "b"]) + keys = _keys("messages/reviewer/") + assert len(set(keys)) == 2 + + +def test_broadcast_and_direct_are_separate(env): + """A broadcast lands under _broadcast, a direct message under the recipient.""" + env.invoke(cli, ["send", "_broadcast", "hello all"]) + env.invoke(cli, ["send", "reviewer", "hello you"]) + assert len(_keys("messages/_broadcast/")) == 1 + assert len(_keys("messages/reviewer/")) == 1 + + +def test_inbox_merges_direct_and_broadcast_chronologically(env): + """inbox must interleave direct + broadcast messages by sent_at, not print + one prefix's raw list order after the other.""" + import time as _time + + env.invoke(cli, ["send", "reviewer", "first-direct"]) + _time.sleep(0.01) + env.invoke(cli, ["send", "_broadcast", "second-broadcast"]) + _time.sleep(0.01) + env.invoke(cli, ["send", "reviewer", "third-direct"]) + r = env.invoke(cli, ["inbox"], env={"TRACECRAFT_AGENT": "reviewer"}) + assert r.exit_code == 0, r.output + out = r.output + assert out.index("first-direct") < out.index("second-broadcast") < out.index("third-direct") + + +def test_message_body_carries_sender_and_recipient(env): + """The body 
(not the filename) is the source of truth for from/to — readers parse + the body, so the key shape can change freely without breaking inbox or replay.""" + env.invoke(cli, ["send", "reviewer", "check"]) + c = boto3.client("s3") + k = _keys("messages/reviewer/")[0] + doc = json.loads(c.get_object(Bucket=BUCKET, Key=k)["Body"].read()) + assert doc["from"] == "designer" + assert doc["to"] == "reviewer" + assert doc["message"] == "check" + assert "sent_at" in doc From 8c3a4641ebcb4b15256f684137ae5a1d926687bf Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:24:43 +0300 Subject: [PATCH 15/22] ci: consolidate workflows into ci.yml (ruff check + format + pytest) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces test.yml. Lint job runs ruff check (new — lint errors were previously uncaught) plus the existing ruff format --check; pytest runs on Python 3.10 and 3.12 with dev+huggingface extras, on push and PR. README badge updated to point at the new workflow. 
Co-Authored-By: Claude Fable 5 --- .github/workflows/{test.yml => ci.yml} | 11 +++++++---- README.md | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) rename .github/workflows/{test.yml => ci.yml} (86%) diff --git a/.github/workflows/test.yml b/.github/workflows/ci.yml similarity index 86% rename from .github/workflows/test.yml rename to .github/workflows/ci.yml index 8c4d219..975c5a3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -name: tests +name: ci on: push: @@ -7,7 +7,7 @@ on: branches: [main] jobs: - format: + lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -18,12 +18,15 @@ jobs: python-version: "3.12" cache: pip - - name: Install ruff + - name: Install package + dev extras working-directory: sdk run: | python -m pip install --upgrade pip pip install -e ".[dev]" + - name: Lint (ruff check) + run: ruff check sdk/ + - name: Check formatting (ruff format) run: ruff format --check sdk/ @@ -32,7 +35,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index c4576f7..a9e638c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![PyPI](https://img.shields.io/pypi/v/tracecraft-ai)](https://pypi.org/project/tracecraft-ai/) [![Python](https://img.shields.io/pypi/pyversions/tracecraft-ai)](https://pypi.org/project/tracecraft-ai/) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) -[![Tests](https://github.com/Arrmlet/tracecraft/actions/workflows/test.yml/badge.svg)](https://github.com/Arrmlet/tracecraft/actions/workflows/test.yml) +[![CI](https://github.com/Arrmlet/tracecraft/actions/workflows/ci.yml/badge.svg)](https://github.com/Arrmlet/tracecraft/actions/workflows/ci.yml) **Tracecraft is a CLI coordination layer for multi-agent AI systems** — shared **memory**, a **mailbox**, atomic task 
**claims**, **handoffs**, and **artifacts**, plus mirrored **session transcripts**, all stored as plain JSON in any **S3** or **HuggingFace** bucket. No server. No database. No SDK lock-in. From a6db54b3ffcd80dd083c7874bc338e71a5026fad Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:25:05 +0300 Subject: [PATCH 16/22] docs: refresh CLAUDE.md known gaps after the correctness pass Same-second message collisions and the empty test suite are fixed; claim TTL and heartbeat refresh stay open (need design decisions). Bucket-layout sketch updated to the new message key shape. Co-Authored-By: Claude Fable 5 --- CLAUDE.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 370f35c..5668b72 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -38,8 +38,8 @@ pivot lives in `plans/server-archive/` for reference only — nothing in the SDK // agents/.json ← agent registration + heartbeat memory/.json ← shared key-value state - messages//_.json ← per-agent mailbox - messages/_broadcast/_.json ← broadcast + messages//__.json ← per-agent mailbox + messages/_broadcast/__.json ← broadcast steps//claim.json ← atomic claim (If-None-Match=*) steps//status.json ← pending / in_progress / complete steps//handoff.json ← note + from_agent for next agent @@ -62,12 +62,11 @@ pivot lives in `plans/server-archive/` for reference only — nothing in the SDK the claim is the authoritative write (`step-status` and `wait-for` implement this via `_effective_status` in `cli/steps.py`). -## Known gaps (May 2026) +## Known gaps (June 2026) -- No TTL on claims (a crashed claim-holder keeps the lock forever) — Tier 1 work. +- No TTL on claims (a crashed claim-holder keeps the lock forever; `complete --force` + is the manual escape hatch) — Tier 1 work. - Heartbeat is written at `init` only, never refreshed — Tier 1 work. -- Messages keyed by `_.json` can collide same-second — Tier 1 work. -- No tests in `sdk/tests/` — Tier 1 work. 
## Building From 89e3fa28147f6a5af61307bd41775c29baa394c4 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:25:41 +0300 Subject: [PATCH 17/22] README: document session mirroring properly Retitle the section and spell out the four harnesses, incremental cursor uploads (safe to re-run on a cron; seq derived from the bucket), default secret redaction with per-pattern counts in meta.json, and replay via 'session show --tail'. Session commands were already in the CLI reference block. Co-Authored-By: Claude Fable 5 --- README.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a9e638c..e86594c 100644 --- a/README.md +++ b/README.md @@ -96,16 +96,21 @@ tracecraft send _broadcast "v1 cut at 3pm, wrap your tasks" --- -## Coordination + reasoning in one bucket +## Session mirroring Most coordination tools store the *events* — who claimed what, who messaged whom. Tracecraft stores those **and** each agent's full reasoning, by mirroring coding-agent session transcripts into the same bucket. When a run goes sideways, one `tracecraft session show` gives you the handoffs **and** the chain of thought behind them — same place, same JSON, no second system to wire up. ```bash -tracecraft session mirror --harness claude-code # tail this session into the bucket -tracecraft session show --tail 50 # read coordination + reasoning together +tracecraft session mirror --harness claude-code # upload this session's new bytes +tracecraft session list # browse mirrored sessions +tracecraft session show --tail 50 # replay: meta + last N transcript lines +tracecraft session stop # clear local cursor, mark session ended ``` -Works with **Claude Code, Codex, OpenClaw, and Hermes**. Source transcripts are never modified; secret-shape redaction (AWS / Anthropic / OpenAI / HF / GitHub / Slack token patterns) is on by default and counted in metadata. +- **Four harnesses** — `claude-code`, `codex`, `openclaw`, `hermes`. 
Anything else can mirror by writing JSONL to the same layout. +- **Incremental cursor uploads** — `mirror` keeps a per-session byte offset and uploads only what's new as numbered parts, so re-running it from a cron or hook is safe and cheap; a run with nothing new is a no-op. The part sequence is derived from the bucket, so it even survives losing the local state file. +- **Redaction on by default** — AWS / Anthropic / OpenAI / HF / GitHub / Slack token shapes are scrubbed before upload, with per-pattern match counts recorded in the session's `meta.json` (pass `--no-redact` to opt out). Source transcripts are never modified. +- **Replay** — `session show --tail N` concatenates the uploaded parts and prints the last N transcript lines next to the session metadata. Harness matrix, storage formats, and redaction details → **[docs/session-mirror.md](docs/session-mirror.md)** From 41a257573a2c3e453b72e6ba635bb1a6d725380f Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:25:57 +0300 Subject: [PATCH 18/22] README: quick start takes credentials from AWS env vars Inline --access-key/--secret-key flags leak into shell history; the init command already reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY, so show that path. Note that .tracecraft.json is written chmod 600 and auto-added to .gitignore. Co-Authored-By: Claude Fable 5 --- README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e86594c..3d708a0 100644 --- a/README.md +++ b/README.md @@ -29,20 +29,23 @@ docker run -d -p 9000:9000 \ minio/minio server /data ``` -Register two agents against the same project: +Register two agents against the same project. 
Credentials come from the standard AWS env vars, so they never land in your shell history: ```bash +export AWS_ACCESS_KEY_ID=admin +export AWS_SECRET_ACCESS_KEY=admin123456 + # Terminal 1 tracecraft init --project demo --agent designer \ - --endpoint http://localhost:9000 --bucket tracecraft \ - --access-key admin --secret-key admin123456 + --endpoint http://localhost:9000 --bucket tracecraft # Terminal 2 — same flags, --agent developer tracecraft init --project demo --agent developer \ - --endpoint http://localhost:9000 --bucket tracecraft \ - --access-key admin --secret-key admin123456 + --endpoint http://localhost:9000 --bucket tracecraft ``` +`init` writes the config to `.tracecraft.json` with mode `600` and auto-adds it to `.gitignore` when you're in a git repo. + Now the core move — **two agents cannot grab the same work**, with no lock service and no server to run: ```console From 0f5734d92c6070e836ac1f07638891cb8ee48ebf Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:26:13 +0300 Subject: [PATCH 19/22] README: document HF bucket privacy behavior Private by default at creation, --public opt-out, real visibility shown in init output, and the delete+recreate caveat (no update_bucket upstream). Co-Authored-By: Claude Fable 5 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 3d708a0..0a5c276 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,8 @@ Bring your own bucket — no vendor lock-in: | Backblaze B2 / Wasabi | S3-compatible endpoint | | | HuggingFace Buckets | `--backend hf --bucket user/name` | browsable on the Hub; `pip install tracecraft-ai[huggingface]` | +**HuggingFace privacy:** `init` creates the bucket **private by default** (pass `--public` to opt out) and prints the bucket's *actual* visibility, read back from the Hub — e.g. `Backend: HuggingFace Buckets Bucket: user/x (private)`. 
If the bucket already exists as public and you didn't ask for that, init warns loudly: coordination data and mirrored transcripts would be publicly visible. Visibility can't be flipped after creation (`huggingface_hub` has no `update_bucket`) — the only way to change it is delete + recreate. + --- ## Use cases From 70499ca7584d04ea0f8178f7c26b560573b5b9b4 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:26:36 +0300 Subject: [PATCH 20/22] README: add 'Why not X?' and 'Status & limitations' sections Positions tracecraft against in-process frameworks, server-backed stores, and live wire protocols; and is honest about pre-alpha status: no claim TTL, heartbeat not refreshed after init, HF claims best-effort. Links to open issues. Co-Authored-By: Claude Fable 5 --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 0a5c276..14eae6c 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,24 @@ tracecraft send _broadcast "v1 cut at 3pm, wrap your tasks" --- +## Why not LangGraph / Redis / message queues? + +- **Frameworks (LangGraph, CrewAI, AutoGen)** orchestrate agents *inside one process*. Tracecraft coordinates *any* processes across machines — different harnesses, different clouds, different teams — through storage they already have. +- **Redis / Postgres / a queue** means operating a server: provisioning, auth, uptime, backups. A bucket is zero infra, and every state change is a browsable JSON file — you get an audit trail for free just by opening the bucket. +- **A2A / MCP** are live wire protocols between *running* agents. Tracecraft is durable state for agents that aren't running at the same time — one agent finishes Tuesday, the next picks up the handoff Wednesday. + +## Status & limitations + +Tracecraft is **pre-alpha**. Honest sharp edges, as of now: + +- **No TTL on claims** — a crashed claim-holder keeps the lock until someone runs `complete --force`. 
+- **Heartbeat isn't refreshed** — `agents` shows who registered, not who's alive right now. +- **HF claims are best-effort** — HuggingFace Buckets have no conditional write, so atomic claims need an S3-compatible backend. + +Open issues and roadmap → [github.com/Arrmlet/tracecraft/issues](https://github.com/Arrmlet/tracecraft/issues) + +--- + ## Session mirroring Most coordination tools store the *events* — who claimed what, who messaged whom. Tracecraft stores those **and** each agent's full reasoning, by mirroring coding-agent session transcripts into the same bucket. When a run goes sideways, one `tracecraft session show` gives you the handoffs **and** the chain of thought behind them — same place, same JSON, no second system to wire up. From 1aa52ea0bdbf8b7b82da04f88f37121f610a8c16 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:26:51 +0300 Subject: [PATCH 21/22] README: add minimal Python API snippet The CLI is the stable interface; get_store() is the documented escape hatch for direct bucket access from Python. 
Co-Authored-By: Claude Fable 5 --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 14eae6c..54eb9e3 100644 --- a/README.md +++ b/README.md @@ -227,6 +227,19 @@ TRACECRAFT_AGENT=developer tracecraft inbox --- +## Python API + +The CLI is the stable interface; for code that wants direct bucket access, the store factory is the escape hatch: + +```python +from tracecraft.store import get_store + +store, cfg = get_store() # reads .tracecraft.json like the CLI does +store.put_json("memory/build/status.json", {"value": "passing", "set_by": cfg["agent_id"]}) +``` + +--- + ## More - [docs/session-mirror.md](docs/session-mirror.md) — session mirroring: harnesses, formats, redaction From 83cc0e534b5edf9986c0a2b3a40124322e915f6a Mon Sep 17 00:00:00 2001 From: arrmlet Date: Tue, 9 Jun 2026 22:27:23 +0300 Subject: [PATCH 22/22] dev env: MinIO-only compose; .env.example matches reality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docker-compose.dev.yml drops postgres, redis, and seaweedfs — leftovers from the pre-pivot server design; the shipped CLI needs exactly one S3-compatible bucket. .env.example rewritten to the variables the code actually reads (AWS creds, HF_TOKEN, TRACECRAFT_AGENT, harness path overrides) instead of JWT/UI/database/monitoring leftovers. Quick start references the compose file. Co-Authored-By: Claude Fable 5 --- .env.example | 44 ++++++++++++------------------------- README.md | 2 ++ docker-compose.dev.yml | 50 ++++++++++-------------------------------- 3 files changed, 27 insertions(+), 69 deletions(-) diff --git a/.env.example b/.env.example index c767cf6..34fd1ae 100644 --- a/.env.example +++ b/.env.example @@ -1,34 +1,18 @@ -# Tracecraft Configuration +# Tracecraft environment variables — only what the CLI actually reads. 
-# SeaweedFS Configuration -TRACECRAFT_SEAWEEDFS_S3_ENDPOINT=localhost:8333 -TRACECRAFT_SEAWEEDFS_ACCESS_KEY=admin -TRACECRAFT_SEAWEEDFS_SECRET_KEY=admin_secret_key -TRACECRAFT_SEAWEEDFS_USE_SSL=false +# S3 backend credentials (read by `tracecraft init`; match docker-compose.dev.yml) +AWS_ACCESS_KEY_ID=admin +AWS_SECRET_ACCESS_KEY=admin123456 -# Security Configuration -TRACECRAFT_SECURITY_ENCRYPTION_ENABLED=true -TRACECRAFT_SECURITY_JWT_SECRET=your-jwt-secret-here +# HuggingFace backend token (read by `tracecraft init --backend hf`) +# HF_TOKEN=hf_... -# Storage Configuration -TRACECRAFT_STORAGE_BUCKET_NAME=tracecraft-data -TRACECRAFT_STORAGE_RETENTION_DAYS=90 +# Override the agent identity per shell/process (lets several agents share one +# directory and .tracecraft.json) +# TRACECRAFT_AGENT=designer -# UI Configuration -TRACECRAFT_UI_HOST=0.0.0.0 -TRACECRAFT_UI_PORT=8000 -TRACECRAFT_UI_AUTH_REQUIRED=false - -# Monitoring Configuration -TRACECRAFT_MONITORING_ENABLED=true - -# Database Configuration -TRACECRAFT_DATABASE_HOST=localhost -TRACECRAFT_DATABASE_PORT=5432 -TRACECRAFT_DATABASE_DATABASE=tracecraft -TRACECRAFT_DATABASE_USER=tracecraft -TRACECRAFT_DATABASE_PASSWORD=tracecraft - -# Redis Configuration -TRACECRAFT_REDIS_HOST=localhost -TRACECRAFT_REDIS_PORT=6379 +# Session-mirror harness location overrides (only if your harness lives in a +# non-default path) +# OPENCLAW_STATE_DIR= +# OPENCLAW_HOME= +# HERMES_HOME= diff --git a/README.md b/README.md index 54eb9e3..386274a 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ docker run -d -p 9000:9000 \ minio/minio server /data ``` +(From a checkout, `docker compose -f docker-compose.dev.yml up -d` does the same and adds the MinIO console on `:9001`.) + Register two agents against the same project. 
Credentials come from the standard AWS env vars, so they never land in your shell history: ```bash diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index bdadcbd..c43ca8d 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -1,45 +1,17 @@ -version: "3.8" - +# Local dev backend: a single MinIO bucket is all tracecraft needs. +# Console at http://localhost:9001 to watch agents coordinate live. services: - postgres: - image: postgres:16 + minio: + image: minio/minio + command: server /data --console-address ":9001" ports: - - "5432:5432" + - "9000:9000" + - "9001:9001" environment: - POSTGRES_DB: tracecraft - POSTGRES_USER: tracecraft - POSTGRES_PASSWORD: tracecraft + MINIO_ROOT_USER: admin + MINIO_ROOT_PASSWORD: admin123456 volumes: - - postgres_data:/var/lib/postgresql/data - - redis: - image: redis:7-alpine - ports: - - "6379:6379" - - seaweed-master: - image: chrislusf/seaweedfs - command: master -ip=seaweed-master -port=9333 - ports: - - "9333:9333" - - seaweed-volume: - image: chrislusf/seaweedfs - command: volume -mserver=seaweed-master:9333 -port=8080 -ip=seaweed-volume - ports: - - "8080:8080" - depends_on: - - seaweed-master - - seaweed-filer: - image: chrislusf/seaweedfs - command: filer -master=seaweed-master:9333 -port=8888 -s3 -s3.port=8333 - ports: - - "8888:8888" - - "8333:8333" - depends_on: - - seaweed-master - - seaweed-volume + - minio_data:/data volumes: - postgres_data: + minio_data: