From 455a1f1ab49e58bab8f8700a826b61e14985eb76 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Sat, 6 Jun 2026 16:31:55 +0300 Subject: [PATCH 1/2] 0.2.1: structured handoff records + huggingface pip extra --- README.md | 4 +- sdk/pyproject.toml | 2 +- sdk/tracecraft/__init__.py | 2 +- sdk/tracecraft/cli/steps.py | 86 +++++++++++++++++++++++++++++++------ 4 files changed, 78 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 7be3945..c4576f7 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,9 @@ tracecraft inbox # Read messages tracecraft inbox --delete # Read and clear tracecraft claim # Claim a step (atomic) -tracecraft complete [--note X] # Mark done + handoff note +tracecraft complete [--note X] [--to AGENT] [--next-action X] + [--blocked|--needs-review] + [--changed-files-from-git] # Structured handoff record tracecraft step-status # Check status tracecraft wait-for # Block until complete (default 300s timeout) diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 4f0be08..c6446f4 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "tracecraft-ai" -version = "0.2.0" +version = "0.2.1" description = "Coordination layer for multi-agent AI systems. Bring your own S3 / HuggingFace bucket; shared memory, mailbox, atomic task claims, handoffs, artifacts — no server, no database." readme = "README.md" license = {text = "MIT"} diff --git a/sdk/tracecraft/__init__.py b/sdk/tracecraft/__init__.py index ae1ec60..22b8802 100644 --- a/sdk/tracecraft/__init__.py +++ b/sdk/tracecraft/__init__.py @@ -1,3 +1,3 @@ """Tracecraft — coordination layer for multi-agent AI systems.""" -__version__ = "0.2.0" +__version__ = "0.2.1" diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py index 58868d3..2ef1b34 100644 --- a/sdk/tracecraft/cli/steps.py +++ b/sdk/tracecraft/cli/steps.py @@ -1,5 +1,6 @@ """tracecraft steps — claim, complete, and track coordination steps.""" +import subprocess import time from datetime import datetime, timezone @@ -9,6 +10,27 @@ from tracecraft.store import get_store +def _git_changed_files() -> list[str]: + """Return changed files from `git diff --name-only HEAD` (staged + unstaged), + or [] if not a git repo / git unavailable. Never raises. + + Git is the source of truth for what changed — we never let an agent type + the file list by hand (self-reported change lists are wrong ~half the time + and go stale on the next commit). + """ + try: + out = subprocess.run( + ["git", "diff", "--name-only", "HEAD"], + capture_output=True, text=True, timeout=10, + ) + if out.returncode != 0: + return [] + files = [ln.strip() for ln in out.stdout.splitlines() if ln.strip()] + return files + except (OSError, subprocess.SubprocessError): + return [] + + @click.command() @click.argument("step_id") def claim(step_id): @@ -39,31 +61,69 @@ def claim(step_id): @click.command() @click.argument("step_id") -@click.option("--note", default="", help="Handoff note for the next agent") -def complete(step_id, note): - """Mark a step as complete and write handoff.""" +@click.option("--note", default="", help="Handoff note for the next agent (free text)") +@click.option("--to", "next_agent", default=None, help="Agent this step hands off to") +@click.option("--next-action", default=None, help="One line: what the next agent should do first") +@click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete") +@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete") +@click.option( + "--changed-files-from-git", + is_flag=True, + help="Record files changed (from `git diff`), so the next agent knows what moved. No-op outside a git repo.", +) +def complete(step_id, note, next_agent, next_action, blocked, needs_review, changed_files_from_git): + """Mark a step complete (or blocked / needs-review) and write a handoff record. + + The handoff record is what the next agent sees instead of a shared + conversation — so it carries machine-checkable state, not just a note. + Fields that can be wrong if hand-typed (changed files) are sourced from + git; fields that would be hallucinated if mandatory (assumptions) stay as + optional free text in --note. + """ + if blocked and needs_review: + raise click.ClickException("Use at most one of --blocked / --needs-review") + store, cfg = get_store() agent = cfg["agent_id"] sid = step_id.lower().replace(".", "-") now = datetime.now(timezone.utc).isoformat() - # Update status + state = "blocked" if blocked else "needs_review" if needs_review else "complete" + + # Status reflects the real outcome (not always "complete"). existing = store.get_json(f"steps/{sid}/status.json") or {} - store.put_json(f"steps/{sid}/status.json", { - "status": "complete", + status_doc = { + "status": state, "agent": agent, "started_at": existing.get("started_at", now), - "completed_at": now, - }) - - # Write handoff - store.put_json(f"steps/{sid}/handoff.json", { + } + if state == "complete": + status_doc["completed_at"] = now + store.put_json(f"steps/{sid}/status.json", status_doc) + + # Handoff record — schema v2. All v2 keys optional; old readers/handoffs + # keep working. changed_files is git-derived (never agent-typed). + handoff = { + "schema": 2, "from_agent": agent, "from_step": step_id, + "next_agent": next_agent, + "state": state, + "next_action": next_action, "note": note, "created_at": now, - }) - click.echo(f"Completed step {step_id}") + } + if changed_files_from_git: + handoff["changed_files"] = _git_changed_files() + store.put_json(f"steps/{sid}/handoff.json", handoff) + + label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state] + msg = f"{label} step {step_id}" + if next_agent: + msg += f" → handed off to {next_agent}" + if changed_files_from_git: + msg += f" ({len(handoff['changed_files'])} changed file(s))" + click.echo(msg) @click.command() From d8a9fa7c326b459175f14f1428f9131cf7f0e911 Mon Sep 17 00:00:00 2001 From: arrmlet Date: Sat, 6 Jun 2026 17:04:55 +0300 Subject: [PATCH 2/2] ci: add ruff format check; format codebase; add handoff tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a 'format' job to the tests workflow that runs 'ruff format --check sdk/' on every push/PR, so style drift fails CI instead of reaching review. To make the gate green, ran 'ruff format' across sdk/ (15 files reformatted, whitespace/line-wrap only — no logic changes). Verified the full suite still passes (66 tests). Also lands sdk/tests/test_handoff_v2.py — the 9-test suite for the 0.2.1 structured handoff (complete/blocked/needs_review state, next_action/--to, git-derived changed_files, mutual-exclusion, no mandatory assumptions field). The fixture chdirs to an isolated dir and writes config to both the CWD-local and HOME paths, so a stray ./.tracecraft.json can't shadow it (this was making the tests hit a real endpoint and fail). Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/test.yml | 20 +++ sdk/tests/test_handoff_v2.py | 184 ++++++++++++++++++++++++++ sdk/tests/test_session_cli.py | 6 +- sdk/tests/test_tier_0.py | 139 ++++++++++++------- sdk/tracecraft/cli/init_cmd.py | 56 +++++--- sdk/tracecraft/cli/memory.py | 13 +- sdk/tracecraft/cli/messages.py | 15 ++- sdk/tracecraft/cli/session.py | 29 ++-- sdk/tracecraft/cli/steps.py | 25 ++-- sdk/tracecraft/harness/claude_code.py | 3 +- sdk/tracecraft/harness/codex.py | 4 +- sdk/tracecraft/harness/hermes.py | 6 +- sdk/tracecraft/hf.py | 5 +- sdk/tracecraft/store.py | 2 + 14 files changed, 399 insertions(+), 108 deletions(-) create mode 100644 sdk/tests/test_handoff_v2.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f6e88dc..8c4d219 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,26 @@ on: branches: [main] jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + + - name: Install ruff + working-directory: sdk + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Check formatting (ruff format) + run: ruff format --check sdk/ + pytest: runs-on: ubuntu-latest strategy: diff --git a/sdk/tests/test_handoff_v2.py b/sdk/tests/test_handoff_v2.py new file mode 100644 index 0000000..fa20346 --- /dev/null +++ b/sdk/tests/test_handoff_v2.py @@ -0,0 +1,184 @@ +"""Tests for the v0.2.1 structured handoff record. + +Schema v2 adds: state enum (complete/blocked/needs_review), next_action, +git-derived changed_files. All optional + backward compatible. +""" + +from __future__ import annotations + +import json + +import boto3 +import pytest +from click.testing import CliRunner +from moto import mock_aws + +from tracecraft.cli import cli +import tracecraft.cli.steps as steps_mod + + +BUCKET = "tc-handoff-test" +PROJECT = "demo" + + +@pytest.fixture +def env(tmp_path, monkeypatch): + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "testing") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") + # Run from an isolated empty dir. load_config() is CWD-first, so without + # this a stray ./.tracecraft.json in the repo would shadow our test config + # and point the CLI at a real endpoint. + work = tmp_path / "work" + work.mkdir() + monkeypatch.chdir(work) + cfg = { + "backend": "s3", + "endpoint": None, + "bucket": BUCKET, + "project": PROJECT, + "agent_id": "designer", + "access_key": "testing", + "secret_key": "testing", + } + # Write to the CWD-local path load_config() checks first... + (work / ".tracecraft.json").write_text(json.dumps(cfg)) + # ...and the global HOME fallback, so tests that chdir elsewhere (the git + # tests below) still resolve a config. + fake_home = tmp_path / "home" + (fake_home / ".tracecraft").mkdir(parents=True) + (fake_home / ".tracecraft" / "config.json").write_text(json.dumps(cfg)) + monkeypatch.setenv("HOME", str(fake_home)) + with mock_aws(): + boto3.client("s3").create_bucket(Bucket=BUCKET) + yield CliRunner() + + +def _handoff(sid="design"): + c = boto3.client("s3") + obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/handoff.json") + return json.loads(obj["Body"].read()) + + +def _status(sid="design"): + c = boto3.client("s3") + obj = c.get_object(Bucket=BUCKET, Key=f"{PROJECT}/steps/{sid}/status.json") + return json.loads(obj["Body"].read()) + + +# ---------- backward compatibility ---------- + + +def test_plain_complete_is_backward_compatible(env): + r = env.invoke(cli, ["complete", "design"]) + assert r.exit_code == 0, r.output + assert r.output.startswith("Completed step design") + h = _handoff() + # v1 keys still present + assert h["from_agent"] == "designer" + assert h["from_step"] == "design" + assert h["note"] == "" + assert "created_at" in h + # v2 defaults + assert h["schema"] == 2 + assert h["state"] == "complete" + assert h["next_agent"] is None + assert h["next_action"] is None + assert "changed_files" not in h # only present with the git flag + # status reflects complete + assert _status()["status"] == "complete" + assert "completed_at" in _status() + + +# ---------- state enum ---------- + + +def test_blocked_sets_state_and_status(env): + r = env.invoke(cli, ["complete", "design", "--blocked", "--note", "stuck on auth"]) + assert r.exit_code == 0, r.output + assert "Blocked step design" in r.output + assert _handoff()["state"] == "blocked" + assert _status()["status"] == "blocked" + assert "completed_at" not in _status() # not complete → no completed_at + + +def test_needs_review_sets_state(env): + r = env.invoke(cli, ["complete", "design", "--needs-review"]) + assert r.exit_code == 0, r.output + assert "Needs review on step design" in r.output + assert _handoff()["state"] == "needs_review" + assert _status()["status"] == "needs_review" + + +def test_blocked_and_needs_review_mutually_exclusive(env): + r = env.invoke(cli, ["complete", "design", "--blocked", "--needs-review"]) + assert r.exit_code != 0 + assert "at most one" in r.output + + +# ---------- next_action + next_agent ---------- + + +def test_next_action_and_to(env): + r = env.invoke( + cli, + ["complete", "design", "--to", "developer", "--next-action", "wire api.py into search"], + ) + assert r.exit_code == 0, r.output + assert "handed off to developer" in r.output + h = _handoff() + assert h["next_agent"] == "developer" + assert h["next_action"] == "wire api.py into search" + + +# ---------- changed_files from git ---------- + + +def test_changed_files_git_in_repo(env, tmp_path, monkeypatch): + # Make cwd a git repo with one modified tracked file + import subprocess + + repo = tmp_path / "repo" + repo.mkdir() + monkeypatch.chdir(repo) + subprocess.run(["git", "init", "-q"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.email", "t@t.t"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True) + (repo / "a.py").write_text("x = 1\n") + subprocess.run(["git", "add", "a.py"], cwd=repo, check=True) + subprocess.run(["git", "commit", "-qm", "init"], cwd=repo, check=True) + (repo / "a.py").write_text("x = 2\n") # now modified vs HEAD + + r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"]) + assert r.exit_code == 0, r.output + h = _handoff() + assert h["changed_files"] == ["a.py"] + assert "1 changed file(s)" in r.output + + +def test_changed_files_git_outside_repo_is_empty(env, tmp_path, monkeypatch): + # cwd is NOT a git repo → flag is a no-op (empty list), never crashes + nonrepo = tmp_path / "plain" + nonrepo.mkdir() + monkeypatch.chdir(nonrepo) + r = env.invoke(cli, ["complete", "design", "--changed-files-from-git"]) + assert r.exit_code == 0, r.output + assert _handoff()["changed_files"] == [] + + +def test_no_assumptions_field(env): + # We deliberately do NOT add a mandatory unresolved_assumptions field. + env.invoke(cli, ["complete", "design", "--note", "assumed v2 API"]) + h = _handoff() + assert "unresolved_assumptions" not in h + assert "assumptions" not in h + # open questions live in the free-text note + assert h["note"] == "assumed v2 API" + + +# ---------- helper direct test ---------- + + +def test_git_changed_files_helper_never_raises(monkeypatch, tmp_path): + monkeypatch.chdir(tmp_path) # not a repo + assert steps_mod._git_changed_files() == [] diff --git a/sdk/tests/test_session_cli.py b/sdk/tests/test_session_cli.py index cd22230..f2b2f7b 100644 --- a/sdk/tests/test_session_cli.py +++ b/sdk/tests/test_session_cli.py @@ -114,9 +114,7 @@ def _bucket_keys(): """Return all keys under PROJECT/ stripped of the project prefix.""" client = boto3.client("s3") resp = client.list_objects_v2(Bucket=BUCKET, Prefix=f"{PROJECT}/") - return [ - obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", []) - ] + return [obj["Key"][len(PROJECT) + 1 :] for obj in resp.get("Contents", [])] def _get_meta(session_id): @@ -256,7 +254,7 @@ def test_session_list_shows_uploaded_session(cli_env): def test_session_show_tails_concatenated_parts(cli_env): runner, cwd, sess, sid = cli_env - sess.write_bytes(b'line1\n') + sess.write_bytes(b"line1\n") runner.invoke(cli, ["session", "mirror", "--harness", "claude-code", "--cwd", str(cwd)]) with open(sess, "ab") as f: f.write(b"line2\nline3\n") diff --git a/sdk/tests/test_tier_0.py b/sdk/tests/test_tier_0.py index ab321a5..790a62d 100644 --- a/sdk/tests/test_tier_0.py +++ b/sdk/tests/test_tier_0.py @@ -57,6 +57,7 @@ def store(s3_env): # ---------- Fix 1: atomic claim ---------- + def test_fix1_atomic_put_first_writer_wins(store): """First put_json(if_none_match=True) succeeds; second raises PreconditionFailed.""" store.put_json("steps/foo/claim.json", {"agent": "a"}, if_none_match=True) @@ -80,15 +81,19 @@ def test_fix1_claim_cli_blocks_second_caller(s3_env, monkeypatch, tmp_path): cfg_file = tmp_path / ".tracecraft.json" def write_cfg(agent_id): - cfg_file.write_text(json.dumps({ - "backend": "s3", - "bucket": BUCKET, - "project": PROJECT, - "endpoint": None, - "access_key": "testing", - "secret_key": "testing", - "agent_id": agent_id, - })) + cfg_file.write_text( + json.dumps( + { + "backend": "s3", + "bucket": BUCKET, + "project": PROJECT, + "endpoint": None, + "access_key": "testing", + "secret_key": "testing", + "agent_id": agent_id, + } + ) + ) monkeypatch.chdir(tmp_path) write_cfg("agent-a") @@ -106,6 +111,7 @@ def write_cfg(agent_id): # ---------- Fix 2: paginated list_keys ---------- + def test_fix2_list_keys_returns_more_than_1000(store): """Write 1250 keys; ensure list_keys returns them all (not capped at 1000).""" for i in range(1250): @@ -118,6 +124,7 @@ def test_fix2_list_keys_returns_more_than_1000(store): # ---------- Fix 3: no default admin/secret credentials ---------- + def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path): """`tracecraft init` without --access-key/--secret-key/env must error.""" monkeypatch.chdir(tmp_path) @@ -125,13 +132,21 @@ def test_fix3_init_refuses_without_creds(monkeypatch, tmp_path): monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", "http://localhost:9000", - "--bucket", "x", - "--project", "p", - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + "http://localhost:9000", + "--bucket", + "x", + "--project", + "p", + "--agent", + "a", + ], + ) assert r.exit_code != 0 assert "credentials required" in r.output.lower() # Critically, must NOT have written admin/secret to disk @@ -145,13 +160,21 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, # moto default - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, # moto default + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output saved = json.loads((tmp_path / ".tracecraft.json").read_text()) assert saved["access_key"] == "testing" @@ -163,6 +186,7 @@ def test_fix3_init_reads_aws_env_vars(monkeypatch, tmp_path, s3_env): # ---------- Fix 4: .gitignore handling ---------- + def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env): """When cwd is a git repo, init appends .tracecraft.json to .gitignore.""" (tmp_path / ".git").mkdir() @@ -171,13 +195,21 @@ def test_fix4_gitignore_appended_in_git_repo(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output gi = (tmp_path / ".gitignore").read_text() assert ".tracecraft.json" in gi.splitlines() @@ -192,13 +224,21 @@ def test_fix4_gitignore_not_duplicated(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output lines = (tmp_path / ".gitignore").read_text().splitlines() assert lines.count(".tracecraft.json") == 1 @@ -211,19 +251,28 @@ def test_fix4_no_gitignore_outside_repo(monkeypatch, tmp_path, s3_env): monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "testing") runner = CliRunner() - r = runner.invoke(init_cmd, [ - "--backend", "s3", - "--endpoint", MOTO_ENDPOINT, - "--bucket", BUCKET, - "--project", PROJECT, - "--agent", "a", - ]) + r = runner.invoke( + init_cmd, + [ + "--backend", + "s3", + "--endpoint", + MOTO_ENDPOINT, + "--bucket", + BUCKET, + "--project", + PROJECT, + "--agent", + "a", + ], + ) assert r.exit_code == 0, r.output assert not (tmp_path / ".gitignore").exists() # ---------- Fix 5: dead scaffolding removed ---------- + def test_fix5_no_empty_namespace_packages(): """integrations/ and transport/ packages must not be importable.""" with pytest.raises(ImportError): @@ -243,5 +292,5 @@ def test_fix5_pyproject_drops_dead_extras(): """crewai/langgraph/claude-sdk/all extras must not be declared.""" repo_root = pathlib.Path(__file__).resolve().parents[2] text = (repo_root / "sdk" / "pyproject.toml").read_text() - for forbidden in ('crewai = [', 'langgraph = [', 'claude-sdk = [', 'all = ['): + for forbidden in ("crewai = [", "langgraph = [", "claude-sdk = [", "all = ["): assert forbidden not in text, f"pyproject still declares: {forbidden}" diff --git a/sdk/tracecraft/cli/init_cmd.py b/sdk/tracecraft/cli/init_cmd.py index 1521e86..d1a9762 100644 --- a/sdk/tracecraft/cli/init_cmd.py +++ b/sdk/tracecraft/cli/init_cmd.py @@ -10,14 +10,35 @@ @click.command() -@click.option("--backend", type=click.Choice(["s3", "hf"]), default="s3", help="Storage backend: s3 or hf (HuggingFace Buckets)") +@click.option( + "--backend", + type=click.Choice(["s3", "hf"]), + default="s3", + help="Storage backend: s3 or hf (HuggingFace Buckets)", +) @click.option("--endpoint", default=None, help="S3 endpoint URL (s3 backend only)") -@click.option("--bucket", required=True, help="Bucket name (s3) or HF bucket handle e.g. username/my-bucket (hf)") +@click.option( + "--bucket", + required=True, + help="Bucket name (s3) or HF bucket handle e.g. username/my-bucket (hf)", +) @click.option("--project", required=True, help="Project namespace") @click.option("--agent", required=True, help="Agent ID for this session") -@click.option("--access-key", default=None, envvar="AWS_ACCESS_KEY_ID", help="S3 access key (env: AWS_ACCESS_KEY_ID)") -@click.option("--secret-key", default=None, envvar="AWS_SECRET_ACCESS_KEY", help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)") -@click.option("--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)") +@click.option( + "--access-key", + default=None, + envvar="AWS_ACCESS_KEY_ID", + help="S3 access key (env: AWS_ACCESS_KEY_ID)", +) +@click.option( + "--secret-key", + default=None, + envvar="AWS_SECRET_ACCESS_KEY", + help="S3 secret key (env: AWS_SECRET_ACCESS_KEY)", +) +@click.option( + "--hf-token", default=None, envvar="HF_TOKEN", help="HuggingFace token (env: HF_TOKEN)" +) def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, hf_token): """Initialize tracecraft config, create bucket, and register agent.""" cfg = { @@ -49,23 +70,24 @@ def init_cmd(backend, endpoint, bucket, project, agent, access_key, secret_key, store.ensure_bucket() now = datetime.now(timezone.utc).isoformat() - store.put_json(f"agents/{agent}.json", { - "id": agent, - "status": "active", - "step": None, - "started_at": now, - "heartbeat": now, - "summary": "Initialized", - }) + store.put_json( + f"agents/{agent}.json", + { + "id": agent, + "status": "active", + "step": None, + "started_at": now, + "heartbeat": now, + "summary": "Initialized", + }, + ) click.echo(f"Initialized project '{project}' as agent '{agent}'") if backend == "s3": click.echo(f"Backend: S3 Endpoint: {endpoint} Bucket: {bucket}") else: click.echo(f"Backend: HuggingFace Buckets Bucket: {bucket}") - click.echo( - "Note: .tracecraft.json contains credentials. Keep it out of version control." - ) + click.echo("Note: .tracecraft.json contains credentials. Keep it out of version control.") def _ensure_gitignore_entry(): @@ -98,9 +120,11 @@ def _get_store(cfg): backend = cfg.get("backend", "s3") if backend == "hf": from tracecraft.hf import HF + return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token")) else: from tracecraft.s3 import S3 + return S3( endpoint=cfg["endpoint"], bucket=cfg["bucket"], diff --git a/sdk/tracecraft/cli/memory.py b/sdk/tracecraft/cli/memory.py index 17c899b..65e12b2 100644 --- a/sdk/tracecraft/cli/memory.py +++ b/sdk/tracecraft/cli/memory.py @@ -33,11 +33,14 @@ def memory_set(key, value): raise click.ClickException("Key cannot be empty") store, cfg = get_store() now = datetime.now(timezone.utc).isoformat() - store.put_json(_key_to_path(key), { - "value": value, - "set_by": cfg["agent_id"], - "set_at": now, - }) + store.put_json( + _key_to_path(key), + { + "value": value, + "set_by": cfg["agent_id"], + "set_at": now, + }, + ) click.echo(f"Set {key} = {value}") diff --git a/sdk/tracecraft/cli/messages.py b/sdk/tracecraft/cli/messages.py index 9088c70..efba3c4 100644 --- a/sdk/tracecraft/cli/messages.py +++ b/sdk/tracecraft/cli/messages.py @@ -21,12 +21,15 @@ def send(recipient, message): now = datetime.now(timezone.utc).isoformat() key = f"messages/{recipient}/{ts}_{sender}.json" - store.put_json(key, { - "from": sender, - "to": recipient, - "message": message, - "sent_at": now, - }) + store.put_json( + key, + { + "from": sender, + "to": recipient, + "message": message, + "sent_at": now, + }, + ) click.echo(f"Sent to {recipient}: {message}") diff --git a/sdk/tracecraft/cli/session.py b/sdk/tracecraft/cli/session.py index 3342cc9..0313cc5 100644 --- a/sdk/tracecraft/cli/session.py +++ b/sdk/tracecraft/cli/session.py @@ -114,7 +114,9 @@ def session(): default=None, help="Project directory the session ran in (claude-code only). Defaults to $PWD.", ) -@click.option("--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets.") +@click.option( + "--no-redact", is_flag=True, help="Skip redaction. Use only on fully-trusted buckets." +) @click.option( "--min-bytes", default=1, @@ -156,9 +158,7 @@ def mirror(harness_name, session_id, cwd_str, no_redact, min_bytes): # authoritative — read_new() returns the real consumed cursor below. cur_size = harness.size(sess) if cur_size - cursor < min_bytes: - click.echo( - f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}" - ) + click.echo(f"nothing new: session={sess.session_id} cursor={cursor:,} size={cur_size:,}") return # 2. Read everything new since `cursor`, race-free: read_new returns the @@ -286,10 +286,10 @@ def list_(harness_filter, limit, sort_by): sid = m.get("session_id", "?") short = sid[:8] + ("…" if len(sid) > 8 else "") click.echo( - f"{m.get('harness','?'):<14} {short:<16} " - f"{m.get('total_uploaded_bytes',0):>12,} " + f"{m.get('harness', '?'):<14} {short:<16} " + f"{m.get('total_uploaded_bytes', 0):>12,} " f"{len(m.get('parts', [])):>6} " - f"{m.get('last_uploaded_at','-')[:24]:<25}" + f"{m.get('last_uploaded_at', '-')[:24]:<25}" ) @@ -309,7 +309,9 @@ def show(session_id, tail): store, _ = get_store() # Find which harness this session lives under (search every harness folder). - all_meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")] + all_meta_keys = [ + k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json") + ] if not all_meta_keys: raise click.ClickException(f"session not found: {session_id}") meta_key = all_meta_keys[0] @@ -321,9 +323,7 @@ def show(session_id, tail): # Fetch all parts (in seq order), concatenate, print last N lines. prefix = meta_key[: -len("meta.json")] - part_keys = sorted( - k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1]) - ) + part_keys = sorted(k for k in store.list_keys(prefix) if PART_RE.search(k.rsplit("/", 1)[-1])) body = bytearray() for k in part_keys: with tempfile.NamedTemporaryFile(delete=False) as tf: @@ -365,9 +365,7 @@ def stop(session_id): # Best-effort: mark ended_at in meta if a meta exists. store, _ = get_store() - meta_keys = [ - k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json") - ] + meta_keys = [k for k in store.list_keys("sessions/") if k.endswith(f"/{session_id}/meta.json")] marked = False if meta_keys: meta = store.get_json(meta_keys[0]) or {} @@ -377,6 +375,5 @@ def stop(session_id): marked = True click.echo( - f"stopped session={session_id} " - f"state_cleared={had_state} meta_marked_ended={marked}" + f"stopped session={session_id} state_cleared={had_state} meta_marked_ended={marked}" ) diff --git a/sdk/tracecraft/cli/steps.py b/sdk/tracecraft/cli/steps.py index 2ef1b34..c0c34dd 100644 --- a/sdk/tracecraft/cli/steps.py +++ b/sdk/tracecraft/cli/steps.py @@ -21,7 +21,9 @@ def _git_changed_files() -> list[str]: try: out = subprocess.run( ["git", "diff", "--name-only", "HEAD"], - capture_output=True, text=True, timeout=10, + capture_output=True, + text=True, + timeout=10, ) if out.returncode != 0: return [] @@ -51,11 +53,14 @@ def claim(step_id): owner = existing.get("agent", "unknown") raise click.ClickException(f"Step {step_id} already claimed by {owner}") - store.put_json(f"steps/{sid}/status.json", { - "status": "in_progress", - "agent": agent, - "started_at": now, - }) + store.put_json( + f"steps/{sid}/status.json", + { + "status": "in_progress", + "agent": agent, + "started_at": now, + }, + ) click.echo(f"Claimed step {step_id} as {agent}") @@ -65,7 +70,9 @@ def claim(step_id): @click.option("--to", "next_agent", default=None, help="Agent this step hands off to") @click.option("--next-action", default=None, help="One line: what the next agent should do first") @click.option("--blocked", is_flag=True, help="Mark the step blocked rather than complete") -@click.option("--needs-review", is_flag=True, help="Mark the step as needing review rather than complete") +@click.option( + "--needs-review", is_flag=True, help="Mark the step as needing review rather than complete" +) @click.option( "--changed-files-from-git", is_flag=True, @@ -117,7 +124,9 @@ def complete(step_id, note, next_agent, next_action, blocked, needs_review, chan handoff["changed_files"] = _git_changed_files() store.put_json(f"steps/{sid}/handoff.json", handoff) - label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[state] + label = {"complete": "Completed", "blocked": "Blocked", "needs_review": "Needs review on"}[ + state + ] msg = f"{label} step {step_id}" if next_agent: msg += f" → handed off to {next_agent}" diff --git a/sdk/tracecraft/harness/claude_code.py b/sdk/tracecraft/harness/claude_code.py index bd3bbd1..bcc936b 100644 --- a/sdk/tracecraft/harness/claude_code.py +++ b/sdk/tracecraft/harness/claude_code.py @@ -40,6 +40,5 @@ def discover(self, cwd: Path) -> list[Session]: if not pdir.is_dir(): return [] return [ - Session(path=jsonl, session_id=jsonl.stem, cwd=cwd) - for jsonl in pdir.glob("*.jsonl") + Session(path=jsonl, session_id=jsonl.stem, cwd=cwd) for jsonl in pdir.glob("*.jsonl") ] diff --git a/sdk/tracecraft/harness/codex.py b/sdk/tracecraft/harness/codex.py index a5bd85e..5650dfb 100644 --- a/sdk/tracecraft/harness/codex.py +++ b/sdk/tracecraft/harness/codex.py @@ -16,7 +16,9 @@ from .base import FileTailHarness, Session -_ROLLOUT_RE = re.compile(r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P[A-Za-z0-9_-]+)\.jsonl$") +_ROLLOUT_RE = re.compile( + r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-(?P[A-Za-z0-9_-]+)\.jsonl$" +) class CodexHarness(FileTailHarness): diff --git a/sdk/tracecraft/harness/hermes.py b/sdk/tracecraft/harness/hermes.py index 671d0d0..668fc5f 100644 --- a/sdk/tracecraft/harness/hermes.py +++ b/sdk/tracecraft/harness/hermes.py @@ -59,7 +59,7 @@ def _decode_content(value): """Hermes stores multimodal content as '\\x00json:'; scalars as-is.""" if isinstance(value, str) and value.startswith(_CONTENT_JSON_PREFIX): try: - return json.loads(value[len(_CONTENT_JSON_PREFIX):]) + return json.loads(value[len(_CONTENT_JSON_PREFIX) :]) except json.JSONDecodeError: return value return value @@ -82,9 +82,7 @@ def discover(self, cwd: Path) -> list[Session]: return [] conn = _connect_ro(self.db_path) try: - rows = conn.execute( - "SELECT id FROM sessions ORDER BY started_at DESC" - ).fetchall() + rows = conn.execute("SELECT id FROM sessions ORDER BY started_at DESC").fetchall() except sqlite3.Error: return [] finally: diff --git a/sdk/tracecraft/hf.py b/sdk/tracecraft/hf.py index aa7a8dd..ca2756b 100644 --- a/sdk/tracecraft/hf.py +++ b/sdk/tracecraft/hf.py @@ -10,6 +10,7 @@ class HF: def __init__(self, bucket, project, token=None): from huggingface_hub import HfFileSystem + self.fs = HfFileSystem(token=token) self.bucket = bucket # e.g. "username/my-bucket" self.project = project @@ -35,11 +36,13 @@ def put_json(self, key, data, if_none_match=False): # This is racy, but documented; S3-compatible backends use IfNoneMatch=* for safety. if self.fs.exists(path): from tracecraft.s3 import PreconditionFailed + raise PreconditionFailed(key) with self.fs.open(path, "w") as f: json.dump(data, f, indent=2) except Exception as e: from tracecraft.s3 import PreconditionFailed + if isinstance(e, PreconditionFailed): raise raise click.ClickException(f"HF put failed: {e}") @@ -65,7 +68,7 @@ def list_keys(self, prefix=""): keys = [] for entry in entries: if entry.startswith(base_prefix): - keys.append(entry[len(base_prefix):]) + keys.append(entry[len(base_prefix) :]) else: keys.append(entry) return keys diff --git a/sdk/tracecraft/store.py b/sdk/tracecraft/store.py index 4226394..85ce840 100644 --- a/sdk/tracecraft/store.py +++ b/sdk/tracecraft/store.py @@ -10,9 +10,11 @@ def get_store(): if backend == "hf": from tracecraft.hf import HF + return HF(bucket=cfg["bucket"], project=cfg["project"], token=cfg.get("hf_token")), cfg else: from tracecraft.s3 import S3 + return S3( endpoint=cfg["endpoint"], bucket=cfg["bucket"],