aganthos · bordeauxred · Apr 19, 2026 · Apr 19, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -9,6 +9,27 @@ permissions:
   contents: read
 
 jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: false
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Set up Python 3.12
+        run: uv python install 3.12
+
+      - name: Sync dependencies (dev extras)
+        run: uv sync --extra dev --python 3.12
+
+      - name: Run pre-commit hooks
+        run: uv run pre-commit run --all-files
+
   test:
     runs-on: ubuntu-latest
     strategy:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,7 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.4
+    hooks:
+      - id: ruff
+        args: [--fix, --select, "E,F,I"]
+      - id: ruff-format
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -92,7 +92,24 @@ private modules.
 - Add docstrings to public classes and functions
 - Use `from __future__ import annotations` for forward references
 - Use `Protocol` for interfaces, `@dataclass` for value types
-- No linter is enforced yet — just keep it consistent with surrounding code
+
+We use [ruff](https://docs.astral.sh/ruff/) for formatting and linting
+(rules `E`, `F`, `I`). Enable the pre-commit hook once:
+
+```bash
+uv sync --extra dev
+uv run pre-commit install
+```
+
+After that, `ruff check --fix` and `ruff format` run automatically, in that
+order, on every `git commit`.
+
+To run the same tools manually (add `--fix` to `ruff check` to apply fixes):
+
+```bash
+uv run ruff format clawloop tests examples
+uv run ruff check --select E,F,I clawloop tests examples
+```
 
 ## Commits
 

diff --git a/clawloop/agent.py b/clawloop/agent.py
@@ -17,7 +17,7 @@
 from dataclasses import dataclass, field
 from typing import Any
 
-from clawloop.core.env import EvalResult, Sample, TaskEnvironment
+from clawloop.core.env import Sample, TaskEnvironment
 from clawloop.core.episode import Episode, EpisodeSummary, Message, StepMeta
 from clawloop.core.intensity import AdaptiveIntensity
 from clawloop.core.paradigm import ParadigmBreakthrough
@@ -37,7 +37,9 @@ def _sanitize_input(text: str) -> str:
     text = text.replace("\x00", "")
     if len(text) > _MAX_INPUT_LENGTH:
         log.warning(
-            "Truncating agent input from %d to %d chars", len(text), _MAX_INPUT_LENGTH,
+            "Truncating agent input from %d to %d chars",
+            len(text),
+            _MAX_INPUT_LENGTH,
         )
         text = text[:_MAX_INPUT_LENGTH]
     return text
@@ -227,9 +229,12 @@ def _run_one(self, sample: Sample, env: TaskEnvironment) -> Episode:
         meta_id = ""
         if hasattr(sample, "metadata") and isinstance(sample.metadata, dict):
             meta_id = sample.metadata.get("id", "")
-        task_id = meta_id or hashlib.sha256(
-            f"{self.bench}:{sample.question}:{context}".encode(),
-        ).hexdigest()[:16]
+        task_id = (
+            meta_id
+            or hashlib.sha256(
+                f"{self.bench}:{sample.question}:{context}".encode(),
+            ).hexdigest()[:16]
+        )
 
         # Build episode
         ep_messages = [

diff --git a/clawloop/archive/jsonl_store.py b/clawloop/archive/jsonl_store.py
@@ -30,13 +30,7 @@
 
 def _safe_run_id(run_id: str) -> str:
     """Reject run_id values that could escape the archive directory."""
-    if (
-        not run_id
-        or "/" in run_id
-        or "\\" in run_id
-        or ".." in run_id
-        or run_id.startswith(".")
-    ):
+    if not run_id or "/" in run_id or "\\" in run_id or ".." in run_id or run_id.startswith("."):
         raise ValueError(f"unsafe run_id for filesystem path: {run_id!r}")
     return run_id
 
@@ -248,9 +242,7 @@ def get_similar_runs(
             if completion is not None:
                 start = {
                     **start,
-                    "best_reward": completion.get(
-                        "best_reward", start.get("best_reward", 0.0)
-                    ),
+                    "best_reward": completion.get("best_reward", start.get("best_reward", 0.0)),
                     "improvement_delta": completion.get(
                         "improvement_delta", start.get("improvement_delta", 0.0)
                     ),

diff --git a/clawloop/archive/null_store.py b/clawloop/archive/null_store.py
@@ -26,7 +26,10 @@ def log_variant(self, variant: AgentVariant) -> None:
         pass
 
     def log_run_complete(
-        self, run_id: str, best_reward: float, improvement_delta: float,
+        self,
+        run_id: str,
+        best_reward: float,
+        improvement_delta: float,
         total_cost_tokens: int = 0,
     ) -> None:
         pass

diff --git a/clawloop/archive/store.py b/clawloop/archive/store.py
@@ -24,7 +24,10 @@ def log_episodes(self, episodes: list[EpisodeRecord]) -> None: ...
     def log_variant(self, variant: AgentVariant) -> None: ...
 
     def log_run_complete(
-        self, run_id: str, best_reward: float, improvement_delta: float,
+        self,
+        run_id: str,
+        best_reward: float,
+        improvement_delta: float,
         total_cost_tokens: int = 0,
     ) -> None: ...
 

diff --git a/clawloop/callbacks/litellm_cb.py b/clawloop/callbacks/litellm_cb.py
@@ -23,7 +23,7 @@
 
 from clawloop.collector import EpisodeCollector
 from clawloop.core.episode import Message, TokenLogProb, TokenUsage, ToolCall, cap_logprobs
-from clawloop.core.parse import parse_tool_calls, resolve_oi_span_kind, _safe_session_hash
+from clawloop.core.parse import _safe_session_hash, parse_tool_calls, resolve_oi_span_kind
 
 log = logging.getLogger(__name__)
 
@@ -95,7 +95,11 @@ async def async_log_failure_event(
     ) -> None:
         """Async variant — delegates to sync."""
         self.log_failure_event(
-            kwargs, response_obj, start_time, end_time, exception,
+            kwargs,
+            response_obj,
+            start_time,
+            end_time,
+            exception,
         )
 
     def _process(
@@ -151,14 +155,16 @@ def _process(
         logprobs = None
         raw_logprobs = getattr(choice, "logprobs", None)
         if raw_logprobs and hasattr(raw_logprobs, "content") and raw_logprobs.content:
-            logprobs = cap_logprobs([
-                TokenLogProb(
-                    token=lp.token,
-                    token_id=getattr(lp, "token_id", None),
-                    logprob=lp.logprob,
-                )
-                for lp in raw_logprobs.content
-            ])
+            logprobs = cap_logprobs(
+                [
+                    TokenLogProb(
+                        token=lp.token,
+                        token_id=getattr(lp, "token_id", None),
+                        logprob=lp.logprob,
+                    )
+                    for lp in raw_logprobs.content
+                ]
+            )
 
         # Build assistant message
         ep_messages.append(

diff --git a/clawloop/cli.py b/clawloop/cli.py
@@ -19,38 +19,35 @@ def _build_parser() -> argparse.ArgumentParser:
         prog="clawloop",
         description="ClawLoop — Learning from Experience unified learning API",
     )
-    parser.add_argument(
-        "-v", "--verbose", action="store_true", help="Enable debug logging"
-    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
     sub = parser.add_subparsers(dest="command", required=True)
 
     # -- run --
     run_p = sub.add_parser("run", help="Run the learning loop")
     run_p.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
     run_p.add_argument("--bench", required=True, help="Benchmark name")
+    run_p.add_argument("--iterations", type=int, default=1, help="Number of learning iterations")
+    run_p.add_argument("--episodes", type=int, default=10, help="Episodes per iteration")
+    run_p.add_argument("--config", type=str, default=None, help="Config JSON file")
+    run_p.add_argument("--model", type=str, default=None, help="LLM model (litellm format)")
     run_p.add_argument(
-        "--iterations", type=int, default=1, help="Number of learning iterations"
+        "--api-base", type=str, default=None, help="LLM API base URL (OpenAI-compatible endpoint)"
     )
     run_p.add_argument(
-        "--episodes", type=int, default=10, help="Episodes per iteration"
+        "--task-type",
+        type=str,
+        default="base",
+        help="Task type: base, hallucination, disambiguation",
     )
-    run_p.add_argument("--config", type=str, default=None, help="Config JSON file")
-    run_p.add_argument("--model", type=str, default=None, help="LLM model (litellm format)")
-    run_p.add_argument("--api-base", type=str, default=None, help="LLM API base URL (OpenAI-compatible endpoint)")
-    run_p.add_argument("--task-type", type=str, default="base",
-                       help="Task type: base, hallucination, disambiguation")
-    run_p.add_argument("--task-split", type=str, default="test",
-                       help="Data split: train, test")
+    run_p.add_argument("--task-split", type=str, default="test", help="Data split: train, test")
     run_p.add_argument("--output", type=str, default=None, help="Output directory")
     run_p.add_argument("--seed", type=int, default=None, help="Random seed")
 
     # -- eval --
     eval_p = sub.add_parser("eval", help="Evaluate current state (no learning)")
     eval_p.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
     eval_p.add_argument("--bench", required=True, help="Benchmark name")
-    eval_p.add_argument(
-        "--episodes", type=int, default=10, help="Number of episodes"
-    )
+    eval_p.add_argument("--episodes", type=int, default=10, help="Number of episodes")
     eval_p.add_argument("--config", type=str, default=None, help="Config JSON file")
 
     # -- setup-bench --
@@ -64,7 +61,9 @@ def _build_parser() -> argparse.ArgumentParser:
 
     math_p = demo_sub.add_parser("math", help="Math learning loop demo")
     math_p.add_argument("--dry-run", action="store_true", help="Use mock LLMs (no API calls)")
-    math_p.add_argument("--iterations", type=int, default=None, help="Number of learning iterations")
+    math_p.add_argument(
+        "--iterations", type=int, default=None, help="Number of learning iterations"
+    )
     math_p.add_argument("--episodes", type=int, default=None, help="Episodes per iteration")
     math_p.add_argument("--output", type=str, default="playbook.json", help="Playbook output path")
 
@@ -117,7 +116,9 @@ def _build_evolver(config: dict[str, Any]) -> Any | None:
     from clawloop.harness_backends.local import LocalEvolver
     from clawloop.llm import LiteLLMClient
 
-    model = config.get("reflector_model", config.get("model", "anthropic/claude-haiku-4-5-20251001"))
+    model = config.get(
+        "reflector_model", config.get("model", "anthropic/claude-haiku-4-5-20251001")
+    )
     client = LiteLLMClient(
         model=model,
         api_base=api_base,
@@ -132,6 +133,7 @@ def _build_evolver(config: dict[str, Any]) -> Any | None:
 def _ensure_output_dir(config: dict[str, Any], bench: str) -> None:
     """Set output dir if not configured. Convention: runs/<bench>/<timestamp>."""
     import time
+
     if "output" not in config or not config["output"]:
         config["output"] = f"./runs/{bench}/{int(time.time())}"
 
@@ -162,6 +164,7 @@ def cmd_run(args: argparse.Namespace) -> None:
 
     # Wire LocalEvolver (with Reflector) into harness for ICL learning
     from clawloop.learning_layers.harness import Harness
+
     evolver = _build_evolver(config)
     agent_state = AgentState(harness=Harness(evolver=evolver))
 
@@ -226,7 +229,14 @@ def cmd_eval(args: argparse.Namespace) -> None:
     "car": {
         "bench_dir": "benchmarks/a2a/car-bench",
         "data_setup": "scenarios/car-bench/setup.sh",
-        "uv_sync_cmd": ["uv", "sync", "--extra", "car-bench-agent", "--extra", "car-bench-evaluator"],
+        "uv_sync_cmd": [
+            "uv",
+            "sync",
+            "--extra",
+            "car-bench-agent",
+            "--extra",
+            "car-bench-evaluator",
+        ],
     },
     "entropic": {
         "bench_dir": "benchmarks/a2a/entropic-crmarenapro",

diff --git a/clawloop/collector.py b/clawloop/collector.py
@@ -97,7 +97,9 @@ def ingest(
                     done=True,
                     timing_ms=timing_ms or 0.0,
                 )
-            ] if messages else [],
+            ]
+            if messages
+            else [],
             summary=EpisodeSummary(
                 token_usage=usage,
                 timing=Timing(total_ms=timing_ms or 0.0) if timing_ms else None,

diff --git a/clawloop/config.py b/clawloop/config.py
@@ -11,6 +11,7 @@
 Missing files are skipped silently. Existing environment variables are never
 overridden (so CI/CD injected secrets always win over local ``.env``).
 """
+
 from __future__ import annotations
 
 import os

diff --git a/clawloop/core/background.py b/clawloop/core/background.py
@@ -11,7 +11,7 @@
 import json
 import logging
 import time
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Any, Protocol, runtime_checkable
 
 from clawloop.core.episode import Episode
@@ -73,7 +73,10 @@ def run(self, state: BackgroundState) -> None:
             report = self.curator.consolidate(state.playbook)
             log.info(
                 "Consolidation: %d->%d entries (merged=%d, pruned=%d)",
-                report.before, report.after, report.merged, report.pruned,
+                report.before,
+                report.after,
+                report.merged,
+                report.pruned,
             )
         except Exception:
             log.exception("PlaybookConsolidation failed")
@@ -111,9 +114,7 @@ def run(self, state: BackgroundState) -> None:
             reward = ep.summary.effective_reward()
             task = ep.task_id
             msgs = len(ep.messages)
-            episode_summaries.append(
-                f"- Task={task} reward={reward:.2f} messages={msgs}"
-            )
+            episode_summaries.append(f"- Task={task} reward={reward:.2f} messages={msgs}")
 
         if not episode_summaries:
             return
@@ -136,8 +137,7 @@ def run(self, state: BackgroundState) -> None:
                     + "\n".join(episode_summaries)
                     + "\n\n## Current Playbook Entries\n"
                     + "\n".join(
-                        f"- {e.content[:100]}"
-                        for e in state.playbook.active_entries()[:10]
+                        f"- {e.content[:100]}" for e in state.playbook.active_entries()[:10]
                     )
                     + "\n\nWhat meta-patterns do you see across these episodes?"
                 ),
@@ -155,11 +155,13 @@ def run(self, state: BackgroundState) -> None:
                     tags.append("meta-pattern")
                 content = item.get("content", "")
                 if content:
-                    insights.append(Insight(
-                        action=item.get("action", "add"),
-                        content=content,
-                        tags=tags,
-                    ))
+                    insights.append(
+                        Insight(
+                            action=item.get("action", "add"),
+                            content=content,
+                            tags=tags,
+                        )
+                    )
 
             if not insights:
                 return