From c5249a92f0a46a03a07e3e4e70180ad20e5924b7 Mon Sep 17 00:00:00 2001
From: Chris Raethke <chris@codesoda.com>
Date: Sat, 7 Mar 2026 15:11:54 +1000
Subject: [PATCH 1/4] feat: add py-agent for Python workflow checks

- Add py-agent skill and script (format, lint, typecheck, test)
- Auto-detect runner (uv, poetry, plain) and python binary (python3/python)
- Auto-detect tools: ruff/black (format), ruff/flake8 (lint),
  mypy/pyright (typecheck), pytest/unittest (test)
- CI-aware: format checks in CI, auto-fixes locally; MAX_LINES unlimited in CI
- Support --fail-fast flag (passes -x to pytest)
- Fix hints after each failure with auto-fix suggestions where available
- Add clean and issues scenario test fixtures
- Update README, install.sh with py-agent entry
---
 README.md                               |  17 +
 install.sh                              |  21 +-
 skills/py-agent/SKILL.md                |  82 +++++
 skills/py-agent/scripts/py-agent.sh     | 401 ++++++++++++++++++++++++
 tests/py-agent/clean/pyproject.toml     |   4 +
 tests/py-agent/clean/scenario.env       |   5 +
 tests/py-agent/clean/src/__init__.py    |   0
 tests/py-agent/clean/src/app.py         |   2 +
 tests/py-agent/clean/tests/__init__.py  |   0
 tests/py-agent/clean/tests/test_app.py  |  12 +
 tests/py-agent/issues/pyproject.toml    |   4 +
 tests/py-agent/issues/scenario.env      |   5 +
 tests/py-agent/issues/src/__init__.py   |   0
 tests/py-agent/issues/src/app.py        |   6 +
 tests/py-agent/issues/tests/__init__.py |   0
 tests/py-agent/issues/tests/test_app.py |  12 +
 16 files changed, 570 insertions(+), 1 deletion(-)
 create mode 100644 skills/py-agent/SKILL.md
 create mode 100755 skills/py-agent/scripts/py-agent.sh
 create mode 100644 tests/py-agent/clean/pyproject.toml
 create mode 100644 tests/py-agent/clean/scenario.env
 create mode 100644 tests/py-agent/clean/src/__init__.py
 create mode 100644 tests/py-agent/clean/src/app.py
 create mode 100644 tests/py-agent/clean/tests/__init__.py
 create mode 100644 tests/py-agent/clean/tests/test_app.py
 create mode 100644 tests/py-agent/issues/pyproject.toml
 create mode 100644 tests/py-agent/issues/scenario.env
 create mode 100644 tests/py-agent/issues/src/__init__.py
 create mode 100644 tests/py-agent/issues/src/app.py
 create mode 100644 tests/py-agent/issues/tests/__init__.py
 create mode 100644 tests/py-agent/issues/tests/test_app.py

diff --git a/README.md b/README.md
index 566141e..cb3fdf7 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ Standard build tools produce walls of text. Agents waste context window parsing
 |-------|-----------|-------|
 | `cargo-agent` | Rust | fmt, check, clippy, test (nextest) |
 | `npm-agent` | Node.js | format, lint, typecheck, test, build |
+| `py-agent` | Python | format (ruff/black), lint (ruff/flake8), typecheck (mypy/pyright), test (pytest) |
 | `terra-agent` | Terraform | fmt (check/fix), safe init, plan-safe, validate, lint (tflint) |
 
 ## Quick Start
@@ -48,6 +49,9 @@ path/to/x-agent/skills/cargo-agent/scripts/cargo-agent.sh
 # Node.js project
 path/to/x-agent/skills/npm-agent/scripts/npm-agent.sh
 
+# Python project
+path/to/x-agent/skills/py-agent/scripts/py-agent.sh
+
 # Terraform project
 path/to/x-agent/skills/terra-agent/scripts/terra-agent.sh
 ```
@@ -75,6 +79,18 @@ npm-agent.sh typecheck      # type checking only
 
 npm-agent auto-detects your package manager (bun, pnpm, yarn, npm) and finds formatters/linters from package.json scripts or common tools (biome, eslint, prettier, tsc).
 
+### py-agent
+
+```sh
+py-agent.sh                # full suite: format + lint + typecheck + test
+py-agent.sh format         # format only (auto-fix locally, check in CI)
+py-agent.sh lint           # lint only
+py-agent.sh test           # tests only
+py-agent.sh test -k login  # tests matching "login"
+```
+
+py-agent auto-detects your runner (uv, poetry, or plain python) and finds tools (ruff, black, flake8, mypy, pyright, pytest).
+
 ### terra-agent
 
 ```sh
@@ -138,6 +154,7 @@ The `skills/` directory contains Claude Code skill definitions. After installing
 
 - `/cargo-agent` — run Rust checks
 - `/npm-agent` — run Node.js checks
+- `/py-agent` — run Python checks
 - `/terra-agent` — run Terraform checks/fixes
 
 ## License
diff --git a/install.sh b/install.sh
index aec58ac..6ae0866 100755
--- a/install.sh
+++ b/install.sh
@@ -24,7 +24,7 @@ SOURCE_DIR=""
 SOURCE_MODE="remote"
 
 # Available skills to install (each has its own scripts/ subdirectory)
-SKILLS="cargo-agent npm-agent terra-agent"
+SKILLS="cargo-agent npm-agent py-agent terra-agent"
 SELECTED_SKILLS=""
 
 info() {
@@ -287,6 +287,22 @@ check_optional_deps() {
     fi
   fi
 
+  if skill_selected "py-agent"; then
+    if command -v python3 >/dev/null 2>&1 || command -v python >/dev/null 2>&1; then
+      info "  Found: python"
+    else
+      warn "  Missing: python3 (needed by py-agent)"
+      all_ok=0
+    fi
+
+    for dep in ruff black; do
+      if command -v "$dep" >/dev/null 2>&1; then
+        info "  Found: ${dep}"
+        break
+      fi
+    done
+  fi
+
   if skill_selected "terra-agent"; then
     if command -v terraform >/dev/null 2>&1; then
       info "  Found: terraform"
@@ -326,6 +342,9 @@ print_agents_md_snippet() {
       npm-agent)
         echo "- Node.js: use \`/npm-agent\` (format/lint/typecheck/test/build)."
         ;;
+      py-agent)
+        echo "- Python: use \`/py-agent\` (format/lint/typecheck/test)."
+        ;;
       terra-agent)
         echo "- Terraform: use \`/terra-agent\` (fmt-check/fmt-fix/init/plan-safe/validate/lint)."
         ;;
diff --git a/skills/py-agent/SKILL.md b/skills/py-agent/SKILL.md
new file mode 100644
index 0000000..19194a6
--- /dev/null
+++ b/skills/py-agent/SKILL.md
@@ -0,0 +1,82 @@
+---
+name: py-agent
+description: |
+  Run py-agent.sh — a lean Python workflow runner that produces agent-friendly output.
+  Use when: running Python checks (format, lint, typecheck, test), verifying Python code before committing,
+  or when the user asks to run Python checks, lint, format, or test a Python project.
+  Triggers on: py agent, run python checks, python checks, ruff mypy pytest, verify python code, run py checks.
+context: fork
+allowed-tools:
+  - Bash(scripts/py-agent.sh*)
+  - Bash(RUN_*=* scripts/py-agent.sh*)
+  - Bash(MAX_LINES=* scripts/py-agent.sh*)
+  - Bash(KEEP_DIR=* scripts/py-agent.sh*)
+  - Bash(FAIL_FAST=* scripts/py-agent.sh*)
+---
+
+# Py Agent
+
+Run the `py-agent.sh` script for lean, structured Python workflow output designed for coding agents.
+
+## Script Location
+
+```
+scripts/py-agent.sh
+```
+
+## Usage
+
+### Run Full Suite (format + lint + typecheck + test)
+```bash
+scripts/py-agent.sh
+```
+
+### Run Individual Steps
+```bash
+scripts/py-agent.sh format       # format (auto-fix locally, check in CI)
+scripts/py-agent.sh lint         # lint only
+scripts/py-agent.sh typecheck    # typecheck only
+scripts/py-agent.sh test         # tests only
+scripts/py-agent.sh all          # full suite (default)
+```
+
+### Pass Args to Tests
+```bash
+scripts/py-agent.sh test -k test_login       # filter by name
+scripts/py-agent.sh test tests/unit/          # specific directory
+```
+
+## Environment Knobs
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `RUN_FORMAT` | `1` | Set to `0` to skip format |
+| `RUN_LINT` | `1` | Set to `0` to skip lint |
+| `RUN_TYPECHECK` | `1` | Set to `0` to skip typecheck |
+| `RUN_TESTS` | `1` | Set to `0` to skip tests |
+| `MAX_LINES` | `40` | Max output lines printed per step (unlimited in CI) |
+| `KEEP_DIR` | `0` | Set to `1` to keep temp log dir on success |
+| `FAIL_FAST` | `0` | Stop after first failing step; passes `-x` to pytest |
+
+## Auto-Detection
+
+- **Runner**: detects uv, poetry, or plain python from lock files
+- **Format**: tries ruff format, then black
+- **Lint**: tries ruff check, then flake8
+- **Typecheck**: tries mypy, then pyright
+- **Tests**: tries pytest, then python -m unittest discover
+- **CI mode**: format runs `--check` in CI, auto-fixes locally
+
+## Output Format
+
+- Each step prints a header (`Step: format`, `Step: lint`, etc.)
+- Results are `PASS`, `FAIL`, or `SKIP`
+- On failure, output is truncated to `MAX_LINES` with full logs on disk
+- `Fix:` hint after each failure (suggests auto-fix commands where available)
+- Overall result is printed at the end: `Overall: PASS` or `Overall: FAIL`
+
+## Important Notes
+
+- The script must be run from within a Python project directory (requires pyproject.toml, setup.py, setup.cfg, or requirements.txt)
+- Steps are skipped gracefully if no matching tool is found
+- On failure, the temp log directory is preserved automatically for inspection
diff --git a/skills/py-agent/scripts/py-agent.sh b/skills/py-agent/scripts/py-agent.sh
new file mode 100755
index 0000000..1a54ede
--- /dev/null
+++ b/skills/py-agent/scripts/py-agent.sh
@@ -0,0 +1,401 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# py-agent: lean Python workflow output for coding agents
+# deps: bash, mktemp
+# optional: ruff, black, mypy, pyright, pytest
+
+KEEP_DIR="${KEEP_DIR:-0}"         # set to 1 to keep temp dir even on success
+# In CI, show full output; locally, limit to 40 lines to keep things tidy.
+if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
+  MAX_LINES="${MAX_LINES:-999999}"
+else
+  MAX_LINES="${MAX_LINES:-40}"
+fi
+RUN_FORMAT="${RUN_FORMAT:-1}"     # set to 0 to skip format
+RUN_LINT="${RUN_LINT:-1}"         # set to 0 to skip lint
+RUN_TYPECHECK="${RUN_TYPECHECK:-1}" # set to 0 to skip typecheck
+RUN_TESTS="${RUN_TESTS:-1}"       # set to 0 to skip tests
+FAIL_FAST="${FAIL_FAST:-0}"      # set to 1 or use --fail-fast to stop after first failure
+
+TMPDIR_ROOT="${TMPDIR_ROOT:-/tmp}"
+OUTDIR="$(mktemp -d "${TMPDIR_ROOT%/}/py-agent.XXXXXX")"
+
+cleanup() {
+  local code="$?"
+  if [[ "$KEEP_DIR" == "1" || "$code" != "0" ]]; then
+    echo "Logs kept in: $OUTDIR"
+  else
+    rm -rf "$OUTDIR"
+  fi
+  exit "$code"
+}
+trap cleanup EXIT
+
+hr() { echo "------------------------------------------------------------"; }
+
+# Returns 0 (continue) unless fail-fast is on and a step already failed.
+should_continue() { [[ "$FAIL_FAST" != "1" || "$overall_ok" == "1" ]]; }
+
+STEP_START_SECONDS=0
+
+step() {
+  local name="$1"
+  STEP_START_SECONDS=$SECONDS
+  hr
+  echo "Step: $name"
+}
+
+fmt_elapsed() {
+  local elapsed=$(( SECONDS - STEP_START_SECONDS ))
+  echo "Time: ${elapsed}s"
+}
+
+# Detect Python runner (uv, poetry, or plain python/pip).
+detect_runner() {
+  if [[ -f "uv.lock" ]] && command -v uv >/dev/null 2>&1; then echo "uv"
+  elif [[ -f "poetry.lock" ]] && command -v poetry >/dev/null 2>&1; then echo "poetry"
+  else echo "plain"
+  fi
+}
+
+# Resolve python binary (prefer python3, fall back to python).
+detect_python() {
+  if command -v python3 >/dev/null 2>&1; then echo "python3"
+  elif command -v python >/dev/null 2>&1; then echo "python"
+  else echo "python3"  # let it fail with a clear error
+  fi
+}
+
+RUNNER="$(detect_runner)"
+PYTHON="$(detect_python)"
+echo "Runner: $RUNNER"
+
+# Run a command through the detected runner, or plain.
+# Replaces bare "python" with the detected python binary.
+run_cmd() {
+  local cmd="$1"
+  shift
+  if [[ "$cmd" == "python" ]]; then
+    cmd="$PYTHON"
+  fi
+  case "$RUNNER" in
+    uv)     uv run "$cmd" "$@" ;;
+    poetry) poetry run "$cmd" "$@" ;;
+    *)      "$cmd" "$@" ;;
+  esac
+}
+
+# Check if a tool is available (directly or via runner).
+have_tool() {
+  local tool="$1"
+  case "$RUNNER" in
+    uv)     uv run "$tool" --version >/dev/null 2>&1 ;;
+    poetry) poetry run "$tool" --version >/dev/null 2>&1 ;;
+    *)      command -v "$tool" >/dev/null 2>&1 ;;
+  esac
+}
+
+run_format() {
+  step "format"
+  local log="$OUTDIR/format.log"
+  local ok=1
+  local found=0
+  local mode="fix"
+  if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
+    mode="check"
+  fi
+
+  echo "Mode: $mode"
+
+  # Try ruff format first, then black
+  if have_tool ruff; then
+    found=1
+    echo "Using: ruff format"
+    local -a args=(ruff format)
+    if [[ "$mode" == "check" ]]; then
+      args+=(--check --diff)
+    fi
+    if run_cmd "${args[@]}" >"$log" 2>&1; then
+      :
+    else
+      ok=0
+    fi
+  elif have_tool black; then
+    found=1
+    echo "Using: black"
+    local -a args=(black .)
+    if [[ "$mode" == "check" ]]; then
+      args=(black --check --diff .)
+    fi
+    if run_cmd "${args[@]}" >"$log" 2>&1; then
+      :
+    else
+      ok=0
+    fi
+  fi
+
+  if [[ "$found" == "0" ]]; then
+    echo "Result: SKIP (no formatter found — install ruff or black)"
+    fmt_elapsed
+    return 0
+  fi
+
+  if [[ "$ok" == "0" && -s "$log" ]]; then
+    echo
+    echo "Output (first ${MAX_LINES} lines):"
+    head -n "$MAX_LINES" "$log"
+  fi
+
+  echo
+  echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)"
+  if [[ "$ok" == "0" ]]; then
+    if [[ "$mode" == "check" ]]; then
+      echo "Fix: run /py-agent format (auto-fixes locally), then re-check"
+    else
+      echo "Fix: resolve the formatting issues, then re-run: /py-agent format"
+    fi
+  fi
+  echo "Full log: $log"
+  fmt_elapsed
+  [[ "$ok" == "1" ]]
+}
+
+run_lint() {
+  step "lint"
+  local log="$OUTDIR/lint.log"
+  local ok=1
+  local found=0
+
+  # Try ruff check first, then flake8
+  if have_tool ruff; then
+    found=1
+    echo "Using: ruff check"
+    if run_cmd ruff check >"$log" 2>&1; then
+      :
+    else
+      ok=0
+    fi
+  elif have_tool flake8; then
+    found=1
+    echo "Using: flake8"
+    if run_cmd flake8 >"$log" 2>&1; then
+      :
+    else
+      ok=0
+    fi
+  fi
+
+  if [[ "$found" == "0" ]]; then
+    echo "Result: SKIP (no linter found — install ruff or flake8)"
+    fmt_elapsed
+    return 0
+  fi
+
+  if [[ "$ok" == "0" && -s "$log" ]]; then
+    echo
+    echo "Output (first ${MAX_LINES} lines):"
+    head -n "$MAX_LINES" "$log"
+  fi
+
+  echo
+  echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)"
+  if [[ "$ok" == "0" ]]; then
+    if have_tool ruff; then
+      echo "Fix: try 'ruff check --fix' for auto-fixable issues, then re-run: /py-agent lint"
+    else
+      echo "Fix: resolve the lint errors above, then re-run: /py-agent lint"
+    fi
+  fi
+  echo "Full log: $log"
+  fmt_elapsed
+  [[ "$ok" == "1" ]]
+}
+
+run_typecheck() {
+  step "typecheck"
+  local log="$OUTDIR/typecheck.log"
+  local ok=1
+  local found=0
+
+  # Try mypy first, then pyright
+  if have_tool mypy; then
+    found=1
+    echo "Using: mypy"
+    if run_cmd mypy . >"$log" 2>&1; then
+      :
+    else
+      ok=0
+    fi
+  elif have_tool pyright; then
+    found=1
+    echo "Using: pyright"
+    if run_cmd pyright >"$log" 2>&1; then
+      :
+    else
+      ok=0
+    fi
+  fi
+
+  if [[ "$found" == "0" ]]; then
+    echo "Result: SKIP (no type checker found — install mypy or pyright)"
+    fmt_elapsed
+    return 0
+  fi
+
+  if [[ "$ok" == "0" && -s "$log" ]]; then
+    echo
+    echo "Output (first ${MAX_LINES} lines):"
+    head -n "$MAX_LINES" "$log"
+  fi
+
+  echo
+  echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)"
+  [[ "$ok" == "0" ]] && echo "Fix: resolve the type errors above, then re-run: /py-agent typecheck"
+  echo "Full log: $log"
+  fmt_elapsed
+  [[ "$ok" == "1" ]]
+}
+
+run_tests() {
+  step "test"
+  local log="$OUTDIR/test.log"
+  local ok=1
+  local found=0
+
+  local -a test_args=()
+  [[ "$FAIL_FAST" == "1" ]] && test_args+=(-x)
+
+  # Pass through extra args (e.g. test file paths, -k filters).
+  # Bash 3.2 + `set -u` treats "${arr[@]}" on an empty array as unbound.
+  if [[ $# -gt 0 ]]; then
+    test_args+=("$@")
+  fi
+
+  if have_tool pytest; then
+    found=1
+    echo "Using: pytest"
+    if [[ ${#test_args[@]} -gt 0 ]]; then
+      run_cmd pytest "${test_args[@]}" >"$log" 2>&1 || ok=0
+    else
+      run_cmd pytest >"$log" 2>&1 || ok=0
+    fi
+  else
+    # Fallback: python -m unittest
+    found=1
+    echo "Using: python -m unittest"
+    local rc=0
+    if [[ ${#test_args[@]} -gt 0 ]]; then
+      run_cmd python -m unittest discover "${test_args[@]}" >"$log" 2>&1 || rc=$?
+    else
+      run_cmd python -m unittest discover >"$log" 2>&1 || rc=$?
+    fi
+    if [[ "$rc" != "0" ]]; then
+      # If discover fails with no tests, treat as skip
+      if grep -q "Ran 0 tests" "$log" 2>/dev/null; then
+        echo "Result: SKIP (no tests found)"
+        fmt_elapsed
+        return 0
+      fi
+      ok=0
+    fi
+  fi
+
+  if [[ "$ok" == "0" && -s "$log" ]]; then
+    echo
+    echo "Output (first ${MAX_LINES} lines):"
+    head -n "$MAX_LINES" "$log"
+  fi
+
+  echo
+  echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)"
+  [[ "$ok" == "0" ]] && echo "Fix: resolve the failing tests, then re-run: /py-agent test"
+  echo "Full log: $log"
+  fmt_elapsed
+  [[ "$ok" == "1" ]]
+}
+
+usage() {
+  cat <<'EOF'
+py-agent: lean Python workflow output for coding agents
+
+Usage:
+  py-agent [--fail-fast]              # runs format, lint, typecheck, test
+  py-agent [--fail-fast] format|lint|typecheck|test|all
+  py-agent [--fail-fast] test [PYTEST_ARGS]
+
+Flags:
+  --fail-fast            stop after first failing step; also passes -x to pytest
+
+Env knobs:
+  MAX_LINES=40           # printed lines per step (unlimited in CI)
+  KEEP_DIR=0|1           # keep temp log dir even on success
+  FAIL_FAST=0|1          # same as --fail-fast flag
+  RUN_FORMAT=0|1
+  RUN_LINT=0|1
+  RUN_TYPECHECK=0|1
+  RUN_TESTS=0|1
+
+Auto-detection:
+  - Runner: detects uv, poetry, or plain python from lock files
+  - Format: tries ruff format, then black
+  - Lint: tries ruff check, then flake8
+  - Typecheck: tries mypy, then pyright
+  - Tests: tries pytest, then python -m unittest discover
+  - CI mode: format runs --check in CI, auto-fixes locally
+
+Examples:
+  py-agent                             # full suite
+  py-agent --fail-fast                 # full suite, stop on first failure
+  py-agent lint                        # lint only
+  py-agent test                        # tests only
+  py-agent test -k test_login          # tests matching "test_login"
+  py-agent test tests/unit/            # tests in a specific directory
+  RUN_TESTS=0 py-agent                 # skip tests
+EOF
+}
+
+main() {
+  while [[ "${1:-}" == --* ]]; do
+    case "$1" in
+      --fail-fast) FAIL_FAST=1; shift ;;
+      *) break ;;
+    esac
+  done
+
+  local cmd="${1:-all}"
+  shift 2>/dev/null || true
+  local overall_ok=1
+
+  # Verify we're in a Python project
+  if [[ ! -f "pyproject.toml" && ! -f "setup.py" && ! -f "setup.cfg" && ! -f "requirements.txt" ]]; then
+    echo "Error: no Python project found (expected pyproject.toml, setup.py, setup.cfg, or requirements.txt)" >&2
+    exit 2
+  fi
+
+  case "$cmd" in
+    -h|--help|help) usage; exit 0 ;;
+    format)    run_format    || overall_ok=0 ;;
+    lint)      run_lint      || overall_ok=0 ;;
+    typecheck) run_typecheck || overall_ok=0 ;;
+    test)      run_tests "$@" || overall_ok=0 ;;
+    all)
+      if [[ "$RUN_FORMAT" == "1" ]]; then run_format || overall_ok=0; fi
+      if [[ "$RUN_LINT" == "1" ]] && should_continue; then run_lint || overall_ok=0; fi
+      if [[ "$RUN_TYPECHECK" == "1" ]] && should_continue; then run_typecheck || overall_ok=0; fi
+      if [[ "$RUN_TESTS" == "1" ]] && should_continue; then run_tests || overall_ok=0; fi
+      ;;
+    *)
+      echo "Unknown command: $cmd" >&2
+      usage
+      exit 2
+      ;;
+  esac
+
+  hr
+  echo "Overall: $([[ "$overall_ok" == "1" ]] && echo PASS || echo FAIL)"
+  echo "Logs: $OUTDIR"
+  [[ "$overall_ok" == "1" ]]
+}
+
+main "$@"
diff --git a/tests/py-agent/clean/pyproject.toml b/tests/py-agent/clean/pyproject.toml
new file mode 100644
index 0000000..e073c3b
--- /dev/null
+++ b/tests/py-agent/clean/pyproject.toml
@@ -0,0 +1,4 @@
+[project]
+name = "py-agent-clean-scenario"
+version = "0.1.0"
+requires-python = ">=3.9"
diff --git a/tests/py-agent/clean/scenario.env b/tests/py-agent/clean/scenario.env
new file mode 100644
index 0000000..baac4c2
--- /dev/null
+++ b/tests/py-agent/clean/scenario.env
@@ -0,0 +1,5 @@
+SCENARIO_NAME="py-agent clean"
+AGENT_SCRIPT="skills/py-agent/scripts/py-agent.sh"
+RUN_ARGS="all"
+EXPECT_EXIT="0"
+REQUIRED_TOOLS="python3"
diff --git a/tests/py-agent/clean/src/__init__.py b/tests/py-agent/clean/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/py-agent/clean/src/app.py b/tests/py-agent/clean/src/app.py
new file mode 100644
index 0000000..1e21eeb
--- /dev/null
+++ b/tests/py-agent/clean/src/app.py
@@ -0,0 +1,2 @@
+def greet(name: str) -> str:
+    return f"Hello, {name}!"
diff --git a/tests/py-agent/clean/tests/__init__.py b/tests/py-agent/clean/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/py-agent/clean/tests/test_app.py b/tests/py-agent/clean/tests/test_app.py
new file mode 100644
index 0000000..ef8234a
--- /dev/null
+++ b/tests/py-agent/clean/tests/test_app.py
@@ -0,0 +1,12 @@
+import unittest
+
+from src.app import greet
+
+
+class TestApp(unittest.TestCase):
+    def test_greet(self):
+        self.assertEqual(greet("world"), "Hello, world!")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/py-agent/issues/pyproject.toml b/tests/py-agent/issues/pyproject.toml
new file mode 100644
index 0000000..e3cfed0
--- /dev/null
+++ b/tests/py-agent/issues/pyproject.toml
@@ -0,0 +1,4 @@
+[project]
+name = "py-agent-issues-scenario"
+version = "0.1.0"
+requires-python = ">=3.9"
diff --git a/tests/py-agent/issues/scenario.env b/tests/py-agent/issues/scenario.env
new file mode 100644
index 0000000..d783e90
--- /dev/null
+++ b/tests/py-agent/issues/scenario.env
@@ -0,0 +1,5 @@
+SCENARIO_NAME="py-agent issues"
+AGENT_SCRIPT="skills/py-agent/scripts/py-agent.sh"
+RUN_ARGS="all"
+EXPECT_EXIT="1"
+REQUIRED_TOOLS="python3"
diff --git a/tests/py-agent/issues/src/__init__.py b/tests/py-agent/issues/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/py-agent/issues/src/app.py b/tests/py-agent/issues/src/app.py
new file mode 100644
index 0000000..b48df94
--- /dev/null
+++ b/tests/py-agent/issues/src/app.py
@@ -0,0 +1,6 @@
+import os
+import sys
+import json
+
+def greet(name):
+    return f"Hello, {name}!"
diff --git a/tests/py-agent/issues/tests/__init__.py b/tests/py-agent/issues/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/py-agent/issues/tests/test_app.py b/tests/py-agent/issues/tests/test_app.py
new file mode 100644
index 0000000..4024be4
--- /dev/null
+++ b/tests/py-agent/issues/tests/test_app.py
@@ -0,0 +1,12 @@
+import unittest
+
+from src.app import greet
+
+
+class TestApp(unittest.TestCase):
+    def test_greet_fails(self):
+        self.assertEqual(greet("world"), "Wrong answer!")
+
+
+if __name__ == "__main__":
+    unittest.main()

From 991bb8ea186ff803b93b5d1d1118ec5a9024065a Mon Sep 17 00:00:00 2001
From: Chris Raethke <chris@codesoda.com>
Date: Sat, 7 Mar 2026 15:14:52 +1000
Subject: [PATCH 2/4] feat(cargo-agent): report reformatted files in fmt fix
 mode

In fix mode, run --check first to detect which files need formatting,
then apply the fix and list the changed files in the output.
Ported from ai-barometer's cargo-agent.
---
 skills/cargo-agent/scripts/cargo-agent.sh | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/skills/cargo-agent/scripts/cargo-agent.sh b/skills/cargo-agent/scripts/cargo-agent.sh
index 1605054..588b465 100755
--- a/skills/cargo-agent/scripts/cargo-agent.sh
+++ b/skills/cargo-agent/scripts/cargo-agent.sh
@@ -148,7 +148,25 @@ run_fmt() {
   fi
 
   echo "Mode: $mode"
-  if cargo "${fmt_args[@]}" >"$log" 2>&1; then
+  if [[ "$mode" == "fix" ]]; then
+    # In fix mode, first check which files need formatting, then apply.
+    local needs_fmt
+    needs_fmt="$(cargo fmt --all -- --check 2>&1 || true)"
+    cargo "${fmt_args[@]}" >"$log" 2>&1
+    if [[ -n "$needs_fmt" ]]; then
+      local changed_files
+      changed_files="$(echo "$needs_fmt" | grep '^Diff in' | sed 's/^Diff in //' | sed 's/:[0-9]*:$//' | sort -u)"
+      if [[ -n "$changed_files" ]]; then
+        echo "Result: PASS (files reformatted)"
+        echo "Files fixed:"
+        echo "$changed_files" | while read -r f; do echo "  $f"; done
+      else
+        echo "Result: PASS"
+      fi
+    else
+      echo "Result: PASS"
+    fi
+  elif cargo "${fmt_args[@]}" >"$log" 2>&1; then
     echo "Result: PASS"
   else
     ok=0

From c22619374f2a004e8e7b65b0365a19423d2fef01 Mon Sep 17 00:00:00 2001
From: Chris Raethke <chris@codesoda.com>
Date: Sun, 8 Mar 2026 19:07:56 +1000
Subject: [PATCH 3/4] fix(py-agent): add CI job, workflow lock, and
 CHANGED_FILES support

- Add py-agent CI job with baseline and with-ruff matrix variants
- Add workflow-level flock (Linux) / Perl fallback (macOS) to prevent
  concurrent py-agent runs
- Add CHANGED_FILES support: scopes format and lint to changed .py
  files; typecheck and test run project-wide (need full context)
- Add CHANGED_FILES to SKILL.md allowed-tools and env knobs table
---
 .github/workflows/ci.yml            | 33 ++++++++++++
 skills/py-agent/SKILL.md            |  2 +
 skills/py-agent/scripts/py-agent.sh | 84 +++++++++++++++++++++++++++--
 3 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1caff9e..e7d405c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -71,6 +71,39 @@ jobs:
           CI: "true"
         run: tests/run-scenarios.sh npm-agent
 
+  # ── py-agent scenarios ──────────────────────────────────────────────
+  py-agent:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # baseline: no optional python tools
+          - name: baseline
+            install_ruff: false
+
+          # with ruff
+          - name: with-ruff
+            install_ruff: true
+
+    name: "py-agent (${{ matrix.name }})"
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install ruff
+        if: matrix.install_ruff
+        run: pip install ruff
+
+      - name: Run py-agent scenarios
+        env:
+          CI: "true"
+        run: tests/run-scenarios.sh py-agent
+
   # ── terra-agent scenarios ───────────────────────────────────────────
   terra-agent:
     runs-on: ubuntu-latest
diff --git a/skills/py-agent/SKILL.md b/skills/py-agent/SKILL.md
index 19194a6..dfca7c6 100644
--- a/skills/py-agent/SKILL.md
+++ b/skills/py-agent/SKILL.md
@@ -12,6 +12,7 @@ allowed-tools:
   - Bash(MAX_LINES=* scripts/py-agent.sh*)
   - Bash(KEEP_DIR=* scripts/py-agent.sh*)
   - Bash(FAIL_FAST=* scripts/py-agent.sh*)
+  - Bash(CHANGED_FILES=* scripts/py-agent.sh*)
 ---
 
 # Py Agent
@@ -57,6 +58,7 @@ scripts/py-agent.sh test tests/unit/          # specific directory
 | `MAX_LINES` | `40` | Max output lines printed per step (unlimited in CI) |
 | `KEEP_DIR` | `0` | Set to `1` to keep temp log dir on success |
 | `FAIL_FAST` | `0` | Stop after first failing step; passes `-x` to pytest |
+| `CHANGED_FILES` | _(empty)_ | Space-separated changed file paths; scopes format/lint to affected `.py` files |
 
 ## Auto-Detection
 
diff --git a/skills/py-agent/scripts/py-agent.sh b/skills/py-agent/scripts/py-agent.sh
index 1a54ede..d336bcf 100755
--- a/skills/py-agent/scripts/py-agent.sh
+++ b/skills/py-agent/scripts/py-agent.sh
@@ -17,6 +17,7 @@ RUN_LINT="${RUN_LINT:-1}"         # set to 0 to skip lint
 RUN_TYPECHECK="${RUN_TYPECHECK:-1}" # set to 0 to skip typecheck
 RUN_TESTS="${RUN_TESTS:-1}"       # set to 0 to skip tests
 FAIL_FAST="${FAIL_FAST:-0}"      # set to 1 or use --fail-fast to stop after first failure
+CHANGED_FILES="${CHANGED_FILES:-}"  # space-separated list of changed files; scopes format/lint to affected .py files
 
 TMPDIR_ROOT="${TMPDIR_ROOT:-/tmp}"
 OUTDIR="$(mktemp -d "${TMPDIR_ROOT%/}/py-agent.XXXXXX")"
@@ -32,6 +33,31 @@ cleanup() {
 }
 trap cleanup EXIT
 
+# Workflow-level lock: only one py-agent instance runs at a time. The lock is
+# taken on fd 9 and held until the script exits; wait notices go to stderr.
+LOCKFILE="${TMPDIR_ROOT%/}/py-agent.lock"
+exec 9>"$LOCKFILE"
+if command -v flock >/dev/null 2>&1; then
+  if ! flock -n 9; then
+    echo "py-agent: waiting for another run to finish..." >&2
+    flock 9
+  fi
+else
+  # flock(1) missing (e.g. macOS): take the same lock on fd 9 through perl.
+  if ! command -v perl >/dev/null 2>&1; then
+    echo "Warning: neither flock nor perl available; skipping workflow lock" >&2
+  else
+    perl -e '
+      use Fcntl ":flock";
+      open(my $fh, ">&=", 9) or die "fdopen: $!";
+      if (!flock($fh, LOCK_EX | LOCK_NB)) {
+        print STDERR "py-agent: waiting for another run to finish...\n";
+        flock($fh, LOCK_EX) or die "flock: $!";
+      }
+    '
+  fi
+fi
+
 hr() { echo "------------------------------------------------------------"; }
 
 # Returns 0 (continue) unless fail-fast is on and a step already failed.
@@ -96,6 +122,25 @@ have_tool() {
   esac
 }
 
+# Resolve CHANGED_FILES to .py files that still exist on disk (deleted or
+# renamed-away paths are skipped). Populates _CHANGED_PY_FILES.
+_CHANGED_PY_FILES=()
+resolve_changed_py_files() {
+  _CHANGED_PY_FILES=()
+  if [[ -z "$CHANGED_FILES" ]]; then return; fi
+
+  local file
+  for file in $CHANGED_FILES; do
+    if [[ "$file" == *.py && -f "$file" ]]; then
+      _CHANGED_PY_FILES+=("$file")
+    fi
+  done
+
+  if [[ ${#_CHANGED_PY_FILES[@]} -gt 0 ]]; then
+    echo "Scoped to ${#_CHANGED_PY_FILES[@]} changed .py file(s)"
+  fi
+}
+
 run_format() {
   step "format"
   local log="$OUTDIR/format.log"
@@ -108,6 +153,12 @@ run_format() {
 
   echo "Mode: $mode"
 
+  # Collect scoped file targets (empty = whole project).
+  local -a targets=()
+  if [[ ${#_CHANGED_PY_FILES[@]} -gt 0 ]]; then
+    targets=("${_CHANGED_PY_FILES[@]}")
+  fi
+
   # Try ruff format first, then black
   if have_tool ruff; then
     found=1
@@ -116,6 +167,9 @@ run_format() {
     if [[ "$mode" == "check" ]]; then
       args+=(--check --diff)
     fi
+    if [[ ${#targets[@]} -gt 0 ]]; then
+      args+=("${targets[@]}")
+    fi
     if run_cmd "${args[@]}" >"$log" 2>&1; then
       :
     else
@@ -124,9 +178,14 @@ run_format() {
   elif have_tool black; then
     found=1
     echo "Using: black"
-    local -a args=(black .)
+    local -a args=(black)
     if [[ "$mode" == "check" ]]; then
-      args=(black --check --diff .)
+      args+=(--check --diff)
+    fi
+    if [[ ${#targets[@]} -gt 0 ]]; then
+      args+=("${targets[@]}")
+    else
+      args+=(.)
     fi
     if run_cmd "${args[@]}" >"$log" 2>&1; then
       :
@@ -167,11 +226,21 @@ run_lint() {
   local ok=1
   local found=0
 
+  # Collect scoped file targets (empty = whole project).
+  local -a targets=()
+  if [[ ${#_CHANGED_PY_FILES[@]} -gt 0 ]]; then
+    targets=("${_CHANGED_PY_FILES[@]}")
+  fi
+
   # Try ruff check first, then flake8
   if have_tool ruff; then
     found=1
     echo "Using: ruff check"
-    if run_cmd ruff check >"$log" 2>&1; then
+    local -a args=(ruff check)
+    if [[ ${#targets[@]} -gt 0 ]]; then
+      args+=("${targets[@]}")
+    fi
+    if run_cmd "${args[@]}" >"$log" 2>&1; then
       :
     else
       ok=0
@@ -179,7 +248,11 @@ run_lint() {
   elif have_tool flake8; then
     found=1
     echo "Using: flake8"
-    if run_cmd flake8 >"$log" 2>&1; then
+    local -a args=(flake8)
+    if [[ ${#targets[@]} -gt 0 ]]; then
+      args+=("${targets[@]}")
+    fi
+    if run_cmd "${args[@]}" >"$log" 2>&1; then
       :
     else
       ok=0
@@ -335,6 +408,7 @@ Env knobs:
   RUN_LINT=0|1
   RUN_TYPECHECK=0|1
   RUN_TESTS=0|1
+  CHANGED_FILES="f1 f2"   # scope format/lint to changed .py files
 
 Auto-detection:
   - Runner: detects uv, poetry, or plain python from lock files
@@ -367,6 +441,8 @@ main() {
   shift 2>/dev/null || true
   local overall_ok=1
 
+  resolve_changed_py_files
+
   # Verify we're in a Python project
   if [[ ! -f "pyproject.toml" && ! -f "setup.py" && ! -f "setup.cfg" && ! -f "requirements.txt" ]]; then
     echo "Error: no Python project found (expected pyproject.toml, setup.py, setup.cfg, or requirements.txt)" >&2

From 5f80b021924180aa036532c30f7f1b463a9450b6 Mon Sep 17 00:00:00 2001
From: Chris Raethke <chris@codesoda.com>
Date: Sun, 8 Mar 2026 19:16:23 +1000
Subject: [PATCH 4/4] fix: address PR review feedback

- Handle help/--help/-h before project-existence checks (in py-agent, and as a documented convention)
- Switch have_tool() to use `which` via runner instead of --version
- Make unittest fallback conditional on python being available
- Add SKIP result when no test runner is found
- Update definition-of-done with help-accessibility requirement
---
 AGENTS.md                           |  1 +
 docs/agents/definition-of-done.md   |  2 +-
 skills/py-agent/scripts/py-agent.sh | 24 ++++++++++++++++++------
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 9b6525a..1a2f959 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -9,6 +9,7 @@ Read this file first, then open only the linked docs you need.
 - Keep output concise and structured (`Step`, `Result`, `Fix`, `Overall`, log path).
 - Support shared knobs (`RUN_<STEP>`, `MAX_LINES`, `KEEP_DIR`, `FAIL_FAST`, `CHANGED_FILES`).
 - Every `Result: FAIL` must include a `Fix:` hint.
+- The `help`/`--help`/`-h` command must work without project context — resolve it before any project-existence checks.
 - `shellcheck --severity=warning` must pass on all scripts.
 - Ship one backlog item per commit.
 
diff --git a/docs/agents/definition-of-done.md b/docs/agents/definition-of-done.md
index ed60dc8..dd8f0cf 100644
--- a/docs/agents/definition-of-done.md
+++ b/docs/agents/definition-of-done.md
@@ -8,7 +8,7 @@ An agent is done only when all items below are complete.
 - `skills/<name>-agent/SKILL.md` `allowed-tools` includes patterns for all env knobs (`RUN_*`, `MAX_LINES`, `KEEP_DIR`, `FAIL_FAST`, plus any agent-specific knobs).
 - `skills/<name>-agent/scripts/<name>-agent.sh` exists and is executable.
 - Script starts with `set -euo pipefail`.
-- Script has `--help`/`help`/`-h` usage output.
+- Script has `--help`/`help`/`-h` usage output that works without project context (resolved before project-existence checks).
 
 ## Output Contract
 
diff --git a/skills/py-agent/scripts/py-agent.sh b/skills/py-agent/scripts/py-agent.sh
index d336bcf..8476c69 100755
--- a/skills/py-agent/scripts/py-agent.sh
+++ b/skills/py-agent/scripts/py-agent.sh
@@ -113,11 +113,13 @@ run_cmd() {
 }
 
 # Check if a tool is available (directly or via runner).
+# For uv/poetry, probes with `<runner> run which <tool>` instead of
+# `<runner> run <tool> --version`, which can auto-install missing packages.
 have_tool() {
   local tool="$1"
   case "$RUNNER" in
-    uv)     uv run "$tool" --version >/dev/null 2>&1 ;;
-    poetry) poetry run "$tool" --version >/dev/null 2>&1 ;;
+    uv)     uv run which "$tool" >/dev/null 2>&1 ;;
+    poetry) poetry run which "$tool" >/dev/null 2>&1 ;;
     *)      command -v "$tool" >/dev/null 2>&1 ;;
   esac
 }
@@ -353,7 +355,7 @@ run_tests() {
     else
       run_cmd pytest >"$log" 2>&1 || ok=0
     fi
-  else
+  elif command -v "$PYTHON" >/dev/null 2>&1; then
     # Fallback: python -m unittest
     found=1
     echo "Using: python -m unittest"
@@ -374,6 +376,12 @@ run_tests() {
     fi
   fi
 
+  if [[ "$found" == "0" ]]; then
+    echo "Result: SKIP (no test runner found — install pytest)"
+    fmt_elapsed
+    return 0
+  fi
+
   if [[ "$ok" == "0" && -s "$log" ]]; then
     echo
     echo "Output (first ${MAX_LINES} lines):"
@@ -439,9 +447,11 @@ main() {
 
   local cmd="${1:-all}"
   shift 2>/dev/null || true
-  local overall_ok=1
 
-  resolve_changed_py_files
+  # Help must work without project context.
+  case "$cmd" in
+    -h|--help|help) usage; exit 0 ;;
+  esac
 
   # Verify we're in a Python project
   if [[ ! -f "pyproject.toml" && ! -f "setup.py" && ! -f "setup.cfg" && ! -f "requirements.txt" ]]; then
@@ -449,8 +459,10 @@ main() {
     exit 2
   fi
 
+  local overall_ok=1
+  resolve_changed_py_files
+
   case "$cmd" in
-    -h|--help|help) usage; exit 0 ;;
     format)    run_format    || overall_ok=0 ;;
     lint)      run_lint      || overall_ok=0 ;;
     typecheck) run_typecheck || overall_ok=0 ;;