From c5249a92f0a46a03a07e3e4e70180ad20e5924b7 Mon Sep 17 00:00:00 2001 From: Chris Raethke Date: Sat, 7 Mar 2026 15:11:54 +1000 Subject: [PATCH 1/4] feat: add py-agent for Python workflow checks - Add py-agent skill and script (format, lint, typecheck, test) - Auto-detect runner (uv, poetry, plain) and python binary (python3/python) - Auto-detect tools: ruff/black (format), ruff/flake8 (lint), mypy/pyright (typecheck), pytest/unittest (test) - CI-aware: format checks in CI, auto-fixes locally; MAX_LINES unlimited in CI - Support --fail-fast flag (passes -x to pytest) - Fix hints after each failure with auto-fix suggestions where available - Add clean and issues scenario test fixtures - Update README, install.sh with py-agent entry --- README.md | 17 + install.sh | 21 +- skills/py-agent/SKILL.md | 82 +++++ skills/py-agent/scripts/py-agent.sh | 401 ++++++++++++++++++++++++ tests/py-agent/clean/pyproject.toml | 4 + tests/py-agent/clean/scenario.env | 5 + tests/py-agent/clean/src/__init__.py | 0 tests/py-agent/clean/src/app.py | 2 + tests/py-agent/clean/tests/__init__.py | 0 tests/py-agent/clean/tests/test_app.py | 12 + tests/py-agent/issues/pyproject.toml | 4 + tests/py-agent/issues/scenario.env | 5 + tests/py-agent/issues/src/__init__.py | 0 tests/py-agent/issues/src/app.py | 6 + tests/py-agent/issues/tests/__init__.py | 0 tests/py-agent/issues/tests/test_app.py | 12 + 16 files changed, 570 insertions(+), 1 deletion(-) create mode 100644 skills/py-agent/SKILL.md create mode 100755 skills/py-agent/scripts/py-agent.sh create mode 100644 tests/py-agent/clean/pyproject.toml create mode 100644 tests/py-agent/clean/scenario.env create mode 100644 tests/py-agent/clean/src/__init__.py create mode 100644 tests/py-agent/clean/src/app.py create mode 100644 tests/py-agent/clean/tests/__init__.py create mode 100644 tests/py-agent/clean/tests/test_app.py create mode 100644 tests/py-agent/issues/pyproject.toml create mode 100644 tests/py-agent/issues/scenario.env create mode 100644 tests/py-agent/issues/src/__init__.py create mode 100644 tests/py-agent/issues/src/app.py create mode 100644 tests/py-agent/issues/tests/__init__.py create mode 100644 tests/py-agent/issues/tests/test_app.py diff --git a/README.md b/README.md index 566141e..cb3fdf7 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Standard build tools produce walls of text. Agents waste context window parsing |-------|-----------|-------| | `cargo-agent` | Rust | fmt, check, clippy, test (nextest) | | `npm-agent` | Node.js | format, lint, typecheck, test, build | +| `py-agent` | Python | format (ruff/black), lint (ruff/flake8), typecheck (mypy/pyright), test (pytest) | | `terra-agent` | Terraform | fmt (check/fix), safe init, plan-safe, validate, lint (tflint) | ## Quick Start @@ -48,6 +49,9 @@ path/to/x-agent/skills/cargo-agent/scripts/cargo-agent.sh # Node.js project path/to/x-agent/skills/npm-agent/scripts/npm-agent.sh +# Python project +path/to/x-agent/skills/py-agent/scripts/py-agent.sh + # Terraform project path/to/x-agent/skills/terra-agent/scripts/terra-agent.sh ``` @@ -75,6 +79,18 @@ npm-agent.sh typecheck # type checking only npm-agent auto-detects your package manager (bun, pnpm, yarn, npm) and finds formatters/linters from package.json scripts or common tools (biome, eslint, prettier, tsc). +### py-agent + +```sh +py-agent.sh # full suite: format + lint + typecheck + test +py-agent.sh format # format only (auto-fix locally, check in CI) +py-agent.sh lint # lint only +py-agent.sh test # tests only +py-agent.sh test -k login # tests matching "login" +``` + +py-agent auto-detects your runner (uv, poetry, or plain python) and finds tools (ruff, black, flake8, mypy, pyright, pytest). + ### terra-agent ```sh @@ -138,6 +154,7 @@ The `skills/` directory contains Claude Code skill definitions. After installing - `/cargo-agent` — run Rust checks - `/npm-agent` — run Node.js checks +- `/py-agent` — run Python checks - `/terra-agent` — run Terraform checks/fixes ## License diff --git a/install.sh b/install.sh index aec58ac..6ae0866 100755 --- a/install.sh +++ b/install.sh @@ -24,7 +24,7 @@ SOURCE_DIR="" SOURCE_MODE="remote" # Available skills to install (each has its own scripts/ subdirectory) -SKILLS="cargo-agent npm-agent terra-agent" +SKILLS="cargo-agent npm-agent py-agent terra-agent" SELECTED_SKILLS="" info() { @@ -287,6 +287,22 @@ check_optional_deps() { fi fi + if skill_selected "py-agent"; then + if command -v python3 >/dev/null 2>&1 || command -v python >/dev/null 2>&1; then + info " Found: python" + else + warn " Missing: python3 (needed by py-agent)" + all_ok=0 + fi + + for dep in ruff black; do + if command -v "$dep" >/dev/null 2>&1; then + info " Found: ${dep}" + break + fi + done + fi + if skill_selected "terra-agent"; then if command -v terraform >/dev/null 2>&1; then info " Found: terraform" @@ -326,6 +342,9 @@ print_agents_md_snippet() { npm-agent) echo "- Node.js: use \`/npm-agent\` (format/lint/typecheck/test/build)." ;; + py-agent) + echo "- Python: use \`/py-agent\` (format/lint/typecheck/test)." + ;; terra-agent) echo "- Terraform: use \`/terra-agent\` (fmt-check/fmt-fix/init/plan-safe/validate/lint)." ;; diff --git a/skills/py-agent/SKILL.md b/skills/py-agent/SKILL.md new file mode 100644 index 0000000..19194a6 --- /dev/null +++ b/skills/py-agent/SKILL.md @@ -0,0 +1,82 @@ +--- +name: py-agent +description: | + Run py-agent.sh — a lean Python workflow runner that produces agent-friendly output. + Use when: running Python checks (format, lint, typecheck, test), verifying Python code before committing, + or when the user asks to run Python checks, lint, format, or test a Python project. + Triggers on: py agent, run python checks, python checks, ruff mypy pytest, verify python code, run py checks. +context: fork +allowed-tools: + - Bash(scripts/py-agent.sh*) + - Bash(RUN_*=* scripts/py-agent.sh*) + - Bash(MAX_LINES=* scripts/py-agent.sh*) + - Bash(KEEP_DIR=* scripts/py-agent.sh*) + - Bash(FAIL_FAST=* scripts/py-agent.sh*) +--- + +# Py Agent + +Run the `py-agent.sh` script for lean, structured Python workflow output designed for coding agents. + +## Script Location + +``` +scripts/py-agent.sh +``` + +## Usage + +### Run Full Suite (format + lint + typecheck + test) +```bash +scripts/py-agent.sh +``` + +### Run Individual Steps +```bash +scripts/py-agent.sh format # format (auto-fix locally, check in CI) +scripts/py-agent.sh lint # lint only +scripts/py-agent.sh typecheck # typecheck only +scripts/py-agent.sh test # tests only +scripts/py-agent.sh all # full suite (default) +``` + +### Pass Args to Tests +```bash +scripts/py-agent.sh test -k test_login # filter by name +scripts/py-agent.sh test tests/unit/ # specific directory +``` + +## Environment Knobs + +| Variable | Default | Description | +|----------|---------|-------------| +| `RUN_FORMAT` | `1` | Set to `0` to skip format | +| `RUN_LINT` | `1` | Set to `0` to skip lint | +| `RUN_TYPECHECK` | `1` | Set to `0` to skip typecheck | +| `RUN_TESTS` | `1` | Set to `0` to skip tests | +| `MAX_LINES` | `40` | Max output lines printed per step (unlimited in CI) | +| `KEEP_DIR` | `0` | Set to `1` to keep temp log dir on success | +| `FAIL_FAST` | `0` | Stop after first failing step; passes `-x` to pytest | + +## Auto-Detection + +- **Runner**: detects uv, poetry, or plain python from lock files +- **Format**: tries ruff format, then black +- **Lint**: tries ruff check, then flake8 +- **Typecheck**: tries mypy, then pyright +- **Tests**: tries pytest, then python -m unittest discover +- **CI mode**: format runs `--check` in CI, auto-fixes locally + +## Output Format + +- Each step prints a header (`Step: format`, `Step: lint`, etc.) +- Results are `PASS`, `FAIL`, or `SKIP` +- On failure, output is truncated to `MAX_LINES` with full logs on disk +- `Fix:` hint after each failure (suggests auto-fix commands where available) +- Overall result is printed at the end: `Overall: PASS` or `Overall: FAIL` + +## Important Notes + +- The script must be run from within a Python project directory (requires pyproject.toml, setup.py, setup.cfg, or requirements.txt) +- Steps are skipped gracefully if no matching tool is found +- On failure, the temp log directory is preserved automatically for inspection diff --git a/skills/py-agent/scripts/py-agent.sh b/skills/py-agent/scripts/py-agent.sh new file mode 100755 index 0000000..1a54ede --- /dev/null +++ b/skills/py-agent/scripts/py-agent.sh @@ -0,0 +1,401 @@ +#!/usr/bin/env bash +set -euo pipefail + +# py-agent: lean Python workflow output for coding agents +# deps: bash, mktemp +# optional: ruff, black, mypy, pyright, pytest + +KEEP_DIR="${KEEP_DIR:-0}" # set to 1 to keep temp dir even on success +# In CI, show full output; locally, limit to 40 lines to keep things tidy. +if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then + MAX_LINES="${MAX_LINES:-999999}" +else + MAX_LINES="${MAX_LINES:-40}" +fi +RUN_FORMAT="${RUN_FORMAT:-1}" # set to 0 to skip format +RUN_LINT="${RUN_LINT:-1}" # set to 0 to skip lint +RUN_TYPECHECK="${RUN_TYPECHECK:-1}" # set to 0 to skip typecheck +RUN_TESTS="${RUN_TESTS:-1}" # set to 0 to skip tests +FAIL_FAST="${FAIL_FAST:-0}" # set to 1 or use --fail-fast to stop after first failure + +TMPDIR_ROOT="${TMPDIR_ROOT:-/tmp}" +OUTDIR="$(mktemp -d "${TMPDIR_ROOT%/}/py-agent.XXXXXX")" + +cleanup() { + local code="$?" + if [[ "$KEEP_DIR" == "1" || "$code" != "0" ]]; then + echo "Logs kept in: $OUTDIR" + else + rm -rf "$OUTDIR" + fi + exit "$code" +} +trap cleanup EXIT + +hr() { echo "------------------------------------------------------------"; } + +# Returns 0 (continue) unless fail-fast is on and a step already failed. +should_continue() { [[ "$FAIL_FAST" != "1" || "$overall_ok" == "1" ]]; } + +STEP_START_SECONDS=0 + +step() { + local name="$1" + STEP_START_SECONDS=$SECONDS + hr + echo "Step: $name" +} + +fmt_elapsed() { + local elapsed=$(( SECONDS - STEP_START_SECONDS )) + echo "Time: ${elapsed}s" +} + +# Detect Python runner (uv, poetry, or plain python/pip). +detect_runner() { + if [[ -f "uv.lock" ]] && command -v uv >/dev/null 2>&1; then echo "uv" + elif [[ -f "poetry.lock" ]] && command -v poetry >/dev/null 2>&1; then echo "poetry" + else echo "plain" + fi +} + +# Resolve python binary (prefer python3, fall back to python). +detect_python() { + if command -v python3 >/dev/null 2>&1; then echo "python3" + elif command -v python >/dev/null 2>&1; then echo "python" + else echo "python3" # let it fail with a clear error + fi +} + +RUNNER="$(detect_runner)" +PYTHON="$(detect_python)" +echo "Runner: $RUNNER" + +# Run a command through the detected runner, or plain. +# Replaces bare "python" with the detected python binary. +run_cmd() { + local cmd="$1" + shift + if [[ "$cmd" == "python" ]]; then + cmd="$PYTHON" + fi + case "$RUNNER" in + uv) uv run "$cmd" "$@" ;; + poetry) poetry run "$cmd" "$@" ;; + *) "$cmd" "$@" ;; + esac +} + +# Check if a tool is available (directly or via runner). +have_tool() { + local tool="$1" + case "$RUNNER" in + uv) uv run "$tool" --version >/dev/null 2>&1 ;; + poetry) poetry run "$tool" --version >/dev/null 2>&1 ;; + *) command -v "$tool" >/dev/null 2>&1 ;; + esac +} + +run_format() { + step "format" + local log="$OUTDIR/format.log" + local ok=1 + local found=0 + local mode="fix" + if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then + mode="check" + fi + + echo "Mode: $mode" + + # Try ruff format first, then black + if have_tool ruff; then + found=1 + echo "Using: ruff format" + local -a args=(ruff format) + if [[ "$mode" == "check" ]]; then + args+=(--check --diff) + fi + if run_cmd "${args[@]}" >"$log" 2>&1; then + : + else + ok=0 + fi + elif have_tool black; then + found=1 + echo "Using: black" + local -a args=(black .) + if [[ "$mode" == "check" ]]; then + args=(black --check --diff .) + fi + if run_cmd "${args[@]}" >"$log" 2>&1; then + : + else + ok=0 + fi + fi + + if [[ "$found" == "0" ]]; then + echo "Result: SKIP (no formatter found — install ruff or black)" + fmt_elapsed + return 0 + fi + + if [[ "$ok" == "0" && -s "$log" ]]; then + echo + echo "Output (first ${MAX_LINES} lines):" + head -n "$MAX_LINES" "$log" + fi + + echo + echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)" + if [[ "$ok" == "0" ]]; then + if [[ "$mode" == "check" ]]; then + echo "Fix: run /py-agent format (auto-fixes locally), then re-check" + else + echo "Fix: resolve the formatting issues, then re-run: /py-agent format" + fi + fi + echo "Full log: $log" + fmt_elapsed + [[ "$ok" == "1" ]] +} + +run_lint() { + step "lint" + local log="$OUTDIR/lint.log" + local ok=1 + local found=0 + + # Try ruff check first, then flake8 + if have_tool ruff; then + found=1 + echo "Using: ruff check" + if run_cmd ruff check >"$log" 2>&1; then + : + else + ok=0 + fi + elif have_tool flake8; then + found=1 + echo "Using: flake8" + if run_cmd flake8 >"$log" 2>&1; then + : + else + ok=0 + fi + fi + + if [[ "$found" == "0" ]]; then + echo "Result: SKIP (no linter found — install ruff or flake8)" + fmt_elapsed + return 0 + fi + + if [[ "$ok" == "0" && -s "$log" ]]; then + echo + echo "Output (first ${MAX_LINES} lines):" + head -n "$MAX_LINES" "$log" + fi + + echo + echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)" + if [[ "$ok" == "0" ]]; then + if have_tool ruff; then + echo "Fix: try 'ruff check --fix' for auto-fixable issues, then re-run: /py-agent lint" + else + echo "Fix: resolve the lint errors above, then re-run: /py-agent lint" + fi + fi + echo "Full log: $log" + fmt_elapsed + [[ "$ok" == "1" ]] +} + +run_typecheck() { + step "typecheck" + local log="$OUTDIR/typecheck.log" + local ok=1 + local found=0 + + # Try mypy first, then pyright + if have_tool mypy; then + found=1 + echo "Using: mypy" + if run_cmd mypy . >"$log" 2>&1; then + : + else + ok=0 + fi + elif have_tool pyright; then + found=1 + echo "Using: pyright" + if run_cmd pyright >"$log" 2>&1; then + : + else + ok=0 + fi + fi + + if [[ "$found" == "0" ]]; then + echo "Result: SKIP (no type checker found — install mypy or pyright)" + fmt_elapsed + return 0 + fi + + if [[ "$ok" == "0" && -s "$log" ]]; then + echo + echo "Output (first ${MAX_LINES} lines):" + head -n "$MAX_LINES" "$log" + fi + + echo + echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)" + [[ "$ok" == "0" ]] && echo "Fix: resolve the type errors above, then re-run: /py-agent typecheck" + echo "Full log: $log" + fmt_elapsed + [[ "$ok" == "1" ]] +} + +run_tests() { + step "test" + local log="$OUTDIR/test.log" + local ok=1 + local found=0 + + local -a test_args=() + [[ "$FAIL_FAST" == "1" ]] && test_args+=(-x) + + # Pass through extra args (e.g. test file paths, -k filters). + # Bash 3.2 + `set -u` treats "${arr[@]}" on an empty array as unbound. + if [[ $# -gt 0 ]]; then + test_args+=("$@") + fi + + if have_tool pytest; then + found=1 + echo "Using: pytest" + if [[ ${#test_args[@]} -gt 0 ]]; then + run_cmd pytest "${test_args[@]}" >"$log" 2>&1 || ok=0 + else + run_cmd pytest >"$log" 2>&1 || ok=0 + fi + else + # Fallback: python -m unittest + found=1 + echo "Using: python -m unittest" + local rc=0 + if [[ ${#test_args[@]} -gt 0 ]]; then + run_cmd python -m unittest discover "${test_args[@]}" >"$log" 2>&1 || rc=$? + else + run_cmd python -m unittest discover >"$log" 2>&1 || rc=$? + fi + if [[ "$rc" != "0" ]]; then + # If discover fails with no tests, treat as skip + if grep -q "Ran 0 tests" "$log" 2>/dev/null; then + echo "Result: SKIP (no tests found)" + fmt_elapsed + return 0 + fi + ok=0 + fi + fi + + if [[ "$ok" == "0" && -s "$log" ]]; then + echo + echo "Output (first ${MAX_LINES} lines):" + head -n "$MAX_LINES" "$log" + fi + + echo + echo "Result: $([[ "$ok" == "1" ]] && echo PASS || echo FAIL)" + [[ "$ok" == "0" ]] && echo "Fix: resolve the failing tests, then re-run: /py-agent test" + echo "Full log: $log" + fmt_elapsed + [[ "$ok" == "1" ]] +} + +usage() { + cat <<'EOF' +py-agent: lean Python workflow output for coding agents + +Usage: + py-agent [--fail-fast] # runs format, lint, typecheck, test + py-agent [--fail-fast] format|lint|typecheck|test|all + py-agent [--fail-fast] test [PYTEST_ARGS] + +Flags: + --fail-fast stop after first failing step; also passes -x to pytest + +Env knobs: + MAX_LINES=40 # printed lines per step (unlimited in CI) + KEEP_DIR=0|1 # keep temp log dir even on success + FAIL_FAST=0|1 # same as --fail-fast flag + RUN_FORMAT=0|1 + RUN_LINT=0|1 + RUN_TYPECHECK=0|1 + RUN_TESTS=0|1 + +Auto-detection: + - Runner: detects uv, poetry, or plain python from lock files + - Format: tries ruff format, then black + - Lint: tries ruff check, then flake8 + - Typecheck: tries mypy, then pyright + - Tests: tries pytest, then python -m unittest discover + - CI mode: format runs --check in CI, auto-fixes locally + +Examples: + py-agent # full suite + py-agent --fail-fast # full suite, stop on first failure + py-agent lint # lint only + py-agent test # tests only + py-agent test -k test_login # tests matching "test_login" + py-agent test tests/unit/ # tests in a specific directory + RUN_TESTS=0 py-agent # skip tests +EOF +} + +main() { + while [[ "${1:-}" == --* ]]; do + case "$1" in + --fail-fast) FAIL_FAST=1; shift ;; + *) break ;; + esac + done + + local cmd="${1:-all}" + shift 2>/dev/null || true + local overall_ok=1 + + # Verify we're in a Python project + if [[ ! -f "pyproject.toml" && ! -f "setup.py" && ! -f "setup.cfg" && ! -f "requirements.txt" ]]; then + echo "Error: no Python project found (expected pyproject.toml, setup.py, setup.cfg, or requirements.txt)" >&2 + exit 2 + fi + + case "$cmd" in + -h|--help|help) usage; exit 0 ;; + format) run_format || overall_ok=0 ;; + lint) run_lint || overall_ok=0 ;; + typecheck) run_typecheck || overall_ok=0 ;; + test) run_tests "$@" || overall_ok=0 ;; + all) + if [[ "$RUN_FORMAT" == "1" ]]; then run_format || overall_ok=0; fi + if [[ "$RUN_LINT" == "1" ]] && should_continue; then run_lint || overall_ok=0; fi + if [[ "$RUN_TYPECHECK" == "1" ]] && should_continue; then run_typecheck || overall_ok=0; fi + if [[ "$RUN_TESTS" == "1" ]] && should_continue; then run_tests || overall_ok=0; fi + ;; + *) + echo "Unknown command: $cmd" >&2 + usage + exit 2 + ;; + esac + + hr + echo "Overall: $([[ "$overall_ok" == "1" ]] && echo PASS || echo FAIL)" + echo "Logs: $OUTDIR" + [[ "$overall_ok" == "1" ]] +} + +main "$@" diff --git a/tests/py-agent/clean/pyproject.toml b/tests/py-agent/clean/pyproject.toml new file mode 100644 index 0000000..e073c3b --- /dev/null +++ b/tests/py-agent/clean/pyproject.toml @@ -0,0 +1,4 @@ +[project] +name = "py-agent-clean-scenario" +version = "0.1.0" +requires-python = ">=3.9" diff --git a/tests/py-agent/clean/scenario.env b/tests/py-agent/clean/scenario.env new file mode 100644 index 0000000..baac4c2 --- /dev/null +++ b/tests/py-agent/clean/scenario.env @@ -0,0 +1,5 @@ +SCENARIO_NAME="py-agent clean" +AGENT_SCRIPT="skills/py-agent/scripts/py-agent.sh" +RUN_ARGS="all" +EXPECT_EXIT="0" +REQUIRED_TOOLS="python3" diff --git a/tests/py-agent/clean/src/__init__.py b/tests/py-agent/clean/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/py-agent/clean/src/app.py b/tests/py-agent/clean/src/app.py new file mode 100644 index 0000000..1e21eeb --- /dev/null +++ b/tests/py-agent/clean/src/app.py @@ -0,0 +1,2 @@ +def greet(name: str) -> str: + return f"Hello, {name}!" diff --git a/tests/py-agent/clean/tests/__init__.py b/tests/py-agent/clean/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/py-agent/clean/tests/test_app.py b/tests/py-agent/clean/tests/test_app.py new file mode 100644 index 0000000..ef8234a --- /dev/null +++ b/tests/py-agent/clean/tests/test_app.py @@ -0,0 +1,12 @@ +import unittest + +from src.app import greet + + +class TestApp(unittest.TestCase): + def test_greet(self): + self.assertEqual(greet("world"), "Hello, world!") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/py-agent/issues/pyproject.toml b/tests/py-agent/issues/pyproject.toml new file mode 100644 index 0000000..e3cfed0 --- /dev/null +++ b/tests/py-agent/issues/pyproject.toml @@ -0,0 +1,4 @@ +[project] +name = "py-agent-issues-scenario" +version = "0.1.0" +requires-python = ">=3.9" diff --git a/tests/py-agent/issues/scenario.env b/tests/py-agent/issues/scenario.env new file mode 100644 index 0000000..d783e90 --- /dev/null +++ b/tests/py-agent/issues/scenario.env @@ -0,0 +1,5 @@ +SCENARIO_NAME="py-agent issues" +AGENT_SCRIPT="skills/py-agent/scripts/py-agent.sh" +RUN_ARGS="all" +EXPECT_EXIT="1" +REQUIRED_TOOLS="python3" diff --git a/tests/py-agent/issues/src/__init__.py b/tests/py-agent/issues/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/py-agent/issues/src/app.py b/tests/py-agent/issues/src/app.py new file mode 100644 index 0000000..b48df94 --- /dev/null +++ b/tests/py-agent/issues/src/app.py @@ -0,0 +1,6 @@ +import os +import sys +import json + +def greet(name): + return f"Hello, {name}!" diff --git a/tests/py-agent/issues/tests/__init__.py b/tests/py-agent/issues/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/py-agent/issues/tests/test_app.py b/tests/py-agent/issues/tests/test_app.py new file mode 100644 index 0000000..4024be4 --- /dev/null +++ b/tests/py-agent/issues/tests/test_app.py @@ -0,0 +1,12 @@ +import unittest + +from src.app import greet + + +class TestApp(unittest.TestCase): + def test_greet_fails(self): + self.assertEqual(greet("world"), "Wrong answer!") + + +if __name__ == "__main__": + unittest.main() From 991bb8ea186ff803b93b5d1d1118ec5a9024065a Mon Sep 17 00:00:00 2001 From: Chris Raethke Date: Sat, 7 Mar 2026 15:14:52 +1000 Subject: [PATCH 2/4] feat(cargo-agent): report reformatted files in fmt fix mode In fix mode, run --check first to detect which files need formatting, then apply the fix and list the changed files in the output. Ported from ai-barometer's cargo-agent. --- skills/cargo-agent/scripts/cargo-agent.sh | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/skills/cargo-agent/scripts/cargo-agent.sh b/skills/cargo-agent/scripts/cargo-agent.sh index 1605054..588b465 100755 --- a/skills/cargo-agent/scripts/cargo-agent.sh +++ b/skills/cargo-agent/scripts/cargo-agent.sh @@ -148,7 +148,25 @@ run_fmt() { fi echo "Mode: $mode" - if cargo "${fmt_args[@]}" >"$log" 2>&1; then + if [[ "$mode" == "fix" ]]; then + # In fix mode, first check which files need formatting, then apply. + local needs_fmt + needs_fmt="$(cargo fmt --all -- --check 2>&1 || true)" + cargo "${fmt_args[@]}" >"$log" 2>&1 + if [[ -n "$needs_fmt" ]]; then + local changed_files + changed_files="$(echo "$needs_fmt" | grep '^Diff in' | sed 's/^Diff in //' | sed 's/:[0-9]*:$//' | sort -u)" + if [[ -n "$changed_files" ]]; then + echo "Result: PASS (files reformatted)" + echo "Files fixed:" + echo "$changed_files" | while read -r f; do echo " $f"; done + else + echo "Result: PASS" + fi + else + echo "Result: PASS" + fi + elif cargo "${fmt_args[@]}" >"$log" 2>&1; then echo "Result: PASS" else ok=0 From c22619374f2a004e8e7b65b0365a19423d2fef01 Mon Sep 17 00:00:00 2001 From: Chris Raethke Date: Sun, 8 Mar 2026 19:07:56 +1000 Subject: [PATCH 3/4] fix(py-agent): add CI job, workflow lock, and CHANGED_FILES support - Add py-agent CI job with baseline and with-ruff matrix variants - Add workflow-level flock (Linux) / Perl fallback (macOS) to prevent concurrent py-agent runs - Add CHANGED_FILES support: scopes format and lint to changed .py files; typecheck and test run project-wide (need full context) - Add CHANGED_FILES to SKILL.md allowed-tools and env knobs table --- .github/workflows/ci.yml | 33 ++++++++++++ skills/py-agent/SKILL.md | 2 + skills/py-agent/scripts/py-agent.sh | 84 +++++++++++++++++++++++++++-- 3 files changed, 115 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1caff9e..e7d405c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,6 +71,39 @@ jobs: CI: "true" run: tests/run-scenarios.sh npm-agent + # ── py-agent scenarios ────────────────────────────────────────────── + py-agent: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + # baseline: no optional python tools + - name: baseline + install_ruff: false + + # with ruff + - name: with-ruff + install_ruff: true + + name: "py-agent (${{ matrix.name }})" + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install ruff + if: matrix.install_ruff + run: pip install ruff + + - name: Run py-agent scenarios + env: + CI: "true" + run: tests/run-scenarios.sh py-agent + # ── terra-agent scenarios ─────────────────────────────────────────── terra-agent: runs-on: ubuntu-latest diff --git a/skills/py-agent/SKILL.md b/skills/py-agent/SKILL.md index 19194a6..dfca7c6 100644 --- a/skills/py-agent/SKILL.md +++ b/skills/py-agent/SKILL.md @@ -12,6 +12,7 @@ allowed-tools: - Bash(MAX_LINES=* scripts/py-agent.sh*) - Bash(KEEP_DIR=* scripts/py-agent.sh*) - Bash(FAIL_FAST=* scripts/py-agent.sh*) + - Bash(CHANGED_FILES=* scripts/py-agent.sh*) --- # Py Agent @@ -57,6 +58,7 @@ scripts/py-agent.sh test tests/unit/ # specific directory | `MAX_LINES` | `40` | Max output lines printed per step (unlimited in CI) | | `KEEP_DIR` | `0` | Set to `1` to keep temp log dir on success | | `FAIL_FAST` | `0` | Stop after first failing step; passes `-x` to pytest | +| `CHANGED_FILES` | _(empty)_ | Space-separated changed file paths; scopes format/lint to affected `.py` files | ## Auto-Detection diff --git a/skills/py-agent/scripts/py-agent.sh b/skills/py-agent/scripts/py-agent.sh index 1a54ede..d336bcf 100755 --- a/skills/py-agent/scripts/py-agent.sh +++ b/skills/py-agent/scripts/py-agent.sh @@ -17,6 +17,7 @@ RUN_LINT="${RUN_LINT:-1}" # set to 0 to skip lint RUN_TYPECHECK="${RUN_TYPECHECK:-1}" # set to 0 to skip typecheck RUN_TESTS="${RUN_TESTS:-1}" # set to 0 to skip tests FAIL_FAST="${FAIL_FAST:-0}" # set to 1 or use --fail-fast to stop after first failure +CHANGED_FILES="${CHANGED_FILES:-}" # space-separated list of changed files; scopes format/lint to affected .py files TMPDIR_ROOT="${TMPDIR_ROOT:-/tmp}" OUTDIR="$(mktemp -d "${TMPDIR_ROOT%/}/py-agent.XXXXXX")" @@ -32,6 +33,31 @@ cleanup() { } trap cleanup EXIT +# Workflow-level lock: only one py-agent instance runs at a time. +# Prevents overlapping runs when agents invoke the script concurrently. +LOCKFILE="${TMPDIR_ROOT%/}/py-agent.lock" +exec 9>"$LOCKFILE" +if command -v flock >/dev/null 2>&1; then + if ! flock -n 9; then + echo "py-agent: waiting for another run to finish..." + flock 9 + fi +else + # macOS: flock not available, use perl as a portable fallback. + if ! command -v perl >/dev/null 2>&1; then + echo "Warning: neither flock nor perl available; skipping workflow lock" >&2 + else + perl -e ' + use Fcntl ":flock"; + open(my $fh, ">&=", 9) or die "fdopen: $!"; + if (!flock($fh, LOCK_EX | LOCK_NB)) { + print STDERR "py-agent: waiting for another run to finish...\n"; + flock($fh, LOCK_EX) or die "flock: $!"; + } + ' + fi +fi + hr() { echo "------------------------------------------------------------"; } # Returns 0 (continue) unless fail-fast is on and a step already failed. @@ -96,6 +122,25 @@ have_tool() { esac } +# Resolve CHANGED_FILES to .py files only. +# Populates _CHANGED_PY_FILES with the filtered list. +_CHANGED_PY_FILES=() +resolve_changed_py_files() { + _CHANGED_PY_FILES=() + if [[ -z "$CHANGED_FILES" ]]; then return; fi + + local file + for file in $CHANGED_FILES; do + if [[ "$file" == *.py ]]; then + _CHANGED_PY_FILES+=("$file") + fi + done + + if [[ ${#_CHANGED_PY_FILES[@]} -gt 0 ]]; then + echo "Scoped to ${#_CHANGED_PY_FILES[@]} changed .py file(s)" + fi +} + run_format() { step "format" local log="$OUTDIR/format.log" @@ -108,6 +153,12 @@ run_format() { echo "Mode: $mode" + # Collect scoped file targets (empty = whole project). + local -a targets=() + if [[ ${#_CHANGED_PY_FILES[@]} -gt 0 ]]; then + targets=("${_CHANGED_PY_FILES[@]}") + fi + # Try ruff format first, then black if have_tool ruff; then found=1 @@ -116,6 +167,9 @@ run_format() { if [[ "$mode" == "check" ]]; then args+=(--check --diff) fi + if [[ ${#targets[@]} -gt 0 ]]; then + args+=("${targets[@]}") + fi if run_cmd "${args[@]}" >"$log" 2>&1; then : else @@ -124,9 +178,14 @@ run_format() { elif have_tool black; then found=1 echo "Using: black" - local -a args=(black .) + local -a args=(black) if [[ "$mode" == "check" ]]; then - args=(black --check --diff .) + args+=(--check --diff) + fi + if [[ ${#targets[@]} -gt 0 ]]; then + args+=("${targets[@]}") + else + args+=(.) fi if run_cmd "${args[@]}" >"$log" 2>&1; then : @@ -167,11 +226,21 @@ run_lint() { local ok=1 local found=0 + # Collect scoped file targets (empty = whole project). + local -a targets=() + if [[ ${#_CHANGED_PY_FILES[@]} -gt 0 ]]; then + targets=("${_CHANGED_PY_FILES[@]}") + fi + # Try ruff check first, then flake8 if have_tool ruff; then found=1 echo "Using: ruff check" - if run_cmd ruff check >"$log" 2>&1; then + local -a args=(ruff check) + if [[ ${#targets[@]} -gt 0 ]]; then + args+=("${targets[@]}") + fi + if run_cmd "${args[@]}" >"$log" 2>&1; then : else ok=0 @@ -179,7 +248,11 @@ run_lint() { elif have_tool flake8; then found=1 echo "Using: flake8" - if run_cmd flake8 >"$log" 2>&1; then + local -a args=(flake8) + if [[ ${#targets[@]} -gt 0 ]]; then + args+=("${targets[@]}") + fi + if run_cmd "${args[@]}" >"$log" 2>&1; then : else ok=0 @@ -335,6 +408,7 @@ Env knobs: RUN_LINT=0|1 RUN_TYPECHECK=0|1 RUN_TESTS=0|1 + CHANGED_FILES="f1 f2" # scope format/lint to changed .py files Auto-detection: - Runner: detects uv, poetry, or plain python from lock files @@ -367,6 +441,8 @@ main() { shift 2>/dev/null || true local overall_ok=1 + resolve_changed_py_files + # Verify we're in a Python project if [[ ! -f "pyproject.toml" && ! -f "setup.py" && ! -f "setup.cfg" && ! -f "requirements.txt" ]]; then echo "Error: no Python project found (expected pyproject.toml, setup.py, setup.cfg, or requirements.txt)" >&2 From 5f80b021924180aa036532c30f7f1b463a9450b6 Mon Sep 17 00:00:00 2001 From: Chris Raethke Date: Sun, 8 Mar 2026 19:16:23 +1000 Subject: [PATCH 4/4] fix: address PR review feedback - Move help/--help before project-existence checks (py-agent + convention) - Switch have_tool() to use `which` via runner instead of --version - Make unittest fallback conditional on python being available - Add SKIP result when no test runner is found - Update definition-of-done with help-accessibility requirement --- AGENTS.md | 1 + docs/agents/definition-of-done.md | 2 +- skills/py-agent/scripts/py-agent.sh | 24 ++++++++++++++++++------ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 9b6525a..1a2f959 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,6 +9,7 @@ Read this file first, then open only the linked docs you need. - Keep output concise and structured (`Step`, `Result`, `Fix`, `Overall`, log path). - Support shared knobs (`RUN_`, `MAX_LINES`, `KEEP_DIR`, `FAIL_FAST`, `CHANGED_FILES`). - Every `Result: FAIL` must include a `Fix:` hint. +- The `help`/`--help`/`-h` command must work without project context — resolve it before any project-existence checks. - `shellcheck --severity=warning` must pass on all scripts. - Ship one backlog item per commit. diff --git a/docs/agents/definition-of-done.md b/docs/agents/definition-of-done.md index ed60dc8..dd8f0cf 100644 --- a/docs/agents/definition-of-done.md +++ b/docs/agents/definition-of-done.md @@ -8,7 +8,7 @@ An agent is done only when all items below are complete. - `skills/-agent/SKILL.md` `allowed-tools` includes patterns for all env knobs (`RUN_*`, `MAX_LINES`, `KEEP_DIR`, `FAIL_FAST`, plus any agent-specific knobs). - `skills/-agent/scripts/-agent.sh` exists and is executable. - Script starts with `set -euo pipefail`. -- Script has `--help`/`help`/`-h` usage output. +- Script has `--help`/`help`/`-h` usage output that works without project context (resolved before project-existence checks). ## Output Contract diff --git a/skills/py-agent/scripts/py-agent.sh b/skills/py-agent/scripts/py-agent.sh index d336bcf..8476c69 100755 --- a/skills/py-agent/scripts/py-agent.sh +++ b/skills/py-agent/scripts/py-agent.sh @@ -113,11 +113,13 @@ run_cmd() { } # Check if a tool is available (directly or via runner). +# For uv/poetry, checks whether the tool is accessible via `run which` to +# avoid --version triggering auto-install of missing packages. have_tool() { local tool="$1" case "$RUNNER" in - uv) uv run "$tool" --version >/dev/null 2>&1 ;; - poetry) poetry run "$tool" --version >/dev/null 2>&1 ;; + uv) uv run which "$tool" >/dev/null 2>&1 ;; + poetry) poetry run which "$tool" >/dev/null 2>&1 ;; *) command -v "$tool" >/dev/null 2>&1 ;; esac } @@ -353,7 +355,7 @@ run_tests() { else run_cmd pytest >"$log" 2>&1 || ok=0 fi - else + elif command -v "$PYTHON" >/dev/null 2>&1; then # Fallback: python -m unittest found=1 echo "Using: python -m unittest" @@ -374,6 +376,12 @@ run_tests() { fi fi + if [[ "$found" == "0" ]]; then + echo "Result: SKIP (no test runner found — install pytest)" + fmt_elapsed + return 0 + fi + if [[ "$ok" == "0" && -s "$log" ]]; then echo echo "Output (first ${MAX_LINES} lines):" @@ -439,9 +447,11 @@ main() { local cmd="${1:-all}" shift 2>/dev/null || true - local overall_ok=1 - resolve_changed_py_files + # Help must work without project context. + case "$cmd" in + -h|--help|help) usage; exit 0 ;; + esac # Verify we're in a Python project if [[ ! -f "pyproject.toml" && ! -f "setup.py" && ! -f "setup.cfg" && ! -f "requirements.txt" ]]; then @@ -449,8 +459,10 @@ main() { exit 2 fi + local overall_ok=1 + resolve_changed_py_files + case "$cmd" in - -h|--help|help) usage; exit 0 ;; format) run_format || overall_ok=0 ;; lint) run_lint || overall_ok=0 ;; typecheck) run_typecheck || overall_ok=0 ;;