From 444284300b4f6737d530552fb8812177f2ee0ed7 Mon Sep 17 00:00:00 2001 From: Chris Raethke Date: Sun, 8 Mar 2026 18:26:43 +1000 Subject: [PATCH 1/3] feat(cargo-agent): add SQLX_OFFLINE default, flock, failing test extraction, --changed flag Port improvements from ai-barometer PR #154: - Default SQLX_OFFLINE=true with override support - Workflow-level flock to prevent concurrent cargo-agent runs - extract_failing_tests() parses nextest/libtest output for re-run hints - test --changed flag to scope tests to git-changed crates --- skills/cargo-agent/SKILL.md | 10 ++ skills/cargo-agent/scripts/cargo-agent.sh | 152 +++++++++++++++++++++- 2 files changed, 160 insertions(+), 2 deletions(-) diff --git a/skills/cargo-agent/SKILL.md b/skills/cargo-agent/SKILL.md index 547ddd8..c20875e 100644 --- a/skills/cargo-agent/SKILL.md +++ b/skills/cargo-agent/SKILL.md @@ -43,6 +43,12 @@ scripts/cargo-agent.sh test # tests only scripts/cargo-agent.sh all # full suite (default) ``` +### Run Changed-Crate Tests (Fast Loop) +```bash +scripts/cargo-agent.sh test --changed # tests for crates with changed files +scripts/cargo-agent.sh test --changed test_auth # changed-crate tests filtered by name +``` + ### Run Specific Tests Pass extra arguments through to cargo-nextest: ```bash @@ -63,6 +69,7 @@ scripts/cargo-agent.sh test -p api test_auth # "test_auth" in api crate | `RUN_INTEGRATION` | `0` | Set to `1` to enable integration tests | | `USE_NEXTEST` | `auto` | `auto`/`1`/`0` — controls nextest usage | | `FAIL_FAST` | `0` | Set to `1` to stop after first failure (or use `--fail-fast`) | +| `SQLX_OFFLINE` | `true` | Default SQLx offline mode; set to `false` for live DB (CI overrides this) | | `CHANGED_FILES` | _(empty)_ | Space-separated changed file paths; scopes check/clippy/test to affected packages | | `MAX_LINES` | `40` | Max diagnostic lines printed per step (unlimited in CI) | | `KEEP_DIR` | `0` | Set to `1` to keep temp log dir on success | @@ -85,3 +92,6 @@ scripts/cargo-agent.sh test -p api test_auth # "test_auth" in api crate - Short package names are auto-resolved (e.g. `-p api` matches `my-project-api`) - In CI (`CI=true`), `MAX_LINES` defaults to unlimited; locally it defaults to 40 - Step ordering in `all`: fmt → sqlx → check/clippy → test. sqlx runs before compilation steps because a stale query cache causes confusing downstream errors +- On test failure, failing test names are extracted and re-run commands are printed +- `test --changed` uses `git diff` to detect changed crates and scope tests accordingly +- A workflow-level lock (`flock` on Linux, Perl fallback on macOS) prevents concurrent runs diff --git a/skills/cargo-agent/scripts/cargo-agent.sh b/skills/cargo-agent/scripts/cargo-agent.sh index f0e0e1b..d99f6d1 100755 --- a/skills/cargo-agent/scripts/cargo-agent.sh +++ b/skills/cargo-agent/scripts/cargo-agent.sh @@ -23,6 +23,9 @@ RUN_INTEGRATION="${RUN_INTEGRATION:-0}" # set to 1 to run integration tests FAIL_FAST="${FAIL_FAST:-0}" # set to 1 or use --fail-fast to stop after first failure CHANGED_FILES="${CHANGED_FILES:-}" # space-separated list of changed files; scopes to affected packages +# Default to SQLx offline mode, but allow explicit overrides (e.g. CI sets false). +export SQLX_OFFLINE="${SQLX_OFFLINE:-true}" + TMPDIR_ROOT="${TMPDIR_ROOT:-/tmp}" OUTDIR="$(mktemp -d "${TMPDIR_ROOT%/}/cargo-agent.XXXXXX")" @@ -37,6 +40,31 @@ cleanup() { } trap cleanup EXIT +# Workflow-level lock: only one cargo-agent instance runs at a time. +# Prevents overlapping builds when agents invoke the script concurrently. +LOCKFILE="${TMPDIR_ROOT%/}/cargo-agent.lock" +exec 9>"$LOCKFILE" +if command -v flock >/dev/null 2>&1; then + if ! flock -n 9; then + echo "cargo-agent: waiting for another run to finish..." + flock 9 + fi +else + # macOS: flock not available, use perl as a portable fallback. + if ! command -v perl >/dev/null 2>&1; then + echo "Warning: neither flock nor perl available; skipping workflow lock" >&2 + else + perl -e ' + use Fcntl ":flock"; + open(my $fh, ">&=", 9) or die "fdopen: $!"; + if (!flock($fh, LOCK_EX | LOCK_NB)) { + print STDERR "cargo-agent: waiting for another run to finish...\n"; + flock($fh, LOCK_EX) or die "flock: $!"; + } + ' + fi +fi + need() { command -v "$1" >/dev/null 2>&1 || { echo "Missing required tool: $1" >&2; exit 2; } } @@ -174,6 +202,65 @@ resolve_affected_packages() { fi } +# Build -p package args from changed files in git (tracked + untracked). +# Populates _CHANGED_PACKAGE_ARGS and _CHANGED_FORCE_FULL. +_CHANGED_PACKAGE_ARGS=() +_CHANGED_FORCE_FULL=0 +collect_changed_package_args() { + _CHANGED_PACKAGE_ARGS=() + _CHANGED_FORCE_FULL=0 + + local diff_paths untracked_paths combined_paths changed_crates + diff_paths="$(git diff --name-only HEAD 2>/dev/null || true)" + untracked_paths="$(git ls-files --others --exclude-standard 2>/dev/null || true)" + combined_paths="$(printf '%s\n%s\n' "$diff_paths" "$untracked_paths" | sed '/^$/d' | sort -u)" + + if [[ -z "$combined_paths" ]]; then + return 1 + fi + + # Workspace-level cargo config/manifest changes can impact all crates. + if echo "$combined_paths" | grep -Eq '^(Cargo\.toml|Cargo\.lock|\.cargo/)'; then + _CHANGED_FORCE_FULL=1 + return 0 + fi + + changed_crates="$(echo "$combined_paths" | awk -F/ '$1=="crates" && $2!="" {print $2}' | sort -u)" + if [[ -z "$changed_crates" ]]; then + return 1 + fi + + local crate_name + while IFS= read -r crate_name; do + [[ -n "$crate_name" ]] && _CHANGED_PACKAGE_ARGS+=("-p" "$crate_name") + done <<< "$changed_crates" + + return 0 +} + +# Extract failing test names from a nextest/libtest log file. +extract_failing_tests() { + local log="$1" + [[ -s "$log" ]] || return 0 + + { + # nextest human output, e.g. "FAIL [ 0.001s] crate::module::test_name" + sed -nE 's/^.*FAIL[[:space:]]+\[[^]]+\][[:space:]]+([^[:space:]]+).*$/\1/p' "$log" + # libtest-style output, e.g. "test crate::module::test_name ... FAILED" + sed -nE 's/^test[[:space:]]+([^[:space:]]+)[[:space:]]+\.\.\.[[:space:]]+FAILED$/\1/p' "$log" + # Failure summaries under "failures:" sections. + awk ' + /^failures:$/ { in_failures = 1; next } + in_failures && /^[[:space:]]*$/ { in_failures = 0; next } + in_failures { + line = $0 + sub(/^[[:space:]]+/, "", line) + if (line ~ /::/) print line + } + ' "$log" + } | sort -u +} + run_fmt() { step "fmt" local log="$OUTDIR/fmt.log" @@ -367,6 +454,17 @@ have_nextest() { run_tests() { step "test" local ok=1 + local changed_only=0 + local -a test_args=() + + while [[ $# -gt 0 ]]; do + case "$1" in + --changed) changed_only=1 ;; + --all) changed_only=0 ;; + *) test_args+=("$1") ;; + esac + shift + done if [[ "$USE_NEXTEST" == "0" ]]; then echo "Result: SKIP (USE_NEXTEST=0, no runner configured)" @@ -381,10 +479,43 @@ run_tests() { fi fi + if [[ "$changed_only" == "1" ]]; then + if collect_changed_package_args; then + if [[ "$_CHANGED_FORCE_FULL" == "1" ]]; then + echo "Changed workspace-level Cargo files detected; running full suite." + elif [[ ${#_CHANGED_PACKAGE_ARGS[@]} -gt 0 ]]; then + echo "Changed crates:" + local i + for ((i = 1; i < ${#_CHANGED_PACKAGE_ARGS[@]}; i += 2)); do + echo " ${_CHANGED_PACKAGE_ARGS[$i]}" + done + if [[ ${#test_args[@]} -gt 0 ]]; then + test_args=("${_CHANGED_PACKAGE_ARGS[@]}" "${test_args[@]}") + else + test_args=("${_CHANGED_PACKAGE_ARGS[@]}") + fi + else + echo "Result: SKIP" + echo "No changed crates detected under crates/." + fmt_elapsed + return 0 + fi + else + echo "Result: SKIP" + echo "No changed files detected in git diff/untracked files." + fmt_elapsed + return 0 + fi + fi + local log="$OUTDIR/nextest.log" # Resolve short package names (e.g. -p api → -p ai-barometer-api). - resolve_package_args "$@" + if [[ ${#test_args[@]} -gt 0 ]]; then + resolve_package_args "${test_args[@]}" + else + resolve_package_args + fi # Bash 3.2 + `set -u` treats "${arr[@]}" on an empty array as unbound. if [[ ${#_RESOLVED_ARGS[@]} -gt 0 ]]; then set -- "${_RESOLVED_ARGS[@]}" @@ -416,6 +547,21 @@ run_tests() { echo echo "Output (first ${MAX_LINES} lines):" head -n "$MAX_LINES" "$log" + + local failed_tests + failed_tests="$(extract_failing_tests "$log")" + if [[ -n "$failed_tests" ]]; then + echo + echo "Failing tests:" + echo "$failed_tests" | while read -r test_name; do + [[ -n "$test_name" ]] && echo " $test_name" + done + echo + echo "Re-run failing tests with:" + echo "$failed_tests" | while read -r test_name; do + [[ -n "$test_name" ]] && echo " /cargo-agent test $test_name" + done + fi fi echo @@ -433,7 +579,7 @@ cargo-agent: lean Rust workflow output for coding agents Usage: cargo-agent [--fail-fast] # runs fmt, clippy, sqlx, nextest (if installed) cargo-agent [--fail-fast] fmt|check|clippy|sqlx|all - cargo-agent [--fail-fast] test [NEXTEST_ARGS] + cargo-agent [--fail-fast] test [--changed|--all] [NEXTEST_ARGS] Flags: --fail-fast stop after first failing step; also passed to nextest @@ -457,6 +603,8 @@ Examples: cargo-agent --fail-fast # full suite, stop on first failure cargo-agent sqlx # sqlx cache verify only cargo-agent test test_login # tests matching "test_login" + cargo-agent test --changed # tests for crates with changed files + cargo-agent test --changed test_auth # changed-crate tests filtered by "test_auth" cargo-agent test -p db # tests in the db crate cargo-agent test -p api test_auth # "test_auth" in api crate RUN_TESTS=0 cargo-agent # skip tests From f26abb4f30c45426a420fb27d692d7e333df601d Mon Sep 17 00:00:00 2001 From: Chris Raethke Date: Sun, 8 Mar 2026 18:37:20 +1000 Subject: [PATCH 2/3] test: add test-fail scenario for failing test extraction Adds a cargo-agent scenario with a deliberately failing test to exercise the extract_failing_tests output (nextest FAIL lines, re-run hints). --- tests/cargo-agent/test-fail/Cargo.toml | 7 +++++++ tests/cargo-agent/test-fail/scenario.env | 5 +++++ tests/cargo-agent/test-fail/src/lib.rs | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 tests/cargo-agent/test-fail/Cargo.toml create mode 100644 tests/cargo-agent/test-fail/scenario.env create mode 100644 tests/cargo-agent/test-fail/src/lib.rs diff --git a/tests/cargo-agent/test-fail/Cargo.toml b/tests/cargo-agent/test-fail/Cargo.toml new file mode 100644 index 0000000..4ffd636 --- /dev/null +++ b/tests/cargo-agent/test-fail/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "cargo-agent-test-fail-scenario" +version = "0.1.0" +edition = "2021" + +[lib] +path = "src/lib.rs" diff --git a/tests/cargo-agent/test-fail/scenario.env b/tests/cargo-agent/test-fail/scenario.env new file mode 100644 index 0000000..a1b2c9f --- /dev/null +++ b/tests/cargo-agent/test-fail/scenario.env @@ -0,0 +1,5 @@ +SCENARIO_NAME="cargo-agent test-fail" +AGENT_SCRIPT="skills/cargo-agent/scripts/cargo-agent.sh" +RUN_ARGS="test" +EXPECT_EXIT="1" +REQUIRED_TOOLS="cargo jq cargo-nextest" diff --git a/tests/cargo-agent/test-fail/src/lib.rs b/tests/cargo-agent/test-fail/src/lib.rs new file mode 100644 index 0000000..2c6c522 --- /dev/null +++ b/tests/cargo-agent/test-fail/src/lib.rs @@ -0,0 +1,18 @@ +pub fn add(a: i32, b: i32) -> i32 { + a + b +} + +#[cfg(test)] +mod tests { + use super::add; + + #[test] + fn adds_correctly() { + assert_eq!(add(2, 2), 4); + } + + #[test] + fn deliberately_fails() { + assert_eq!(add(2, 2), 5, "this test is meant to fail"); + } +} From dd86071d264052a4d0f78362cbfdd3919e6c3385 Mon Sep 17 00:00:00 2001 From: Chris Raethke Date: Sun, 8 Mar 2026 18:44:43 +1000 Subject: [PATCH 3/3] test: add flock concurrency scenario, document workflow lock for all agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add tests/cargo-agent/flock/ scenario that holds the lockfile in a background process, launches cargo-agent, verifies the "waiting" message appears, then releases the lock and confirms completion. - Update add-x-agent.md with workflow-level lock as step 7 (flock on Linux, Perl fallback on macOS) — applicable to all agents. - Update contributing.md numbered list to include the new step. - Broaden shellcheck scan in CI and run-scenarios.sh to cover test scripts under tests/. --- .github/workflows/ci.yml | 2 +- docs/agents/add-x-agent.md | 46 ++++++++++++-- docs/contributing.md | 11 ++-- tests/cargo-agent/flock/flock-test.sh | 88 +++++++++++++++++++++++++++ tests/cargo-agent/flock/scenario.env | 5 ++ tests/run-scenarios.sh | 2 +- 6 files changed, 142 insertions(+), 12 deletions(-) create mode 100755 tests/cargo-agent/flock/flock-test.sh create mode 100644 tests/cargo-agent/flock/scenario.env diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1caff9e..c25ab44 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: - name: ShellCheck agent scripts run: | shopt -s globstar - shellcheck --severity=warning skills/*/scripts/*.sh tests/run-scenarios.sh install.sh + shellcheck --severity=warning skills/*/scripts/*.sh tests/run-scenarios.sh tests/**/*.sh install.sh # ── cargo-agent scenarios ─────────────────────────────────────────── cargo-agent: diff --git a/docs/agents/add-x-agent.md b/docs/agents/add-x-agent.md index 314abe4..e929120 100644 --- a/docs/agents/add-x-agent.md +++ b/docs/agents/add-x-agent.md @@ -201,13 +201,49 @@ Guidelines: - Print what the scope resolved to (e.g. `Scoped to packages: -p api -p db`). - Add `Bash(CHANGED_FILES=* scripts/-agent.sh*)` to SKILL.md `allowed-tools`. -## 7) Exit codes +## 7) Workflow-level lock + +Prevent concurrent agent runs from causing build-directory contention by +acquiring an exclusive lock at startup. Place the lock after the cleanup trap +and before any real work: + +```bash +LOCKFILE="${TMPDIR_ROOT%/}/-agent.lock" +exec 9>"$LOCKFILE" +if command -v flock >/dev/null 2>&1; then + if ! flock -n 9; then + echo "-agent: waiting for another run to finish..." + flock 9 + fi +else + # macOS: flock not available, use perl as a portable fallback. + if ! command -v perl >/dev/null 2>&1; then + echo "Warning: neither flock nor perl available; skipping workflow lock" >&2 + else + perl -e ' + use Fcntl ":flock"; + open(my $fh, ">&=", 9) or die "fdopen: $!"; + if (!flock($fh, LOCK_EX | LOCK_NB)) { + print STDERR "-agent: waiting for another run to finish...\n"; + flock($fh, LOCK_EX) or die "flock: $!"; + } + ' + fi +fi +``` + +The lock is automatically released when the script exits (fd 9 is closed). +On Linux `flock` is used directly; on macOS (where `flock` is unavailable) +the script falls back to Perl's `flock`. If neither is available, a warning +is printed and execution continues unlocked. + +## 8) Exit codes - `0` — all steps passed - `1` — one or more steps failed - `2` — bad usage, unknown command, or missing required dependency -## 8) SKILL.md +## 9) SKILL.md The `SKILL.md` front-matter must list `allowed-tools` patterns for every env knob the script supports, so the agent can invoke the script without prompting. Include at minimum: @@ -221,14 +257,14 @@ allowed-tools: - Bash(CHANGED_FILES=* scripts/-agent.sh*) ``` -## 9) Update repository metadata +## 10) Update repository metadata Update: - `README.md` (agent table + usage examples) - `install.sh` (`SKILLS` list and optional dependency checks) -## 10) Add scenario tests +## 11) Add scenario tests Add at least: @@ -237,7 +273,7 @@ Add at least: Each scenario needs a `scenario.env`. See `docs/agents/scenario-tests.md`. -## 11) Validate against definition of done +## 12) Validate against definition of done Run through `docs/agents/definition-of-done.md` before commit. diff --git a/docs/contributing.md b/docs/contributing.md index ca96b1e..b5a1c28 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -74,11 +74,12 @@ Follow `docs/agents/add-x-agent.md` — it covers the full workflow: 4. Shared env knobs (`KEEP_DIR`, `MAX_LINES`, `FAIL_FAST`, `RUN_`) 5. `--fail-fast` support with `should_continue` 6. `CHANGED_FILES` scoping (scope work to affected files/packages) -7. Exit codes -8. SKILL.md `allowed-tools` patterns -9. Repository metadata updates (`README.md`, `install.sh`) -10. Scenario tests (clean + issues fixtures) -11. Validate against `docs/agents/definition-of-done.md` +7. Workflow-level lock (prevent concurrent runs) +8. Exit codes +9. SKILL.md `allowed-tools` patterns +10. Repository metadata updates (`README.md`, `install.sh`) +11. Scenario tests (clean + issues fixtures) +12. Validate against `docs/agents/definition-of-done.md` ## Testing diff --git a/tests/cargo-agent/flock/flock-test.sh b/tests/cargo-agent/flock/flock-test.sh new file mode 100755 index 0000000..50f5eba --- /dev/null +++ b/tests/cargo-agent/flock/flock-test.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Test: cargo-agent workflow-level lock prevents concurrent runs. +# +# 1. Hold the lockfile in a background process. +# 2. Launch cargo-agent (help, so it exits quickly after acquiring the lock). +# 3. Verify cargo-agent prints the "waiting" message. +# 4. Release the lock. +# 5. Verify cargo-agent completes successfully. + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +SCRIPT="$ROOT_DIR/skills/cargo-agent/scripts/cargo-agent.sh" +TMPDIR_TEST="$(mktemp -d "${TMPDIR:-/tmp}/flock-test.XXXXXX")" + +cleanup() { + # Kill any lingering background jobs. + kill "$HOLDER_PID" 2>/dev/null || true + wait "$HOLDER_PID" 2>/dev/null || true + rm -rf "$TMPDIR_TEST" +} +trap cleanup EXIT + +LOCKFILE="$TMPDIR_TEST/cargo-agent.lock" +HOLDER_PID="" + +# --- Step 1: hold the lock in the background --- +hold_lock() { + exec 9>"$LOCKFILE" + if command -v flock >/dev/null 2>&1; then + flock 9 + else + perl -e ' + use Fcntl ":flock"; + open(my $fh, ">&=", 9) or die "fdopen: $!"; + flock($fh, LOCK_EX) or die "flock: $!"; + ' + fi + # Keep the subprocess alive (and lock held) until killed. + sleep 30 +} + +hold_lock & +HOLDER_PID=$! + +# Give the holder time to acquire the lock. +sleep 0.5 + +# --- Step 2: launch cargo-agent in the background --- +AGENT_OUTPUT="$TMPDIR_TEST/agent-output.txt" +TMPDIR_ROOT="$TMPDIR_TEST" "$SCRIPT" help >"$AGENT_OUTPUT" 2>&1 & +AGENT_PID=$! + +# --- Step 3: wait for the "waiting" message (up to 3s) --- +waited=0 +found=0 +while [[ "$waited" -lt 6 ]]; do + if grep -q "waiting for another run to finish" "$AGENT_OUTPUT" 2>/dev/null; then + found=1 + break + fi + sleep 0.5 + waited=$((waited + 1)) +done + +if [[ "$found" != "1" ]]; then + echo "FAIL: cargo-agent did not print the 'waiting' message within 3s" + echo "--- output ---" + cat "$AGENT_OUTPUT" + echo "--------------" + exit 1 +fi + +# --- Step 4: release the lock --- +kill "$HOLDER_PID" 2>/dev/null || true +wait "$HOLDER_PID" 2>/dev/null || true + +# --- Step 5: verify cargo-agent completes --- +if wait "$AGENT_PID"; then + echo "PASS: cargo-agent waited for lock and completed successfully" + exit 0 +else + echo "FAIL: cargo-agent exited with non-zero status after acquiring lock" + echo "--- output ---" + cat "$AGENT_OUTPUT" + echo "--------------" + exit 1 +fi diff --git a/tests/cargo-agent/flock/scenario.env b/tests/cargo-agent/flock/scenario.env new file mode 100644 index 0000000..346b0b2 --- /dev/null +++ b/tests/cargo-agent/flock/scenario.env @@ -0,0 +1,5 @@ +SCENARIO_NAME="cargo-agent flock" +AGENT_SCRIPT="tests/cargo-agent/flock/flock-test.sh" +RUN_ARGS="run" +EXPECT_EXIT="0" +REQUIRED_TOOLS="cargo jq" diff --git a/tests/run-scenarios.sh b/tests/run-scenarios.sh index 13d1fec..e705f05 100755 --- a/tests/run-scenarios.sh +++ b/tests/run-scenarios.sh @@ -126,7 +126,7 @@ run_shellcheck() { local scripts=() while IFS= read -r -d '' f; do scripts+=("$f") - done < <(find "$ROOT_DIR/skills" -name '*.sh' -print0; find "$ROOT_DIR" -maxdepth 1 -name '*.sh' -print0; printf '%s\0' "$ROOT_DIR/tests/run-scenarios.sh") + done < <(find "$ROOT_DIR/skills" -name '*.sh' -print0; find "$TESTS_DIR" -name '*.sh' -print0; find "$ROOT_DIR" -maxdepth 1 -name '*.sh' -print0) if shellcheck --severity=warning "${scripts[@]}" >/dev/null 2>&1; then print_status "PASS" "shellcheck"