codesoda · codesoda · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026 · Mar 8, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
       - name: ShellCheck agent scripts
         run: |
           shopt -s globstar
-          shellcheck --severity=warning skills/*/scripts/*.sh tests/run-scenarios.sh install.sh
+          shellcheck --severity=warning skills/*/scripts/*.sh tests/run-scenarios.sh tests/**/*.sh install.sh
 
   # ── cargo-agent scenarios ───────────────────────────────────────────
   cargo-agent:

diff --git a/docs/agents/add-x-agent.md b/docs/agents/add-x-agent.md
@@ -201,13 +201,49 @@ Guidelines:
 - Print what the scope resolved to (e.g. `Scoped to packages: -p api -p db`).
 - Add `Bash(CHANGED_FILES=* scripts/<name>-agent.sh*)` to SKILL.md `allowed-tools`.
 
-## 7) Exit codes
+## 7) Workflow-level lock
+
+Prevent concurrent agent runs from causing build-directory contention by
+acquiring an exclusive lock at startup. Place the lock after the cleanup trap
+and before any real work:
+
+```bash
+LOCKFILE="${TMPDIR_ROOT%/}/<name>-agent.lock"
+exec 9>"$LOCKFILE"
+if command -v flock >/dev/null 2>&1; then
+  if ! flock -n 9; then
+    echo "<name>-agent: waiting for another run to finish..." >&2
+    flock 9
+  fi
+else
+  # flock(1) not on PATH (e.g. stock macOS): take the same lock via perl's flock.
+  if ! command -v perl >/dev/null 2>&1; then
+    echo "Warning: neither flock nor perl available; skipping workflow lock" >&2
+  else
+    perl -e '
+      use Fcntl ":flock";
+      open(my $fh, ">&=", 9) or die "fdopen: $!";
+      if (!flock($fh, LOCK_EX | LOCK_NB)) {
+        print STDERR "<name>-agent: waiting for another run to finish...\n";
+        flock($fh, LOCK_EX) or die "flock: $!";
+      }
+    '
+  fi
+fi
+```
+
+The lock is released automatically when the script exits and fd 9 is closed.
+When the `flock` utility is on `PATH` it is used directly; otherwise (for
+example on stock macOS) the script falls back to Perl's `flock`. If neither
+is available, a warning is printed and execution continues unlocked.
+
+## 8) Exit codes
 
 - `0` — all steps passed
 - `1` — one or more steps failed
 - `2` — bad usage, unknown command, or missing required dependency
 
-## 8) SKILL.md
+## 9) SKILL.md
 
 The `SKILL.md` front-matter must list `allowed-tools` patterns for every env knob the script supports, so the agent can invoke the script without prompting. Include at minimum:
 
@@ -221,14 +257,14 @@ allowed-tools:
   - Bash(CHANGED_FILES=* scripts/<name>-agent.sh*)
 ```
 
-## 9) Update repository metadata
+## 10) Update repository metadata
 
 Update:
 
 - `README.md` (agent table + usage examples)
 - `install.sh` (`SKILLS` list and optional dependency checks)
 
-## 10) Add scenario tests
+## 11) Add scenario tests
 
 Add at least:
 
@@ -237,7 +273,7 @@ Add at least:
 
 Each scenario needs a `scenario.env`. See `docs/agents/scenario-tests.md`.
 
-## 11) Validate against definition of done
+## 12) Validate against definition of done
 
 Run through `docs/agents/definition-of-done.md` before commit.
 
diff --git a/docs/contributing.md b/docs/contributing.md
@@ -74,11 +74,12 @@ Follow `docs/agents/add-x-agent.md` — it covers the full workflow:
 4. Shared env knobs (`KEEP_DIR`, `MAX_LINES`, `FAIL_FAST`, `RUN_<STEP>`)
 5. `--fail-fast` support with `should_continue`
 6. `CHANGED_FILES` scoping (scope work to affected files/packages)
-7. Exit codes
-8. SKILL.md `allowed-tools` patterns
-9. Repository metadata updates (`README.md`, `install.sh`)
-10. Scenario tests (clean + issues fixtures)
-11. Validate against `docs/agents/definition-of-done.md`
+7. Workflow-level lock (prevent concurrent runs)
+8. Exit codes
+9. SKILL.md `allowed-tools` patterns
+10. Repository metadata updates (`README.md`, `install.sh`)
+11. Scenario tests (clean + issues fixtures)
+12. Validate against `docs/agents/definition-of-done.md`
 
 ## Testing
 

diff --git a/skills/cargo-agent/SKILL.md b/skills/cargo-agent/SKILL.md
@@ -43,6 +43,12 @@ scripts/cargo-agent.sh test     # tests only
 scripts/cargo-agent.sh all      # full suite (default)
 ```
 
+### Run Changed-Crate Tests (Fast Loop)
+```bash
+scripts/cargo-agent.sh test --changed           # tests for crates with changed files
+scripts/cargo-agent.sh test --changed test_auth  # changed-crate tests filtered by name
+```
+
 ### Run Specific Tests
 Pass extra arguments through to cargo-nextest:
 ```bash
@@ -63,6 +69,7 @@ scripts/cargo-agent.sh test -p api test_auth  # "test_auth" in api crate
 | `RUN_INTEGRATION` | `0` | Set to `1` to enable integration tests |
 | `USE_NEXTEST` | `auto` | `auto`/`1`/`0` — controls nextest usage |
 | `FAIL_FAST` | `0` | Set to `1` to stop after first failure (or use `--fail-fast`) |
+| `SQLX_OFFLINE` | `true` | Default SQLx offline mode; set to `false` for live DB (CI overrides this) |
 | `CHANGED_FILES` | _(empty)_ | Space-separated changed file paths; scopes check/clippy/test to affected packages |
 | `MAX_LINES` | `40` | Max diagnostic lines printed per step (unlimited in CI) |
 | `KEEP_DIR` | `0` | Set to `1` to keep temp log dir on success |
@@ -85,3 +92,6 @@ scripts/cargo-agent.sh test -p api test_auth  # "test_auth" in api crate
 - Short package names are auto-resolved (e.g. `-p api` matches `my-project-api`)
 - In CI (`CI=true`), `MAX_LINES` defaults to unlimited; locally it defaults to 40
 - Step ordering in `all`: fmt → sqlx → check/clippy → test. sqlx runs before compilation steps because a stale query cache causes confusing downstream errors
+- On test failure, failing test names are extracted and re-run commands are printed
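+- `test --changed` uses `git diff HEAD` plus untracked files to detect changed crates under `crates/` and scopes tests to them; workspace-level `Cargo.toml`/`Cargo.lock`/`.cargo/` changes run the full suite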
+- A workflow-level lock (`flock` on Linux, Perl fallback on macOS) prevents concurrent runs
diff --git a/skills/cargo-agent/scripts/cargo-agent.sh b/skills/cargo-agent/scripts/cargo-agent.sh
@@ -23,6 +23,9 @@ RUN_INTEGRATION="${RUN_INTEGRATION:-0}" # set to 1 to run integration tests
 FAIL_FAST="${FAIL_FAST:-0}"      # set to 1 or use --fail-fast to stop after first failure
 CHANGED_FILES="${CHANGED_FILES:-}"  # space-separated list of changed files; scopes to affected packages
 
+# Default to SQLx offline mode, but allow explicit overrides (e.g. CI sets false).
+export SQLX_OFFLINE="${SQLX_OFFLINE:-true}"
+
 TMPDIR_ROOT="${TMPDIR_ROOT:-/tmp}"
 OUTDIR="$(mktemp -d "${TMPDIR_ROOT%/}/cargo-agent.XXXXXX")"
 
@@ -37,6 +40,31 @@ cleanup() {
 }
 trap cleanup EXIT
 
+# Workflow-level lock on fd 9: only one cargo-agent instance runs at a time, so
+# concurrent agent invocations cannot overlap builds.
+LOCKFILE="${TMPDIR_ROOT%/}/cargo-agent.lock"
+exec 9>"$LOCKFILE"
+if command -v flock >/dev/null 2>&1; then
+  if ! flock -n 9; then
+    echo "cargo-agent: waiting for another run to finish..." >&2
+    flock 9
+  fi
+else
+  # flock(1) not on PATH (e.g. stock macOS): take the same lock via perl's flock.
+  if ! command -v perl >/dev/null 2>&1; then
+    echo "Warning: neither flock nor perl available; skipping workflow lock" >&2
+  else
+    perl -e '
+      use Fcntl ":flock";
+      open(my $fh, ">&=", 9) or die "fdopen: $!";
+      if (!flock($fh, LOCK_EX | LOCK_NB)) {
+        print STDERR "cargo-agent: waiting for another run to finish...\n";
+        flock($fh, LOCK_EX) or die "flock: $!";
+      }
+    '
+  fi
+fi
+
 need() {
   command -v "$1" >/dev/null 2>&1 || { echo "Missing required tool: $1" >&2; exit 2; }
 }
@@ -174,6 +202,65 @@ resolve_affected_packages() {
   fi
 }
 
+# Build -p package args from git changes (tracked diff vs HEAD plus untracked files).
+# Sets _CHANGED_PACKAGE_ARGS / _CHANGED_FORCE_FULL; returns 1 only when nothing changed.
+_CHANGED_PACKAGE_ARGS=()
+_CHANGED_FORCE_FULL=0
+collect_changed_package_args() {
+  _CHANGED_PACKAGE_ARGS=()
+  _CHANGED_FORCE_FULL=0
+
+  local diff_paths untracked_paths combined_paths changed_crates
+  diff_paths="$(git diff --name-only HEAD 2>/dev/null || true)"
+  untracked_paths="$(git ls-files --others --exclude-standard 2>/dev/null || true)"
+  combined_paths="$(printf '%s\n%s\n' "$diff_paths" "$untracked_paths" | sed '/^$/d' | sort -u)"
+
+  # Nothing modified or untracked: tell the caller there is nothing to test.
+  [[ -n "$combined_paths" ]] || return 1
+
+  # Workspace-level cargo config/manifest changes can impact all crates.
+  # Here-string, not `echo |`: grep -q exits early, which can SIGPIPE the writer if pipefail is on.
+  if grep -Eq '^(Cargo\.toml|Cargo\.lock|\.cargo/)' <<< "$combined_paths"; then
+    _CHANGED_FORCE_FULL=1
+    return 0
+  fi
+
+  changed_crates="$(awk -F/ '$1=="crates" && $2!="" {print $2}' <<< "$combined_paths" | sort -u)"
+  # Files changed, but none under crates/: succeed with no package args so the
+  # caller can tell "no changed crates" apart from "no changed files".
+  [[ -n "$changed_crates" ]] || return 0
+
+  local crate_name
+  while IFS= read -r crate_name; do
+    [[ -n "$crate_name" ]] && _CHANGED_PACKAGE_ARGS+=("-p" "$crate_name")
+  done <<< "$changed_crates"
+
+  return 0
+}
+
+# Extract failing test names from a nextest/libtest log file.
+extract_failing_tests() {
+  local log="$1"
+  [[ -s "$log" ]] || return 0
+
+  {
+    # nextest human output, e.g. "FAIL [ 0.001s] crate::module::test_name"
+    sed -nE 's/^.*FAIL[[:space:]]+\[[^]]+\][[:space:]]+([^[:space:]]+).*$/\1/p' "$log"
+    # libtest-style output, e.g. "test crate::module::test_name ... FAILED"
+    sed -nE 's/^test[[:space:]]+([^[:space:]]+)[[:space:]]+\.\.\.[[:space:]]+FAILED$/\1/p' "$log"
+    # Failure summaries under "failures:" sections.
+    awk '
+      /^failures:$/ { in_failures = 1; next }
+      in_failures && /^[[:space:]]*$/ { in_failures = 0; next }
+      in_failures {
+        line = $0
+        sub(/^[[:space:]]+/, "", line)
+        if (line ~ /::/) print line
+      }
+    ' "$log"
+  } | sort -u
+}
+
 run_fmt() {
   step "fmt"
   local log="$OUTDIR/fmt.log"
@@ -367,6 +454,17 @@ have_nextest() {
 run_tests() {
   step "test"
   local ok=1
+  local changed_only=0
+  local -a test_args=()
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --changed) changed_only=1 ;;
+      --all) changed_only=0 ;;
+      *) test_args+=("$1") ;;
+    esac
+    shift
+  done
 
   if [[ "$USE_NEXTEST" == "0" ]]; then
     echo "Result: SKIP (USE_NEXTEST=0, no runner configured)"
@@ -381,10 +479,43 @@ run_tests() {
     fi
   fi
 
+  if [[ "$changed_only" == "1" ]]; then
+    if collect_changed_package_args; then
+      if [[ "$_CHANGED_FORCE_FULL" == "1" ]]; then
+        echo "Changed workspace-level Cargo files detected; running full suite."
+      elif [[ ${#_CHANGED_PACKAGE_ARGS[@]} -gt 0 ]]; then
+        echo "Changed crates:"
+        local i
+        for ((i = 1; i < ${#_CHANGED_PACKAGE_ARGS[@]}; i += 2)); do
+          echo "  ${_CHANGED_PACKAGE_ARGS[$i]}"
+        done
+        if [[ ${#test_args[@]} -gt 0 ]]; then
+          test_args=("${_CHANGED_PACKAGE_ARGS[@]}" "${test_args[@]}")
+        else
+          test_args=("${_CHANGED_PACKAGE_ARGS[@]}")
+        fi
+      else
+        echo "Result: SKIP"
+        echo "No changed crates detected under crates/."
+        fmt_elapsed
+        return 0
+      fi
+    else
+      echo "Result: SKIP"
+      echo "No changed files detected in git diff/untracked files."
+      fmt_elapsed
+      return 0
+    fi
+  fi
+
   local log="$OUTDIR/nextest.log"
 
   # Resolve short package names (e.g. -p api → -p ai-barometer-api).
-  resolve_package_args "$@"
+  if [[ ${#test_args[@]} -gt 0 ]]; then
+    resolve_package_args "${test_args[@]}"
+  else
+    resolve_package_args
+  fi
   # Bash 3.2 + `set -u` treats "${arr[@]}" on an empty array as unbound.
   if [[ ${#_RESOLVED_ARGS[@]} -gt 0 ]]; then
     set -- "${_RESOLVED_ARGS[@]}"
@@ -416,6 +547,21 @@ run_tests() {
     echo
     echo "Output (first ${MAX_LINES} lines):"
     head -n "$MAX_LINES" "$log"
+
+    local failed_tests
+    failed_tests="$(extract_failing_tests "$log")"
+    if [[ -n "$failed_tests" ]]; then
+      echo
+      echo "Failing tests:"
+      echo "$failed_tests" | while read -r test_name; do
+        [[ -n "$test_name" ]] && echo "  $test_name"
+      done
+      echo
+      echo "Re-run failing tests with:"
+      echo "$failed_tests" | while read -r test_name; do
+        [[ -n "$test_name" ]] && echo "  /cargo-agent test $test_name"
+      done
+    fi
   fi
 
   echo
@@ -433,7 +579,7 @@ cargo-agent: lean Rust workflow output for coding agents
 Usage:
   cargo-agent [--fail-fast]            # runs fmt, clippy, sqlx, nextest (if installed)
   cargo-agent [--fail-fast] fmt|check|clippy|sqlx|all
-  cargo-agent [--fail-fast] test [NEXTEST_ARGS]
+  cargo-agent [--fail-fast] test [--changed|--all] [NEXTEST_ARGS]
 
 Flags:
   --fail-fast            stop after first failing step; also passed to nextest
@@ -457,6 +603,8 @@ Examples:
   cargo-agent --fail-fast              # full suite, stop on first failure
   cargo-agent sqlx                     # sqlx cache verify only
   cargo-agent test test_login          # tests matching "test_login"
+  cargo-agent test --changed           # tests for crates with changed files
+  cargo-agent test --changed test_auth # changed-crate tests filtered by "test_auth"
   cargo-agent test -p db               # tests in the db crate
   cargo-agent test -p api test_auth    # "test_auth" in api crate
   RUN_TESTS=0 cargo-agent              # skip tests