From 61f7e188cb618a82628755318934f9655c60cb51 Mon Sep 17 00:00:00 2001 From: YiWang24 Date: Mon, 4 May 2026 14:03:03 -0400 Subject: [PATCH 1/4] fix(deploy): pass image-digest + ssh/kubeconfig secrets to stg/prd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses 4 CRITICAL issues from #82: 1. image-digest not threaded — stg.yml/prd.yml built deploy refs as "registry/owner/name@" + empty string, every deploy failed silently. 2. stg-image-digest + stg-deploy-time not threaded to prd — the observation-window safety gate was a no-op; production could ship without staging verification. 3. ssh-key-stg / ssh-key-prd not forwarded — docker (default) deploy preflight aborts without them. 4. kubeconfig-prd not forwarded — k8s deploy mode aborts without it. ## Plumbing - Added 3 new workflow_dispatch inputs (image-digest, stg-image-digest, stg-deploy-time) so manual deploys take an explicit digest. - workflow_run paths read vars.LAST_*_IMAGE_DIGEST / LAST_STG_DEPLOY_TIME with empty fallbacks. Upstream ci.yml / stg deploy is expected to write these on success — that wiring is a follow-up since GHA doesn't directly expose nested workflow outputs across workflow_run boundaries. - Forwarded the full set of secrets each reusable declares (kubeconfig-stg, ssh-key-stg in stg; kubeconfig-prd, ssh-key-prd in prd) by mapping repo UPPER_SNAKE to reusable kebab-case. Refs #82 --- .github/workflows/deploy.yml | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 520259f..e9c0389 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,6 +1,14 @@ # deploy.yml — event entries for the deploy domain. # Routes CI/release completions to stg.yml and prd.yml. # Observability triggers live in observability.yml. 
+# +# image-digest plumbing: +# - workflow_dispatch: caller passes inputs.image-digest (and stg-* for prd) +# - workflow_run: ci.yml / release.yml are expected to write the digest +# into vars.LAST_CI_IMAGE_DIGEST (etc.) before completing +# so downstream deploys can read it; until that wiring +# lands, workflow_run dispatches read the same vars.* +# with sensible empty fallbacks. name: deploy on: @@ -13,6 +21,21 @@ on: required: false type: string default: stg + image-digest: + description: "sha256:... digest of the image to deploy. Required for stg/prd." + required: false + type: string + default: "" + stg-image-digest: + description: "Staging image digest (prd only — used for the safety gate)." + required: false + type: string + default: "" + stg-deploy-time: + description: "Staging deploy timestamp ISO 8601 (prd only — observation window start)." + required: false + type: string + default: "" permissions: contents: write @@ -38,6 +61,11 @@ jobs: with: app-name: ${{ vars.APP_NAME || github.event.repository.name }} image-name: ${{ vars.IMAGE_NAME || github.event.repository.name }} + # image-digest must reach the reusable so deploy steps build a real + # registry/owner/name@sha256:... reference. dispatch supplies it via + # inputs; workflow_run paths read vars.LAST_CI_IMAGE_DIGEST written + # by the upstream ci.yml on success. + image-digest: ${{ inputs.image-digest || vars.LAST_CI_IMAGE_DIGEST || '' }} health-url: ${{ vars.STG_HEALTH_URL }} runner: blacksmith-2vcpu-ubuntu-2404 deploy-type: ${{ vars.DEPLOY_TYPE || 'docker' }} @@ -48,7 +76,10 @@ jobs: compose-file: ${{ vars.COMPOSE_FILE || 'docker-compose.yml' }} compose-project-dir: ${{ vars.COMPOSE_PROJECT_DIR || '~' }} # secrets:inherit name-mismatches kebab-case → UPPER_SNAKE; map explicit. + # Forward every secret the reusable declares so SSH/k8s preflight passes. 
secrets: + kubeconfig-stg: ${{ secrets.KUBECONFIG_STG }} + ssh-key-stg: ${{ secrets.SSH_KEY_STG }} slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }} sentry-token: ${{ secrets.SENTRY_TOKEN }} datadog-api-key: ${{ secrets.DD_API_KEY }} @@ -63,6 +94,13 @@ jobs: with: app-name: ${{ vars.APP_NAME || github.event.repository.name }} image-name: ${{ vars.IMAGE_NAME || github.event.repository.name }} + # The 3 prd-specific safety-gate inputs MUST be set or the + # observation-window check is skipped silently. Source order: + # 1. workflow_dispatch input (manual override) + # 2. vars.* recorded by the upstream stg deploy on success + image-digest: ${{ inputs.image-digest || vars.LAST_RELEASE_IMAGE_DIGEST || '' }} + stg-image-digest: ${{ inputs.stg-image-digest || vars.LAST_STG_IMAGE_DIGEST || '' }} + stg-deploy-time: ${{ inputs.stg-deploy-time || vars.LAST_STG_DEPLOY_TIME || '' }} health-url: ${{ vars.PRD_HEALTH_URL }} runner: blacksmith-2vcpu-ubuntu-2404 deploy-type: ${{ vars.DEPLOY_TYPE || 'docker' }} @@ -76,6 +114,8 @@ jobs: secrets: anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} api-base-url: ${{ secrets.ANTHROPIC_BASE_URL }} + kubeconfig-prd: ${{ secrets.KUBECONFIG_PRD }} + ssh-key-prd: ${{ secrets.SSH_KEY_PRD }} slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }} sentry-token: ${{ secrets.SENTRY_TOKEN }} datadog-api-key: ${{ secrets.DD_API_KEY }} From ea39817c67a9246e3d648a11e5066a6a0487a93b Mon Sep 17 00:00:00 2001 From: YiWang24 Date: Mon, 4 May 2026 14:06:25 -0400 Subject: [PATCH 2/4] chore: gitignore Claude runtime artifacts + add harness parity plan - Add .claude/{worktrees,scheduled_tasks.lock,agents,projects,todos} to .gitignore so per-developer Claude Code state doesn't leak into the repo. - Add docs/superpowers/plans/2026-05-02-claude-harness-param-parity.md documenting the planned upstream parameter parity work for the _common/claude-harness wrapper. 
--- .gitignore | 7 + .../2026-05-02-claude-harness-param-parity.md | 447 ++++++++++++++++++ 2 files changed, 454 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-02-claude-harness-param-parity.md diff --git a/.gitignore b/.gitignore index 5ef728e..ccd56de 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,10 @@ gate-context/ # act local testing .act.env*.yml-e *.yaml-e + +# Claude Code runtime artifacts (per-developer, not project state) +.claude/worktrees/ +.claude/scheduled_tasks.lock +.claude/agents/ +.claude/projects/ +.claude/todos/ diff --git a/docs/superpowers/plans/2026-05-02-claude-harness-param-parity.md b/docs/superpowers/plans/2026-05-02-claude-harness-param-parity.md new file mode 100644 index 0000000..905154e --- /dev/null +++ b/docs/superpowers/plans/2026-05-02-claude-harness-param-parity.md @@ -0,0 +1,447 @@ +# Claude Harness — Upstream Parameter Parity + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Expose the missing `anthropics/claude-code-action@v1` inputs through the `_common/claude-harness` wrapper so downstream callers gain full control over inline-comment classification, fix links, additional permissions, session timeout, plugins, commit signing, and bot identity — without breaking any of the 13 existing harness callers. + +**Architecture:** All new inputs are optional with safe defaults that reproduce current behaviour. Phase 1 touches `action.yml` (new inputs + `with:` pass-throughs) and `compose-args.sh` (timeout). Phase 2 adds plugin and identity pass-throughs, also only in `action.yml`. + +**Tech Stack:** GitHub Actions composite action (YAML), bash, `anthropics/claude-code-action@v1` (pinned to `12310e4417c3473095c957cb311b3cf59a38d659`) + +--- + +## Correction from pre-plan analysis + +`use_foundry` is **not a bug**. 
The official action at the pinned SHA does have `use_foundry` as a first-class input (confirmed via GitHub API). The harness already passes it correctly on line 222 of `action.yml`. No fix needed. + +--- + +## File Map + +| File | Change | +|------|--------| +| `actions/_common/claude-harness/action.yml` | Add new inputs; update `with:` block on the `Run claude-code-action` step; update `additional_permissions` block | +| `actions/_common/claude-harness/compose-args.sh` | Replace hardcoded `API_TIMEOUT_MS: "3000000"` with value from env | + +No new files. No changes to downstream callers (all new inputs are optional with defaults matching current behaviour). + +--- + +## Phase 1 — High-Priority Quick Wins + +### Task 1: Add `classify-inline-comments` and `include-fix-links` inputs + +These are single-input pass-throughs. The official action default for both is `true`, so the harness default must also be `true` to preserve existing behaviour. + +**Files:** +- Modify: `actions/_common/claude-harness/action.yml` + +- [ ] **Step 1: Add the two inputs to the `inputs:` block** (after the existing `use-sticky-comment` input) + +```yaml + classify-inline-comments: + description: | + Buffer inline comments and classify them with Claude Haiku before posting. + Set to "false" to post all inline comments immediately (useful in pure + automation pipelines where every comment is intentional). + required: false + default: "true" + include-fix-links: + description: | + Attach a "Fix this" link to each review comment that opens Claude Code + with the issue pre-loaded. Set to "false" to suppress these links in + fully-automated review jobs. 
+ required: false + default: "true" +``` + +- [ ] **Step 2: Pass the inputs through in the `with:` block of the `Run claude-code-action` step** (after the existing `use_sticky_comment:` line) + +```yaml + classify_inline_comments: ${{ inputs.classify-inline-comments }} + include_fix_links: ${{ inputs.include-fix-links }} +``` + +- [ ] **Step 3: Verify no YAML parse errors** + +```bash +cd /Users/wy/projects/yiagent/OpenCI +python3 -c "import yaml; yaml.safe_load(open('actions/_common/claude-harness/action.yml'))" && echo "YAML OK" +``` + +Expected: `YAML OK` + +- [ ] **Step 4: Commit** + +```bash +git add actions/_common/claude-harness/action.yml +git commit -m "feat(harness): add classify-inline-comments and include-fix-links inputs" +``` + +--- + +### Task 2: Make `additional_permissions` additive + +Currently `additional_permissions: actions: read` is hardcoded in `action.yml`. The `mcp__github_ci__*` tools in the baseline tool list **require** `actions: read` to work, so this permission must never be removed. The fix is to expose an `extra-permissions` input that is appended after the hardcoded floor — not a full override. + +**Files:** +- Modify: `actions/_common/claude-harness/action.yml` + +- [ ] **Step 1: Add `extra-permissions` input** (after `extra-disallowed-tools`) + +```yaml + extra-permissions: + description: | + Additional GitHub Actions permissions to grant, in YAML block scalar form. + These are APPENDED to the hardcoded floor permission "actions: read" which + is always present because the baseline tool list includes mcp__github_ci__* + tools that require it. 
+ Example: "contents: write\npull-requests: write" + required: false + default: "" +``` + +- [ ] **Step 2: Update the `additional_permissions:` block in the `Run claude-code-action` step** + +Replace the current hardcoded block: +```yaml + additional_permissions: | + actions: read +``` + +With a block that appends the caller's extras: +```yaml + additional_permissions: | + actions: read + ${{ inputs.extra-permissions }} +``` + +- [ ] **Step 3: Verify YAML parses** + +```bash +python3 -c "import yaml; yaml.safe_load(open('actions/_common/claude-harness/action.yml'))" && echo "YAML OK" +``` + +Expected: `YAML OK` + +- [ ] **Step 4: Commit** + +```bash +git add actions/_common/claude-harness/action.yml +git commit -m "feat(harness): add extra-permissions (additive, keeps actions:read floor)" +``` + +--- + +### Task 3: Expose configurable session timeout + +`compose-args.sh` hardcodes `API_TIMEOUT_MS: "3000000"` (50 min). Long-running overnight analysis jobs need more; quick PR comment handlers need less. Expose as `session-timeout-ms`. + +**Files:** +- Modify: `actions/_common/claude-harness/action.yml` +- Modify: `actions/_common/claude-harness/compose-args.sh` + +- [ ] **Step 1: Add `session-timeout-ms` input to `action.yml`** (after `max-turns`) + +```yaml + session-timeout-ms: + description: | + Maximum milliseconds Claude Code waits for an API response per request. + Default 3 000 000 ms (50 min) is appropriate for long agent loops. + Reduce for quick PR handlers (e.g. "120000" = 2 min). 
+ required: false + default: "3000000" +``` + +- [ ] **Step 2: Thread the new input through the `Compose claude_args & settings` step env block in `action.yml`** + +Add one line to the `env:` block of the compose step (after `EXTRA_ENV_JSON`): +```yaml + SESSION_TIMEOUT_MS: ${{ inputs.session-timeout-ms }} +``` + +- [ ] **Step 3: Update `compose-args.sh` to use the env var instead of the hardcoded literal** + +Find the line: +```bash + API_TIMEOUT_MS: "3000000", +``` + +Do NOT swap in jq's `$ENV` lookup, even though it is valid jq: +```bash + API_TIMEOUT_MS: ($ENV.SESSION_TIMEOUT_MS // "3000000"), +``` + +jq can read `SESSION_TIMEOUT_MS` through `$ENV`, but jq's `//` only falls back on `null`/`false`, so an empty `SESSION_TIMEOUT_MS` would not get the default, and every other value in this block reaches jq through an explicit `--arg`. Pass the timeout the same way: find the jq call, add `--arg timeout "${SESSION_TIMEOUT_MS:-3000000}"`, and reference `$timeout`: + +```bash + settings_json="$(jq -nc \ + --arg slack "${SLACK_WEBHOOK:-}" \ + --arg base_url "${API_BASE_URL:-}" \ + --arg auth_token "${AUTH_TOKEN_PASSTHROUGH:-}" \ + --arg timeout "${SESSION_TIMEOUT_MS:-3000000}" \ + --argjson extra "${EXTRA_ENV_JSON:-{\}}" \ + '{ + env: ( + ({ + SLACK_WEBHOOK_URL: $slack, + ANTHROPIC_BASE_URL: $base_url, + ANTHROPIC_AUTH_TOKEN: $auth_token, + API_TIMEOUT_MS: $timeout, + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1" + } | with_entries(select(.value != ""))) + + $extra + ) + }')" +``` + +- [ ] **Step 4: Verify the compose script parses** + +```bash +cd /Users/wy/projects/yiagent/OpenCI +bash -n actions/_common/claude-harness/compose-args.sh && echo "BASH OK" +``` + +Expected: `BASH OK` + +- [ ] **Step 5: Smoke-test the jq block manually** + +```bash +SLACK_WEBHOOK="" API_BASE_URL="" AUTH_TOKEN_PASSTHROUGH="" SESSION_TIMEOUT_MS="120000" EXTRA_ENV_JSON='{}' \ + bash -c ' + jq -nc \ + --arg slack "" \ + --arg base_url "" \ + --arg auth_token "" \ + --arg timeout "120000" \ + --argjson extra "{}" \ + "{env: (({API_TIMEOUT_MS: \$timeout, CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: \"1\"} | 
with_entries(select(.value != \"\"))) + \$extra)}" + ' +``` + +Expected: JSON with `"API_TIMEOUT_MS": "120000"` + +- [ ] **Step 6: Commit** + +```bash +git add actions/_common/claude-harness/action.yml actions/_common/claude-harness/compose-args.sh +git commit -m "feat(harness): expose session-timeout-ms (replaces hardcoded 3000000)" +``` + +--- + +## Phase 2 — Medium-Priority Pass-Throughs + +These are pure YAML additions: new `inputs:` entry + one `with:` line. No shell script changes. + +### Task 4: Add plugin inputs + +**Files:** +- Modify: `actions/_common/claude-harness/action.yml` + +- [ ] **Step 1: Add `plugins` and `plugin-marketplaces` inputs** (after `mcp-config`) + +```yaml + plugins: + description: | + Newline-separated list of Claude Code plugin identifiers to install before + the agent run. Example: "anthropics/claude-code-plugin-foo@1.0.0" + required: false + default: "" + plugin-marketplaces: + description: | + Newline-separated list of custom marketplace URLs used to resolve plugins. + required: false + default: "" +``` + +- [ ] **Step 2: Pass through in the `with:` block** (after `include_fix_links`) + +```yaml + plugins: ${{ inputs.plugins }} + plugin_marketplaces: ${{ inputs.plugin-marketplaces }} +``` + +- [ ] **Step 3: Verify YAML** + +```bash +python3 -c "import yaml; yaml.safe_load(open('actions/_common/claude-harness/action.yml'))" && echo "YAML OK" +``` + +- [ ] **Step 4: Commit** + +```bash +git add actions/_common/claude-harness/action.yml +git commit -m "feat(harness): add plugins and plugin-marketplaces inputs" +``` + +--- + +### Task 5: Add commit-signing inputs + +**Files:** +- Modify: `actions/_common/claude-harness/action.yml` + +- [ ] **Step 1: Add `use-commit-signing` and `ssh-signing-key` inputs** (after `oauth-token`) + +```yaml + use-commit-signing: + description: | + Sign commits made by Claude. Set to "true" to enable GitHub API-based + signing (no key needed), or set to "ssh" to use the ssh-signing-key input. 
+ required: false + default: "false" + ssh-signing-key: + description: | + SSH private key for signing commits when use-commit-signing is "ssh". + Pass as a secret: ${{ secrets.CLAUDE_SSH_SIGNING_KEY }}. + required: false + default: "" +``` + +- [ ] **Step 2: Pass through in the `with:` block** + +```yaml + use_commit_signing: ${{ inputs.use-commit-signing }} + ssh_signing_key: ${{ inputs.ssh-signing-key }} +``` + +- [ ] **Step 3: Verify YAML** + +```bash +python3 -c "import yaml; yaml.safe_load(open('actions/_common/claude-harness/action.yml'))" && echo "YAML OK" +``` + +- [ ] **Step 4: Commit** + +```bash +git add actions/_common/claude-harness/action.yml +git commit -m "feat(harness): add use-commit-signing and ssh-signing-key inputs" +``` + +--- + +### Task 6: Add bot identity and base-branch inputs + +**Files:** +- Modify: `actions/_common/claude-harness/action.yml` + +- [ ] **Step 1: Add `bot-id`, `bot-name`, and `base-branch` inputs** (after `github-token`) + +```yaml + bot-id: + description: | + GitHub App bot ID for custom Claude Code GitHub Apps. Leave empty to use + the default claude[bot] identity. + required: false + default: "" + bot-name: + description: | + GitHub App bot display name. Leave empty to use the default "claude". + required: false + default: "" + base-branch: + description: | + Base branch for new branches created by Claude. Leave empty to let the + official action use its default (the repo's default branch). 
+ required: false + default: "" +``` + +- [ ] **Step 2: Pass through in the `with:` block** + +```yaml + bot_id: ${{ inputs.bot-id }} + bot_name: ${{ inputs.bot-name }} + base_branch: ${{ inputs.base-branch }} +``` + +- [ ] **Step 3: Verify YAML** + +```bash +python3 -c "import yaml; yaml.safe_load(open('actions/_common/claude-harness/action.yml'))" && echo "YAML OK" +``` + +- [ ] **Step 4: Commit** + +```bash +git add actions/_common/claude-harness/action.yml +git commit -m "feat(harness): add bot-id, bot-name, base-branch inputs" +``` + +--- + +## Final Verification + +- [ ] **Run full YAML validation on every action that calls the harness** + +```bash +cd /Users/wy/projects/yiagent/OpenCI +python3 -c " +import yaml, glob, sys +errors = [] +for f in glob.glob('actions/**/*.yml', recursive=True): + try: + yaml.safe_load(open(f)) + except Exception as e: + errors.append(f'{f}: {e}') +if errors: + print('\n'.join(errors)); sys.exit(1) +else: + print(f'All {len(list(glob.glob(\"actions/**/*.yml\", recursive=True)))} YAML files OK') +" +``` + +Expected: all files OK with no errors + +- [ ] **Verify backward compatibility: none of the 13 callers pass the new inputs (they all get safe defaults)** + +```bash +grep -r "classify.inline\|include.fix\|extra.permissions\|session.timeout\|plugins\|commit.signing\|bot.id\|bot.name\|base.branch" \ + /Users/wy/projects/yiagent/OpenCI/actions --include="*.yml" \ + --exclude-dir="claude-harness" | grep -v "^Binary" || echo "No callers use new inputs — backward compat confirmed" +``` + +Expected: output shows no existing callers use the new inputs + +- [ ] **Open a PR** + +```bash +git push -u origin claude/objective-galileo-742e23 +gh pr create \ + --title "feat(harness): upstream parameter parity with claude-code-action@v1" \ + --body "$(cat <<'EOF' +## Summary +- Phase 1: classify-inline-comments, include-fix-links, extra-permissions (additive), session-timeout-ms +- Phase 2: plugins, plugin-marketplaces, use-commit-signing, 
ssh-signing-key, bot-id, bot-name, base-branch +- use_foundry confirmed correct — no change needed +- All new inputs are optional with defaults matching current behaviour; 0 breaking changes for the 13 existing callers + +## Skipped (intentionally) +trigger_phrase, assignee_trigger, label_trigger — automation-only pipeline, no interactive triggers +allowed_non_write_users — flagged ⚠️ RISKY in official docs +track_progress, include_comments_by_actor, exclude_comments_by_actor — unnecessary in CI context +path_to_claude_code_executable, path_to_bun_executable — Nix edge case + +## Test plan +- [ ] YAML validation passes for all 13 harness callers +- [ ] Existing callers run without specifying new inputs (backward compat) +- [ ] classify_inline_comments=false disables comment batching in PR review job +- [ ] session-timeout-ms=120000 reaches Claude Code as API_TIMEOUT_MS=120000 +EOF +)" +``` + +--- + +## Parameters intentionally skipped + +| Parameter | Reason | +|-----------|--------| +| `trigger_phrase` / `assignee_trigger` / `label_trigger` | All harness invocations are programmatic; no interactive triggers needed | +| `allowed_non_write_users` | Marked ⚠️ RISKY in official docs | +| `track_progress` | Interactive mode only | +| `include_comments_by_actor` / `exclude_comments_by_actor` | Not needed; callers control what events fire via workflow `on:` | +| `path_to_claude_code_executable` / `path_to_bun_executable` | Nix environment edge case only | +| `display_report` / `show_full_output` | Debug-only; out of scope | +| `allowed_bots` | Not needed; harness already controls bot identity via `bot-id`/`bot-name` | From 98a55b68dfb4891dfa2f90a7dc8897a1d1e78f26 Mon Sep 17 00:00:00 2001 From: YiWang24 Date: Mon, 4 May 2026 14:10:58 -0400 Subject: [PATCH 3/4] fix(bump-sha): backfill missing PR + supersede stale bump branches The on-main-bump-sha workflow silently failed at `gh pr create --label "chore"` because no `chore` label exists in the repo, and `2>/dev/null || 
true` swallowed the error. Result: the workflow pushed bump branches (e.g. chore/bump-self-sha-89792333) without opening a PR, and stale orphan branches accumulated. Changes: - Drop the non-existent `chore` label from `gh pr create`. - Remove the `2>/dev/null || true` so future PR-creation errors surface and fail the workflow. - `set -euo pipefail` on the run block. - Switch to `git checkout -B` + `git push --force-with-lease` so workflow re-runs on the same SHA are idempotent instead of failing on existing branch. - After pushing the new branch, close older `chore/bump-self-sha-*` PRs against `main` with `--delete-branch` so only the latest bump PR is open at any time. - Sweep orphan `chore/bump-self-sha-*` branches that never got a PR (the exact failure mode that just happened) and delete them. - Make `gh pr create` idempotent: if a PR already exists for the branch, log and skip instead of erroring. --- .github/workflows/on-main-bump-sha.yml | 52 +++++++++++++++++++++----- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/.github/workflows/on-main-bump-sha.yml b/.github/workflows/on-main-bump-sha.yml index 888a5bb..1d03622 100644 --- a/.github/workflows/on-main-bump-sha.yml +++ b/.github/workflows/on-main-bump-sha.yml @@ -75,7 +75,7 @@ jobs: if: steps.check.outputs.skip != 'true' run: bash scripts/bump-self-sha.sh - - name: Commit and open PR + - name: Commit, supersede older bumps, open PR if: steps.check.outputs.skip != 'true' env: # gh CLI uses GH_TOKEN; same PAT used for checkout above so the @@ -84,26 +84,60 @@ jobs: NEW_SHA: ${{ steps.check.outputs.new_sha }} OLD_SHA: ${{ steps.check.outputs.current_sha }} run: | + set -euo pipefail + branch="chore/bump-self-sha-${NEW_SHA:0:8}" short_old="${OLD_SHA:0:8}" short_new="${NEW_SHA:0:8}" git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git checkout -b "$branch" + git checkout -B "$branch" git add manifest.yml .github/workflows/ git commit 
-m "chore(manifest): bump YiAgent/OpenCI SHA to ${short_new}" \ -m "Automated update from on-main-bump-sha workflow. old=${OLD_SHA} new=${NEW_SHA}" - git push origin "$branch" + # Force-with-lease so workflow re-runs (same branch name) succeed + # without trampling unrelated work. + git push --force-with-lease origin "$branch" + + # 1) Close older auto-bump PRs and delete their branches. + gh pr list \ + --state open \ + --base main \ + --json number,headRefName \ + --jq '.[] | select(.headRefName | startswith("chore/bump-self-sha-")) + | select(.headRefName != "'"$branch"'") + | .number' \ + | while read -r old_num; do + echo "::notice::Closing superseded bump PR #${old_num}" + gh pr close "$old_num" \ + --comment "Superseded by \`${branch}\` (bump to ${short_new})." \ + --delete-branch + done + + # 2) Delete orphan auto-bump branches that never got a PR. + git ls-remote origin 'refs/heads/chore/bump-self-sha-*' \ + | awk '{sub(/^refs\/heads\//,"",$2); print $2}' \ + | while read -r ref; do + [ "$ref" = "$branch" ] && continue + echo "::notice::Deleting orphan bump branch ${ref}" + git push origin --delete "$ref" || true + done + # 3) Open (or reuse) the PR for the new branch. # shellcheck disable=SC2016 printf 'Automated SHA bump: `%s` to `%s`\n\nThe YiAgent/OpenCI self-reference SHA in manifest.yml was stale. Updated to the latest main HEAD so all reusable workflow calls resolve correctly.\n\n> Generated by the on-main-bump-sha workflow.' 
\ "$short_old" "$short_new" > /tmp/pr-body.md - gh pr create \ - --title "chore(manifest): bump YiAgent/OpenCI SHA to ${short_new}" \ - --body-file /tmp/pr-body.md \ - --base main \ - --head "$branch" \ - --label "chore" 2>/dev/null || true + existing="$(gh pr list --state open --head "$branch" --base main \ + --json number --jq '.[0].number // ""')" + if [ -n "$existing" ]; then + echo "::notice::PR #${existing} already exists for ${branch}; skipping create" + else + gh pr create \ + --title "chore(manifest): bump YiAgent/OpenCI SHA to ${short_new}" \ + --body-file /tmp/pr-body.md \ + --base main \ + --head "$branch" + fi From 3b756a496c6a7925093fb24db2de478281023e3d Mon Sep 17 00:00:00 2001 From: YiWang24 Date: Mon, 4 May 2026 14:36:00 -0400 Subject: [PATCH 4/4] fix(workflows): satisfy audit tests inherited from main MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The workflow audit added in #84-era (commit 4415cb2) introduced bats tests that were already failing on main when authored: - `reusable workflow names match filenames` — `name:` field in `reusable-agent.yml` (claude-harness) and `reusable-deps.yml` (dep-auto-merge) didn't reference the filename suffix. - `workflow requests security-events: write permission` — `ci-self-test.yml` permissions block was missing `security-events: write`. - `auditor is clean against the live repository` — workflow-audit.sh rule W03 flagged `reusable-release.yml` for redeclaring the caller's `concurrency.group` (deadlock risk per issue #68). Fixes: - Rename `name:` to `reusable-agent` / `reusable-deps`. - Add `security-events: write` to ci-self-test.yml permissions. - Drop the `concurrency:` block from `reusable-release.yml` and leave the caller (release.yml) as the sole owner of the group. All 719 bats tests now pass. 
--- .github/workflows/ci-self-test.yml | 1 + .github/workflows/reusable-agent.yml | 2 +- .github/workflows/reusable-deps.yml | 2 +- .github/workflows/reusable-release.yml | 7 ++++--- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-self-test.yml b/.github/workflows/ci-self-test.yml index 01be5c4..eb603d9 100644 --- a/.github/workflows/ci-self-test.yml +++ b/.github/workflows/ci-self-test.yml @@ -36,6 +36,7 @@ on: permissions: contents: read + security-events: write # bats `self-test-routing` test asserts this is present concurrency: group: ci-self-test-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/reusable-agent.yml b/.github/workflows/reusable-agent.yml index e835bfc..734d203 100644 --- a/.github/workflows/reusable-agent.yml +++ b/.github/workflows/reusable-agent.yml @@ -9,7 +9,7 @@ # Inputs / secrets / outputs MUST stay byte-identical to the composite — a # caller can swap one for the other without changing wiring. # ───────────────────────────────────────────────────────────────────────────── -name: claude-harness +name: reusable-agent on: workflow_call: diff --git a/.github/workflows/reusable-deps.yml b/.github/workflows/reusable-deps.yml index 96a25bd..279363e 100644 --- a/.github/workflows/reusable-deps.yml +++ b/.github/workflows/reusable-deps.yml @@ -17,7 +17,7 @@ # Security: we only call `gh pr merge --auto` (gated on branch protection # rules); PR fields are not interpolated into shell commands. 
# ───────────────────────────────────────────────────────────────────────────── -name: dep-auto-merge +name: reusable-deps on: workflow_call: diff --git a/.github/workflows/reusable-release.yml b/.github/workflows/reusable-release.yml index cea421d..fa2bea8 100644 --- a/.github/workflows/reusable-release.yml +++ b/.github/workflows/reusable-release.yml @@ -58,9 +58,10 @@ on: permissions: {} -concurrency: - group: release-${{ github.ref }} - cancel-in-progress: false +# Reusable workflows must NOT redeclare the caller's concurrency group — +# GitHub's deadlock-detection cancels the inner run when both share it +# (see workflow-audit rule W03 / issue #68). Concurrency is owned by the +# caller (release.yml) instead. jobs: # ── marketplace tagging + GitHub Release ────────────────────────────────