From 3fb0101ca28ce110480945b9ef0f536f99684a35 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Tue, 30 Jun 2026 15:23:44 +0200 Subject: [PATCH 1/2] ci: guard that every bundled SKILL.md carries upstream provenance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a provenance-presence check to validate-manifests.sh: every plugins/*/skills/*/SKILL.md must carry a non-empty metadata.github-repo in its YAML frontmatter (as gh skill install records). AGENTS.md forbids hand-authored/divergent bundled skills, but CI only enforced structural parity (manifest <-> filesystem <-> README) — a hand-edited or provenance-stripped skill passed. The guard stays jq/grep-only (no yq). Pin it in validate-manifests.test.sh: the fixture SKILL.md now carries provenance (happy path), plus three FAIL cases (stripped provenance, no frontmatter, empty github-repo). Update AGENTS.md to document the check. Fixes #40 Co-Authored-By: Claude Opus 4.8 (1M context) --- AGENTS.md | 7 +++-- scripts/validate-manifests.sh | 28 ++++++++++++++++++ scripts/validate-manifests.test.sh | 46 +++++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 981709a..9187505 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -31,7 +31,7 @@ plugins/ └── skills/ └── /SKILL.md # An installed skill copied from upstream, with metadata.github-* provenance scripts/ -├── validate-manifests.sh # Manifest + parity + plugin.json + README-table guard (single source of truth; run locally before pushing) +├── validate-manifests.sh # Manifest + parity + plugin.json + README-table + skill-provenance guard (single source of truth; run locally before pushing) └── validate-manifests.test.sh # Self-test: PASS a consistent fixture, FAIL each drift scenario the guard catches README.md # Human-facing index — the plugin table + per-tool install instructions ``` @@ -87,7 +87,10 @@ the 
[`update-agent-skills`](https://github.com/devantler-tech/reusable-workflows reusable workflow and opens a PR when any upstream's content drifts — **no lockfile, no sync bot, no custom metadata.** Never hand-edit a bundled `SKILL.md` to diverge from its upstream; fix it in the skill's **own** upstream (the repo named in its `metadata.github-repo`) and let the update workflow pull -it through. Only the marketplace structure (manifests, `plugin.json`, plugin membership) is authored here. +it through. `validate-manifests.sh` enforces this mechanically: every bundled `SKILL.md` must carry a +non-empty `metadata.github-repo` provenance line, so a hand-authored or provenance-stripped skill fails +CI rather than reaching consumers. Only the marketplace structure (manifests, `plugin.json`, plugin +membership) is authored here. ## Conventions diff --git a/scripts/validate-manifests.sh b/scripts/validate-manifests.sh index b701573..6fc14a9 100755 --- a/scripts/validate-manifests.sh +++ b/scripts/validate-manifests.sh @@ -188,6 +188,33 @@ validate_readme_parity() { return "$failed" } +# 6. Every bundled SKILL.md carries its upstream provenance frontmatter. +# `gh skill install` records the true upstream in each skill's `metadata.github-*` +# frontmatter, and AGENTS.md forbids hand-authored/divergent skills — so a bundled +# skill MUST carry that provenance. We assert a non-empty `github-repo:` inside the +# YAML frontmatter (the block between the first two `---` lines), staying jq/grep-only +# like the rest of this guard (no yq dependency). A skill with no frontmatter, or with +# an empty/absent `github-repo`, is rejected — it can only have come from a hand edit. +validate_skill_provenance() { + local failed=0 + local skill fm + while IFS= read -r skill; do + # Slice the YAML frontmatter: the lines strictly between the first '---' and the + # next '---'. A file that does not open with '---' yields an empty slice (→ fail). 
+ fm=$(awk 'NR==1 && $0 !~ /^---[[:space:]]*$/ {exit} + /^---[[:space:]]*$/ {c++; next} + c==1 {print} + c>=2 {exit}' "$skill") + if printf '%s\n' "$fm" | grep -qE '^[[:space:]]*github-repo:[[:space:]]*[^[:space:]]'; then + echo "✓ provenance $skill" + else + echo "::error::$skill: missing upstream provenance (metadata.github-repo) — bundled skills must come from 'gh skill install', never hand-authored" + failed=1 + fi + done < <(find plugins -type f -path '*/skills/*/SKILL.md' | sort) + return "$failed" +} + main() { validate_marketplace_json "$COPILOT_MANIFEST" validate_marketplace_json "$CLAUDE_MANIFEST" @@ -195,6 +222,7 @@ main() { validate_plugin_json validate_marketplace_plugins_parity validate_readme_parity + validate_skill_provenance } main "$@" diff --git a/scripts/validate-manifests.test.sh b/scripts/validate-manifests.test.sh index 6cdbb79..abe1134 100755 --- a/scripts/validate-manifests.test.sh +++ b/scripts/validate-manifests.test.sh @@ -42,10 +42,22 @@ EOF } # Write plugins//plugin.json + one skill with a SKILL.md. +# The SKILL.md carries upstream provenance frontmatter (metadata.github-repo), exactly +# as `gh skill install` records it, so the provenance guard passes on the happy path. make_plugin() { local root="$1" name="$2" desc="$3" version="$4" mkdir -p "$root/plugins/$name/skills/example-skill" - printf 'Example skill.\n' > "$root/plugins/$name/skills/example-skill/SKILL.md" + cat > "$root/plugins/$name/skills/example-skill/SKILL.md" <<'EOF' +--- +name: example-skill +description: Example skill. +metadata: + github-repo: https://github.com/devantler-tech/agent-skills + github-path: skills/example-skill + github-ref: refs/heads/main +--- +Example skill. 
+EOF cat > "$root/plugins/$name/plugin.json" < "$d/tmp" && mv "$d/tmp" "$d/README.md" check_fail "plugin missing from README table fails" "plugins/beta is not listed in the README.md plugin table" "$d" +# --- check 6: bundled SKILL.md provenance --- +# A skill whose frontmatter has its github-repo provenance stripped (e.g. hand-edited) +# must be rejected. +d=$(fresh) +cat > "$d/plugins/alpha/skills/example-skill/SKILL.md" <<'EOF' +--- +name: example-skill +description: Hand-authored skill with no upstream provenance. +metadata: + domain: testing +--- +Body. +EOF +check_fail "SKILL.md without github-repo provenance fails" "missing upstream provenance" "$d" + +# A skill with no YAML frontmatter at all is likewise rejected. +d=$(fresh) +printf 'Just a body, no frontmatter.\n' > "$d/plugins/alpha/skills/example-skill/SKILL.md" +check_fail "SKILL.md with no frontmatter fails provenance" "missing upstream provenance" "$d" + +# An empty github-repo value (present key, no value) is rejected. +d=$(fresh) +cat > "$d/plugins/alpha/skills/example-skill/SKILL.md" <<'EOF' +--- +name: example-skill +metadata: + github-repo: +--- +Body. +EOF +check_fail "SKILL.md with empty github-repo fails" "missing upstream provenance" "$d" + echo "-----------------------------------------" echo "validate-manifests.sh self-test: $pass passed, $fail failed" [ "$fail" -eq 0 ] From e7c721d1b74589290ef30339bc94c62b05548590 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Tue, 30 Jun 2026 15:34:50 +0200 Subject: [PATCH 2/2] ci: scope provenance check to metadata block, reject empty/quoted/comment values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address CodeRabbit review on #41: the grep-based provenance check accepted a top-level github-repo: (outside metadata:) and treated github-repo: "" / github-repo: # comment as non-empty, so a provenance-stripped hand edit could still pass. 
Replace it with a single awk pass that slices the frontmatter, scopes the lookup to the metadata: block, and rejects empty, quoted-empty (""/''), and comment-only (# …) values. Stays yq-free (no new dependency beyond the jq/grep/awk the guard already relies on). Pin the tightening with three new self-test FAIL fixtures: top-level github-repo, quoted-empty, and comment-only. Self-test 27/0; live guard exit 0. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/validate-manifests.sh | 40 +++++++++++++++++++++--------- scripts/validate-manifests.test.sh | 38 ++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/scripts/validate-manifests.sh b/scripts/validate-manifests.sh index 6fc14a9..15c54e2 100755 --- a/scripts/validate-manifests.sh +++ b/scripts/validate-manifests.sh @@ -191,21 +191,37 @@ validate_readme_parity() { # 6. Every bundled SKILL.md carries its upstream provenance frontmatter. # `gh skill install` records the true upstream in each skill's `metadata.github-*` # frontmatter, and AGENTS.md forbids hand-authored/divergent skills — so a bundled -# skill MUST carry that provenance. We assert a non-empty `github-repo:` inside the -# YAML frontmatter (the block between the first two `---` lines), staying jq/grep-only -# like the rest of this guard (no yq dependency). A skill with no frontmatter, or with -# an empty/absent `github-repo`, is rejected — it can only have come from a hand edit. +# skill MUST carry a real `github-repo` value *inside the `metadata:` block* of the +# YAML frontmatter (the lines between the first two `---`). Staying jq/grep-only (no +# yq dependency), one awk pass both slices the frontmatter and scopes the lookup to +# `metadata:` so a TOP-LEVEL `github-repo:` cannot satisfy it, and rejects an empty, +# quoted-empty (`""`/`''`) or comment-only (`# …`) value — each of which can only +# come from a hand edit. A skill with no frontmatter yields no match → reject.
validate_skill_provenance() { local failed=0 - local skill fm + local skill while IFS= read -r skill; do - # Slice the YAML frontmatter: the lines strictly between the first '---' and the - # next '---'. A file that does not open with '---' yields an empty slice (→ fail). - fm=$(awk 'NR==1 && $0 !~ /^---[[:space:]]*$/ {exit} - /^---[[:space:]]*$/ {c++; next} - c==1 {print} - c>=2 {exit}' "$skill") - if printf '%s\n' "$fm" | grep -qE '^[[:space:]]*github-repo:[[:space:]]*[^[:space:]]'; then + if awk ' + # Walk only the frontmatter (lines between the first two --- ); END decides via found. + NR==1 && $0 !~ /^---[[:space:]]*$/ { exit } + /^---[[:space:]]*$/ { fm++; next } + fm!=1 { next } + # A non-indented key (column 0) is a top-level mapping key. metadata: opens the + # block we care about; any other top-level key closes it (so a TOP-LEVEL + # github-repo: can never satisfy the guard). + /^metadata:[[:space:]]*$/ { in_meta=1; next } + /^[^[:space:]]/ { in_meta=0; next } + # Inside metadata:, an indented github-repo: with a real value is provenance. + in_meta && /^[[:space:]]+github-repo:/ { + v=$0 + sub(/^[[:space:]]+github-repo:[[:space:]]*/, "", v) # drop the key + sub(/[[:space:]]+#.*$/, "", v) # drop trailing " # comment" + if (v ~ /^#/) v="" # whole value is a comment ⇒ null + gsub(/^[[:space:]"'"'"']+|[[:space:]"'"'"']+$/, "", v) # trim spaces and surrounding quotes + if (v != "") found=1 + } + END { exit(found ? 0 : 1) } + ' "$skill"; then echo "✓ provenance $skill" else echo "::error::$skill: missing upstream provenance (metadata.github-repo) — bundled skills must come from 'gh skill install', never hand-authored" diff --git a/scripts/validate-manifests.test.sh b/scripts/validate-manifests.test.sh index abe1134..39695ab 100755 --- a/scripts/validate-manifests.test.sh +++ b/scripts/validate-manifests.test.sh @@ -229,6 +229,44 @@ Body. 
EOF check_fail "SKILL.md with empty github-repo fails" "missing upstream provenance" "$d" +# A TOP-LEVEL github-repo (outside the metadata: block) must NOT satisfy the guard — +# provenance lives at metadata.github-repo, so a hand-edit faking a top-level key fails. +d=$(fresh) +cat > "$d/plugins/alpha/skills/example-skill/SKILL.md" <<'EOF' +--- +name: example-skill +github-repo: https://github.com/devantler-tech/agent-skills +metadata: + domain: testing +--- +Body. +EOF +check_fail "SKILL.md with top-level github-repo (not under metadata) fails" "missing upstream provenance" "$d" + +# A quoted-empty value ("") is still empty provenance and is rejected. +d=$(fresh) +cat > "$d/plugins/alpha/skills/example-skill/SKILL.md" <<'EOF' +--- +name: example-skill +metadata: + github-repo: "" +--- +Body. +EOF +check_fail "SKILL.md with quoted-empty github-repo fails" "missing upstream provenance" "$d" + +# A comment-only value (github-repo: # …) is null in YAML and is rejected. +d=$(fresh) +cat > "$d/plugins/alpha/skills/example-skill/SKILL.md" <<'EOF' +--- +name: example-skill +metadata: + github-repo: # not a real value +--- +Body. +EOF +check_fail "SKILL.md with comment-only github-repo fails" "missing upstream provenance" "$d" + echo "-----------------------------------------" echo "validate-manifests.sh self-test: $pass passed, $fail failed" [ "$fail" -eq 0 ]