From 75048a78c12728aeaa9c467ce281f5190799bbcc Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 1 Jul 2026 08:05:48 -0500 Subject: [PATCH] Make catalog validation real and guard against link rot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Harden the CI that keeps this catalog installable. The previous PR check silently verified nothing: it derived its file list from `git diff origin/main...HEAD` against a shallow checkout with no origin/main ref, so the list was always empty and schema/URL/checksum validation never ran. It reported success in seconds having checked no manifests. This replaces it with validation that always runs against the whole catalog: - Schema-validate every plugin manifest, resolve every advertised download URL, and verify each archive's sha256 against the manifest — so a wrong checksum or a missing release asset fails in CI, not in a user's install. - Gate index.yaml on being in sync with plugins/. It is what datumctl reads and is fully derived from the manifests, so CI regenerates it and fails on drift. This keeps main's index correct by construction and removes the bot-authored "regenerate index" pull request that previously trailed every merge. - Add a weekly health check that re-verifies the entire catalog and opens an issue when a plugin's links or checksums have rotted, since those assets live in other repositories and can change after a plugin lands here. The generator and verifier are committed as scripts so contributors and CI run exactly the same logic; the README documents regenerating the index. 
---
 .github/workflows/catalog-health.yaml | 56 +++++++++++++++
 .github/workflows/generate-index.yaml | 59 -----------------
 .github/workflows/validate-pr.yaml    | 69 --------------------
 .github/workflows/validate.yaml       | 38 +++++++++++
 README.md                             | 10 ++-
 scripts/generate_index.py             | 81 +++++++++++++++++++++++
 scripts/verify_manifests.py           | 94 +++++++++++++++++++++++++++
 7 files changed, 277 insertions(+), 130 deletions(-)
 create mode 100644 .github/workflows/catalog-health.yaml
 delete mode 100644 .github/workflows/generate-index.yaml
 delete mode 100644 .github/workflows/validate-pr.yaml
 create mode 100644 .github/workflows/validate.yaml
 create mode 100755 scripts/generate_index.py
 create mode 100755 scripts/verify_manifests.py

diff --git a/.github/workflows/catalog-health.yaml b/.github/workflows/catalog-health.yaml
new file mode 100644
index 0000000..839a835
--- /dev/null
+++ b/.github/workflows/catalog-health.yaml
@@ -0,0 +1,56 @@
+name: Catalog health
+
+# The catalog points at release assets that live in other repositories, which
+# can be deleted, retagged, or re-cut after a plugin lands here. PR validation
+# only catches breakage at merge time, so this re-verifies every plugin's
+# download links and checksums on a schedule and opens an issue if the catalog
+# has rotted — before a user hits it during `datumctl plugin install`.
+on:
+  schedule:
+    - cron: "0 12 * * 1" # Mondays, 12:00 UTC
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  verify-assets:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      - name: Install tooling
+        run: pip install --quiet "jsonschema[format-nongpl]>=4" "PyYAML>=6"
+
+      - name: Verify every plugin's download links and checksums
+        id: verify
+        run: python3 scripts/verify_manifests.py
+
+      # Gate on the verify step itself: a failed checkout or pip install is an
+      # infrastructure problem, and the issue text below would misreport it.
+      - name: Open an issue if the catalog is unhealthy
+        if: failure() && steps.verify.outcome == 'failure'
+        env:
+          GH_TOKEN: ${{ github.token }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          gh label create catalog-health \
+            --color b60205 \
+            --description "A plugin download link or checksum is broken" 2>/dev/null || true
+
+          body=$(printf 'The scheduled catalog health check failed: one or more plugin download URLs no longer resolve, or their checksums no longer match the published release assets.\n\nUsers may be unable to install or verify the affected plugin(s) with datumctl until this is fixed. See the run log for the specific plugin and platform:\n\n%s\n' "$RUN_URL")
+
+          number=$(gh issue list --state open --label catalog-health --json number --jq '.[0].number // empty')
+          if [ -n "$number" ]; then
+            gh issue comment "$number" --body "$body"
+          else
+            gh issue create \
+              --title "Catalog health check is failing" \
+              --label catalog-health \
+              --body "$body"
+          fi
diff --git a/.github/workflows/generate-index.yaml b/.github/workflows/generate-index.yaml
deleted file mode 100644
index a84a57a..0000000
--- a/.github/workflows/generate-index.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: Regenerate index.yaml
-
-on:
-  workflow_dispatch:
-  push:
-    branches: [main]
-    paths:
-      - "plugins/**"
-
-permissions:
-  contents: write
-  pull-requests: write
-
-jobs:
-  generate:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Generate index.yaml from plugins/
-        run: |
-          python3 - <<'EOF'
-          import os, yaml
-
-          # Catalog identity header, surfaced in `datumctl plugin index list`
-          # and `datumctl plugin browse`. Followed by the plugin list.
- index = { - "apiVersion": "datumctl.datum.net/v1alpha1", - "kind": "PluginList", - "name": "milo-os", - "description": "Portable CLI plugins for the Milo platform", - "owner": "milo-os", - "homepage": "https://github.com/milo-os/cli-plugins", - "items": [], - } - - for filename in sorted(os.listdir("plugins")): - if not filename.endswith(".yaml"): - continue - with open(f"plugins/{filename}") as fh: - plugin = yaml.safe_load(fh) - index["items"].append(plugin) - - with open("index.yaml", "w") as fh: - yaml.dump(index, fh, default_flow_style=False, allow_unicode=True, sort_keys=False) - - print(f"Generated index.yaml with {len(index['items'])} plugin(s)") - EOF - - - name: Open PR with updated index.yaml - uses: peter-evans/create-pull-request@v8 - with: - commit-message: "chore: regenerate index.yaml" - branch: chore/regen-index - title: "chore: regenerate index.yaml" - body: | - The plugin catalog index has been automatically regenerated following a change to one or more plugin manifests. - - The index is what `datumctl` reads to discover available plugins, their current versions, and where to download them for each platform. Keeping it in sync ensures users always see accurate information when installing or updating plugins. 
diff --git a/.github/workflows/validate-pr.yaml b/.github/workflows/validate-pr.yaml deleted file mode 100644 index 2f4c6b5..0000000 --- a/.github/workflows/validate-pr.yaml +++ /dev/null @@ -1,69 +0,0 @@ -name: Validate plugin manifests - -on: - pull_request: - branches: [main] - paths: - - "plugins/**" - -jobs: - validate: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install ajv-cli - run: npm install -g ajv-cli ajv-formats - - - name: Find changed plugin manifests - id: changed - run: | - files=$(git diff --name-only origin/main...HEAD -- plugins/ | tr '\n' ' ') - echo "files=$files" >> "$GITHUB_OUTPUT" - - - name: Convert YAML manifests to JSON and validate schema - run: | - for f in ${{ steps.changed.outputs.files }}; do - echo "Validating schema: $f" - python3 -c " - import sys, json, yaml - with open('$f') as fh: - data = yaml.safe_load(fh) - print(json.dumps(data)) - " > /tmp/manifest.json - ajv validate -s schema/plugin-v1alpha1.json -d /tmp/manifest.json --spec=draft7 - done - - - name: Check URIs resolve and verify SHA256 - run: | - for f in ${{ steps.changed.outputs.files }}; do - echo "Checking URIs in: $f" - python3 - <<'EOF' - import sys, yaml, urllib.request, hashlib - - with open("$f") as fh: - manifest = yaml.safe_load(fh) - - for platform in manifest["spec"]["platforms"]: - uri = platform["uri"] - expected_sha = platform["sha256"] - print(f" Checking {uri}") - - req = urllib.request.Request(uri, method="HEAD") - with urllib.request.urlopen(req) as resp: - if resp.status != 200: - print(f" ERROR: {uri} returned {resp.status}", file=sys.stderr) - sys.exit(1) - - print(f" Downloading and verifying SHA256...") - with urllib.request.urlopen(uri) as resp: - data = resp.read() - actual_sha = hashlib.sha256(data).hexdigest() - if actual_sha != expected_sha: - print(f" ERROR: SHA256 mismatch for {uri}", file=sys.stderr) - print(f" expected: {expected_sha}", file=sys.stderr) - print(f" actual: {actual_sha}", file=sys.stderr) 
- sys.exit(1) - print(f" OK: {actual_sha}") - EOF - done diff --git a/.github/workflows/validate.yaml b/.github/workflows/validate.yaml new file mode 100644 index 0000000..05661f1 --- /dev/null +++ b/.github/workflows/validate.yaml @@ -0,0 +1,38 @@ +name: Validate catalog + +# Runs on every PR and on pushes to main. The catalog is small, so we validate +# and verify the whole thing each time rather than trying to diff which plugins +# changed — that keeps the check honest (a manifest can break for reasons +# unrelated to the files a PR touched) and always exercises the real assets. +on: + pull_request: + branches: [main] + push: + branches: [main] + +permissions: + contents: read + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Install tooling + run: pip install --quiet "jsonschema[format-nongpl]>=4" "PyYAML>=6" + + # index.yaml is what datumctl reads, and it is fully derived from + # plugins/*.yaml. Fail if the committed index doesn't match what the + # generator produces, so main's index can never drift from the manifests. + - name: Check index.yaml is in sync with plugins/ + run: python3 scripts/generate_index.py --check + + # Schema-validate every manifest and prove every advertised download + # actually resolves and matches its checksum. + - name: Validate manifests and verify release assets + run: python3 scripts/verify_manifests.py diff --git a/README.md b/README.md index caa3577..65cd6a2 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,14 @@ datumctl ipam pool list 1. Add the `datumctl-plugin` topic to your plugin's GitHub repository. 2. Open a pull request adding `plugins/.yaml`, following the [schema](schema/plugin-v1alpha1.json). -3. CI validates your manifest: schema conformance, that every download URL resolves, and that each SHA256 matches the published archive. -4. 
Once merged, [`index.yaml`](index.yaml) — the file `datumctl` actually reads — is regenerated automatically.
+3. Regenerate the index and commit it alongside your manifest:
+
+   ```sh
+   python3 scripts/generate_index.py
+   ```
+
+   [`index.yaml`](index.yaml) — the file `datumctl` actually reads — is derived entirely from `plugins/*.yaml`, and CI fails if the two drift.
+4. CI validates every manifest: schema conformance, that each download URL resolves, and that each SHA256 matches the published archive. A weekly [health check](.github/workflows/catalog-health.yaml) re-verifies the whole catalog so a plugin's links can't rot unnoticed.
 
 ## Plugin manifest format
 
diff --git a/scripts/generate_index.py b/scripts/generate_index.py
new file mode 100755
index 0000000..cd92fcf
--- /dev/null
+++ b/scripts/generate_index.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Generate index.yaml from the per-plugin manifests in plugins/.
+
+index.yaml is the single file datumctl reads to discover plugins, their
+versions, and per-platform download locations. It is fully derived from
+plugins/*.yaml, so it is regenerated deterministically here and checked for
+drift in CI (`--check`) rather than hand-edited.
+
+Usage:
+    scripts/generate_index.py          # write index.yaml
+    scripts/generate_index.py --check  # fail if index.yaml is out of date
+"""
+import difflib
+import os
+import sys
+
+import yaml
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+PLUGINS_DIR = os.path.join(REPO_ROOT, "plugins")
+INDEX_PATH = os.path.join(REPO_ROOT, "index.yaml")
+
+# Catalog identity header, surfaced in `datumctl plugin index list` and
+# `datumctl plugin browse`, followed by one entry per plugin manifest.
+HEADER = {
+    "apiVersion": "datumctl.datum.net/v1alpha1",
+    "kind": "PluginList",
+    "name": "milo-os",
+    "description": "Portable CLI plugins for the Milo platform",
+    "owner": "milo-os",
+    "homepage": "https://github.com/milo-os/cli-plugins",
+}
+
+
+def render() -> tuple[str, int]:
+    """Build the index document from every plugins/*.yaml manifest.
+
+    Manifests are read in sorted filename order so the output is
+    deterministic, and as UTF-8 regardless of the locale's default encoding.
+
+    Returns:
+        The rendered YAML text and the number of plugins it contains.
+
+    Raises:
+        SystemExit: if a manifest is empty or not a YAML mapping, which would
+            otherwise be dumped into the index as a meaningless entry.
+    """
+    items = []
+    for filename in sorted(os.listdir(PLUGINS_DIR)):
+        if not filename.endswith(".yaml"):
+            continue
+        with open(os.path.join(PLUGINS_DIR, filename), encoding="utf-8") as fh:
+            manifest = yaml.safe_load(fh)
+        if not isinstance(manifest, dict):
+            sys.exit(f"plugins/{filename}: expected a YAML mapping")
+        items.append(manifest)
+    text = yaml.dump(
+        {**HEADER, "items": items},
+        default_flow_style=False,
+        allow_unicode=True,
+        sort_keys=False,
+    )
+    return text, len(items)
+
+
+def main() -> None:
+    """Write index.yaml, or with `--check` fail if it is out of date.
+
+    In check mode nothing is written: a unified diff of the committed versus
+    expected content goes to stderr and the process exits 1 on any drift.
+    """
+    expected, count = render()
+
+    if "--check" in sys.argv[1:]:
+        try:
+            with open(INDEX_PATH, encoding="utf-8") as fh:
+                current = fh.read()
+        except FileNotFoundError:
+            # A missing index is reported as drift from an empty file.
+            current = ""
+        if current != expected:
+            print(
+                "index.yaml is out of sync with plugins/.\n"
+                "Run `python3 scripts/generate_index.py` and commit the result.\n",
+                file=sys.stderr,
+            )
+            diff = difflib.unified_diff(
+                current.splitlines(),
+                expected.splitlines(),
+                fromfile="index.yaml (committed)",
+                tofile="index.yaml (expected)",
+                lineterm="",
+            )
+            print("\n".join(diff), file=sys.stderr)
+            sys.exit(1)
+        print(f"index.yaml is in sync ({count} plugin(s)).")
+        return
+
+    # newline="\n" stops Windows from writing CRLF, so the generated bytes are
+    # the same on every platform.
+    with open(INDEX_PATH, "w", encoding="utf-8", newline="\n") as fh:
+        fh.write(expected)
+    print(f"Wrote index.yaml ({count} plugin(s)).")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/verify_manifests.py b/scripts/verify_manifests.py
new file mode 100755
index 0000000..11614af
--- /dev/null
+++ b/scripts/verify_manifests.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""Validate plugin manifests and verify their published release assets.
+
+For each manifest (all of plugins/*.yaml by default, or the paths passed as
+arguments) this:
+  * validates it against schema/plugin-v1alpha1.json,
+  * confirms every platform download URL resolves, and
+  * downloads each archive and confirms its sha256 matches the manifest.
+
+This is the check that keeps the catalog installable: a wrong checksum or a
+deleted/retagged release asset fails here, in CI, rather than in a user's
+`datumctl plugin install`.
+
+Usage:
+    scripts/verify_manifests.py                    # every plugins/*.yaml
+    scripts/verify_manifests.py plugins/ipam.yaml ...
+"""
+import hashlib
+import http.client
+import json
+import os
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+
+import yaml
+from jsonschema import Draft7Validator, FormatChecker
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+SCHEMA_PATH = os.path.join(REPO_ROOT, "schema", "plugin-v1alpha1.json")
+PLUGINS_DIR = os.path.join(REPO_ROOT, "plugins")
+
+# Applied to each blocking socket operation, so a stalled host fails the check
+# instead of hanging the job.
+TIMEOUT_SECONDS = 60
+# Archives are hashed as they stream in rather than held in memory whole.
+CHUNK_SIZE = 1024 * 1024
+
+
+def _sha256_of(uri: str) -> str:
+    """Download *uri* and return the hex sha256 digest of its body.
+
+    Raises ValueError for anything but http(s): urlopen also accepts file: and
+    ftp: URLs, and a manifest from a pull request is untrusted input.
+    """
+    if urllib.parse.urlsplit(uri).scheme not in ("http", "https"):
+        raise ValueError("not an http(s) URL")
+    digest = hashlib.sha256()
+    with urllib.request.urlopen(uri, timeout=TIMEOUT_SECONDS) as resp:
+        while chunk := resp.read(CHUNK_SIZE):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def verify(path: str, validator: Draft7Validator) -> bool:
+    """Check one manifest against the schema and its published archives.
+
+    Args:
+        path: filesystem path of the manifest.
+        validator: schema validator, shared across manifests.
+
+    Returns:
+        True if the manifest is schema-valid and every platform archive
+        downloads and matches its sha256. Load, schema, download and checksum
+        failures are printed to stderr and reported as False rather than
+        raised, so the remaining manifests are still checked.
+    """
+    print(f"== {os.path.relpath(path, REPO_ROOT)}")
+    try:
+        with open(path, encoding="utf-8") as fh:
+            manifest = yaml.safe_load(fh)
+    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
+        print(f"  ERROR: cannot load manifest: {exc}", file=sys.stderr)
+        return False
+
+    errors = sorted(validator.iter_errors(manifest), key=lambda e: list(e.path))
+    if errors:
+        for e in errors:
+            loc = "/".join(str(p) for p in e.path) or "(root)"
+            print(f"  SCHEMA ERROR at {loc}: {e.message}", file=sys.stderr)
+        return False
+    print("  schema OK")
+
+    ok = True
+    # NOTE(review): assumes the schema requires spec.platforms[].uri/sha256;
+    # confirm against schema/plugin-v1alpha1.json.
+    for platform in manifest["spec"]["platforms"]:
+        uri = platform["uri"]
+        expected = platform["sha256"]
+        print(f"  {uri}")
+        try:
+            actual = _sha256_of(uri)
+        except urllib.error.HTTPError as exc:
+            print(f"    ERROR: returned HTTP {exc.code}", file=sys.stderr)
+            ok = False
+            continue
+        except urllib.error.URLError as exc:
+            print(f"    ERROR: unreachable: {exc.reason}", file=sys.stderr)
+            ok = False
+            continue
+        except (OSError, ValueError, http.client.HTTPException) as exc:
+            # Timeouts and connections dropped while reading the body are
+            # raised as-is, not wrapped in URLError.
+            print(
+                f"    ERROR: download failed: {type(exc).__name__}: {exc}",
+                file=sys.stderr,
+            )
+            ok = False
+            continue
+        if actual != expected:
+            print(
+                f"    ERROR: sha256 mismatch\n"
+                f"      expected {expected}\n"
+                f"      actual   {actual}",
+                file=sys.stderr,
+            )
+            ok = False
+        else:
+            print(f"    OK {actual}")
+    return ok
+
+
+def main() -> None:
+    """Verify the manifests named on the command line, or all of plugins/.
+
+    Every manifest is checked before exiting; the exit status is 1 if any
+    of them failed.
+    """
+    paths = sys.argv[1:] or [
+        os.path.join(PLUGINS_DIR, f)
+        for f in sorted(os.listdir(PLUGINS_DIR))
+        if f.endswith(".yaml")
+    ]
+    with open(SCHEMA_PATH, encoding="utf-8") as fh:
+        validator = Draft7Validator(json.load(fh), format_checker=FormatChecker())
+
+    failed = [p for p in paths if not verify(p, validator)]
+    if failed:
+        rel = ", ".join(os.path.relpath(p, REPO_ROOT) for p in failed)
+        print(f"\nFAILED {len(failed)} manifest(s): {rel}", file=sys.stderr)
+        sys.exit(1)
+    print(f"\nAll {len(paths)} manifest(s) valid and verified.")
+
+
+if __name__ == "__main__":
+    main()