From e2546380b268d979c44637a4e765f60518635bd1 Mon Sep 17 00:00:00 2001 From: Raymond Khalife Date: Wed, 3 Jun 2026 10:30:25 -0400 Subject: [PATCH 1/3] feat: publish per-skill content hash in .well-known/skills/index.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a `hash` field ("sha256:") to each skill entry so consumers can detect which skills changed from a single index fetch, without downloading and hashing every file. The hash is computed over the skill's served files (path + per-file sha256, sorted) — order-independent and rename-sensitive. `files` stays a string array; the field is purely additive, so already deployed sync scripts are unaffected. Includes the design doc documenting the hash contract. This is the publishing half; the differential-sync consumer upgrade in autosync-ic-skills is a follow-up. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 28 ++++++++++++++++++++++ src/lib/skills.ts | 26 ++++++++++++++++++++ src/pages/.well-known/skills/index.json.ts | 3 ++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 856dc1c..0a6467b 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,34 @@ The files are plain markdown — paste into any system prompt, rules file, or co | Skill index | [`llms.txt`](https://skills.internetcomputer.org/llms.txt) | All skills with descriptions and discovery links | | Skill page | [`/skills/{name}/`](https://skills.internetcomputer.org/skills/ckbtc/) | Pre-rendered skill page for humans | +### Change detection — the `hash` field + +Each skill entry in [`index.json`](https://skills.internetcomputer.org/.well-known/skills/index.json) carries a `hash`: + +```jsonc +{ + "name": "asset-canister", + "url": "https://.../asset-canister/SKILL.md", + "files": ["SKILL.md"], + "hash": "sha256:f3ee5a3e…" // per-skill aggregate content hash +} +``` + +**What it is.** A `sha256:` digest over all of the skill's served files. 
It is +computed from each file's path plus the sha256 of its bytes, sorted by path — so it +changes whenever any file in the skill changes (including `references/` and `scripts/` +files), and is sensitive to renames. It is **not** tied to a git commit; it is a pure +content hash of what the server actually serves. + +**What it's for.** Detecting *which* skills changed from a single fetch of `index.json`, +without downloading and hashing every file yourself. Store the `{name: hash}` map, and on +the next fetch re-download only the skills whose `hash` differs (or are new), and prune +those no longer listed. This is the basis for the differential sync in the +[`autosync-ic-skills`](skills/autosync-ic-skills/SKILL.md) skill. + +**What it's not for.** It is not a version number or changelog signal — it carries no +ordering or human meaning, only equality. Compare hashes for equality; do not parse them. + ## Evaluations Each skill can have an evaluation file at `evaluations/.json` that tests whether agents produce correct output with the skill loaded. Evals compare agent output with and without the skill, using an LLM judge to score expected behaviors. diff --git a/src/lib/skills.ts b/src/lib/skills.ts index cf6a60b..d7e35b7 100644 --- a/src/lib/skills.ts +++ b/src/lib/skills.ts @@ -5,6 +5,7 @@ import { getCollection, type CollectionEntry } from 'astro:content'; import fs from 'node:fs/promises'; import path from 'node:path'; +import crypto from 'node:crypto'; import { execFile } from 'node:child_process'; import { promisify } from 'node:util'; @@ -117,6 +118,31 @@ export async function getSkillFiles(skill: Skill): Promise { return ['SKILL.md', ...allFiles.filter((f) => f !== 'SKILL.md').sort()]; } +/** + * Per-skill aggregate content hash, published in .well-known/skills/index.json so + * consumers can detect which skills changed without downloading every file. + * + * Returns "sha256:" over the skill's files. 
/**
 * Computes the per-skill aggregate content hash that is published in
 * .well-known/skills/index.json, so consumers can detect which skills changed
 * without downloading every file.
 *
 * The digest input is built from the skill's served files (the list returned by
 * `getSkillFiles`), sorted by path, with each file contributing exactly:
 *
 *   "<path>\n<hex sha256 of the file's bytes>\n"
 *
 * Hashing path + per-file digest (rather than concatenating raw bytes) makes the
 * result independent of listing order and sensitive to renames. This format is
 * part of the public contract — consumers compare the value for equality — so the
 * bytes fed into the aggregate hash must stay exactly as they are.
 *
 * @param skill - Skill to hash. Its directory is the parent of `skill.filePath`,
 *   or of `skills/<id>/SKILL.md` when `filePath` is not set, resolved against the
 *   current working directory.
 * @returns The string `sha256:` followed by the hex-encoded aggregate digest.
 * @throws Rejects if any listed file cannot be read.
 */
export async function getSkillHash(skill: Skill): Promise<string> {
  const skillMdPath = skill.filePath ?? `skills/${skill.id}/SKILL.md`;
  const skillDir = path.dirname(path.resolve(process.cwd(), skillMdPath));

  // Copy before sorting so the array handed back by getSkillFiles is never mutated.
  const sortedFiles = [...(await getSkillFiles(skill))].sort();

  // Each file's digest is independent of the others, so read and hash them
  // concurrently. Promise.all yields results in the order of `sortedFiles`,
  // which keeps the aggregate input identical to a sequential pass.
  const entries = await Promise.all(
    sortedFiles.map(async (file) => {
      const bytes = await fs.readFile(path.join(skillDir, file));
      const fileHash = crypto.createHash('sha256').update(bytes).digest('hex');
      return `${file}\n${fileHash}\n`;
    }),
  );

  const aggregate = crypto.createHash('sha256');
  for (const entry of entries) {
    aggregate.update(entry);
  }
  return `sha256:${aggregate.digest('hex')}`;
}
feat(autosync-ic-skills): differential sync keyed off per-skill hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the blind full-mirror with a differential sync. The script fetches index.json once, compares each skill's published `hash` against a {name: hash} manifest (.ic-managed.json), and re-downloads only changed or new skills — pruning removed ones. Unchanged skills are skipped with no per-file downloads, and a no-op sync is silent. Falls back to re-downloading any skill the server publishes no `hash` for, keeps cached skills on network/jq failure, retains the old hash on a failed download so the next run retries, and transparently migrates the legacy bare-array manifest format. The script is now shipped as an attached file (scripts/sync-ic-skills.sh) and the installer fetches it via curl for byte-exact delivery instead of transcribing an inline block. On change it emits a SessionStart JSON object (systemMessage + additionalContext) so the summary surfaces in the Claude Code UI and Claude's context. Co-Authored-By: Claude Opus 4.8 (1M context) --- skills/autosync-ic-skills/SKILL.md | 105 +++++--------- .../scripts/sync-ic-skills.sh | 132 ++++++++++++++++++ 2 files changed, 169 insertions(+), 68 deletions(-) create mode 100644 skills/autosync-ic-skills/scripts/sync-ic-skills.sh diff --git a/skills/autosync-ic-skills/SKILL.md b/skills/autosync-ic-skills/SKILL.md index e3645a1..a8ec468 100644 --- a/skills/autosync-ic-skills/SKILL.md +++ b/skills/autosync-ic-skills/SKILL.md @@ -18,13 +18,17 @@ needs this link again — the installed `SessionStart` hook does the work from t ## What you will create -1. `.claude/sync-ic-skills.sh` — mirrors the live skill index into `.claude/skills/`. +1. `.claude/sync-ic-skills.sh` — a **differential** sync script that mirrors the live + skill index into `.claude/skills/`. 2. A `SessionStart` hook in `.claude/settings.json` that runs that script. 3. 
An immediate first run, so skills are present right away. -The sync is a **mirror**: it always re-downloads the current skills, so it picks up -new skills, updated versions of existing skills, and removals — with no version -metadata required on the server side. +The script is a **differential mirror**. It fetches the discovery index once and +compares each skill's published `hash` against a stored manifest, re-downloading only +the skills that actually changed (and pruning ones removed upstream). Unchanged skills +are skipped with no per-file downloads, and the script stays silent unless something +changed. If the server does not publish a `hash` for a skill, the script falls back to +re-downloading it every run, so it remains correct either way. ## Important: tell the user what to expect @@ -60,70 +64,34 @@ command -v jq >/dev/null 2>&1 && echo "jq: ok" || echo "jq: MISSING" (it exits cleanly with a warning when `jq` is absent), and they can install `jq` later and the next session will sync. -## Step 1 — Write the sync script +## Step 1 — Download the sync script -Create `.claude/sync-ic-skills.sh` with **exactly** this content: +The script is published as a file alongside this skill, so you fetch it verbatim rather +than transcribing it (this guarantees byte-exact content). Create the `.claude` directory +and download it: ```bash -#!/usr/bin/env bash -# sync-ic-skills.sh — mirror the latest Internet Computer skills into .claude/skills/ -# Idempotent and offline-safe. Only skills this script installed are ever pruned, -# so your own local skills are never touched. -set -euo pipefail - -BASE="https://skills.internetcomputer.org/.well-known/skills" -INDEX_URL="$BASE/index.json" -DEST=".claude/skills" -MANIFEST="$DEST/.ic-managed.json" # tracks which skills this script manages - -mkdir -p "$DEST" - -# --- Fetch the index. On any network failure, keep cached skills and exit cleanly. --- -TMP_INDEX="$(mktemp)" -trap 'rm -f "$TMP_INDEX"' EXIT -if ! 
curl -fsSL --max-time 20 "$INDEX_URL" -o "$TMP_INDEX"; then - echo "[ic-skills] could not reach $INDEX_URL — keeping cached skills" >&2 - exit 0 -fi - -# --- jq is required to parse the index. If absent, warn and exit without failing. --- -if ! command -v jq >/dev/null 2>&1; then - echo "[ic-skills] 'jq' not found — install jq to enable IC skill sync" >&2 - exit 0 -fi - -NEW_NAMES="$(jq -r '.skills[].name' "$TMP_INDEX")" - -# --- Prune: drop previously-managed skills that are no longer in the index. --- -if [ -f "$MANIFEST" ]; then - while IFS= read -r old; do - [ -n "$old" ] || continue - if ! grep -qxF "$old" <<<"$NEW_NAMES"; then - rm -rf "${DEST:?}/$old" - echo "[ic-skills] pruned removed skill: $old" >&2 - fi - done < <(jq -r '.[]?' "$MANIFEST" 2>/dev/null || true) -fi - -# --- Download every skill's files (overwrite == always latest). --- -jq -c '.skills[]' "$TMP_INDEX" | while IFS= read -r entry; do - name="$(jq -r '.name' <<<"$entry")" - [ -n "$name" ] && [ "$name" != "null" ] || continue - mkdir -p "$DEST/$name" - while IFS= read -r f; do - [ -n "$f" ] || continue - mkdir -p "$(dirname "$DEST/$name/$f")" # files may live in subdirs (e.g. scripts/) - if ! curl -fsSL --max-time 20 "$BASE/$name/$f" -o "$DEST/$name/$f"; then - echo "[ic-skills] warning: failed to fetch $name/$f" >&2 - fi - done < <(jq -r '.files[]?' <<<"$entry") -done - -# --- Record managed skill names for the next prune pass. --- -jq '[.skills[].name]' "$TMP_INDEX" > "$MANIFEST" -echo "[ic-skills] synced $(jq '.skills | length' "$TMP_INDEX") Internet Computer skills into $DEST" >&2 +mkdir -p .claude +curl -fsSL https://skills.internetcomputer.org/.well-known/skills/autosync-ic-skills/scripts/sync-ic-skills.sh \ + -o .claude/sync-ic-skills.sh ``` +Do **not** hand-write or paraphrase the script — always fetch the published copy so the +sync logic stays correct as it is updated upstream. 
+ +**What the script does** (for the user's awareness): + +- Fetches `https://skills.internetcomputer.org/.well-known/skills/index.json` once. +- For each skill, compares the published `hash` against `.claude/skills/.ic-managed.json` + (a `{ "<name>": "<hash>" }` manifest of skills it manages) and re-downloads only the + skills whose hash changed or are new. +- Prunes skills it previously installed that are no longer in the index. +- Prints a one-line `added / updated / removed` summary only when something changed; + otherwise it is silent. +- Degrades gracefully: exits cleanly (keeping cached skills) if the network is down or + `jq` is missing, and falls back to re-downloading skills the server publishes no + `hash` for. + ## Step 2 — Register the SessionStart hook (idempotently) Add a `SessionStart` hook to `.claude/settings.json` that runs the script. @@ -164,7 +132,7 @@ bash .claude/sync-ic-skills.sh - Confirm `.claude/skills/` now contains skill directories (e.g. `motoko`, `asset-canister`, `internet-identity`, …) each with a `SKILL.md`. -- Confirm `.claude/skills/.ic-managed.json` lists the synced skill names. +- Confirm `.claude/skills/.ic-managed.json` maps each synced skill name to its hash. - Tell the user: how many skills were installed, that the `SessionStart` hook is in place, and that they'll be prompted to trust the hook before it auto-runs next session. From then on, their IC skills refresh automatically every session. @@ -173,10 +141,11 @@ bash .claude/sync-ic-skills.sh - **Safe to re-run.** Re-invoking this skill or the script is idempotent: the hook is not duplicated, and only skills tracked in `.ic-managed.json` are ever pruned. -- **No server-side versioning needed.** Because the script re-mirrors current content, - it captures new skills, new versions, and removals automatically. If the index later - adds `sha256`/`version` fields, the script can be upgraded to a differential sync, - but that is not required for correctness.
#!/usr/bin/env bash
# sync-ic-skills.sh — mirror the latest Internet Computer skills into .claude/skills/
#
# Differential sync: fetches the discovery index once and re-downloads only the
# skills whose published `hash` changed (or are new). Skills already at the current
# hash are skipped entirely — no per-file downloads. Emits a summary only when
# something actually changed.
#
# Safety properties:
#   * Offline-safe: an unreachable or malformed index, or a missing `jq`, exits 0
#     and leaves the cached skills untouched.
#   * All-or-nothing per skill: files are downloaded into a staging directory and
#     swapped in only when every file was fetched, so a failed download never
#     leaves a half-updated skill and files deleted upstream do not linger.
#   * Names and file paths taken from the index are validated before they are used
#     in filesystem paths, so a bad index cannot write or delete outside $DEST.
#   * Only skills recorded in the manifest are ever pruned.
#
# Requires bash (process substitution, here-strings), curl and jq.
set -euo pipefail

BASE="https://skills.internetcomputer.org/.well-known/skills"
INDEX_URL="$BASE/index.json"
DEST=".claude/skills"
MANIFEST="$DEST/.ic-managed.json"   # { "<name>": "<hash>" } of skills this script manages
TAG="[autosync-ic-skills]"

mkdir -p "$DEST"

# --- Temp artifacts. Declared empty first so the EXIT trap is safe under `set -u`
#     even when we bail out before all of them exist. ---
TMP_INDEX=""
NEW_MANIFEST=""
STAGING=""
cleanup() {
  [ -z "$TMP_INDEX" ] || rm -f "$TMP_INDEX"
  [ -z "$NEW_MANIFEST" ] || rm -f "$NEW_MANIFEST" "$NEW_MANIFEST.next"
  [ -z "$STAGING" ] || rm -rf "$STAGING"
}
trap cleanup EXIT

# --- Fetch the index. On any network failure, keep cached skills and exit cleanly. ---
TMP_INDEX="$(mktemp)"
if ! curl -fsSL --max-time 20 "$INDEX_URL" -o "$TMP_INDEX"; then
  echo "$TAG could not reach $INDEX_URL — keeping cached skills" >&2
  exit 0
fi

# --- jq is required to parse the index. If absent, warn and exit without failing. ---
if ! command -v jq >/dev/null 2>&1; then
  echo "$TAG 'jq' not found — install jq to enable IC skill sync" >&2
  exit 0
fi

# --- A 200 response is not necessarily an index (captive portals, error pages).
#     Refuse to act on anything that is not { "skills": [...] }. ---
if ! jq -e '(.skills | type) == "array"' "$TMP_INDEX" >/dev/null 2>&1; then
  echo "$TAG $INDEX_URL did not return a valid skill index — keeping cached skills" >&2
  exit 0
fi

# --- True when $1 is usable as a single directory name under $DEST:
#     non-empty, no leading dot (rules out "." and ".."), no path separators. ---
safe_name() {
  case "$1" in
    '' | .* | *[!A-Za-z0-9._-]*) return 1 ;;
  esac
}

# --- True when $1 is a relative path that stays inside the skill directory:
#     non-empty, not absolute, and without "." / ".." segments. ---
safe_rel_path() {
  [ -n "$1" ] || return 1
  case "/$1/" in
    //* | */../* | */./*) return 1 ;;
  esac
}

# --- Previously-managed skill names. Supports the legacy manifest format
#     (a bare array of names, no hashes) as well as the current object form. ---
managed_names() {
  [ -f "$MANIFEST" ] || return 0
  jq -r 'if type == "object" then keys[] elif type == "array" then .[] else empty end' \
    "$MANIFEST" 2>/dev/null || true
}

# --- Stored hash for a skill, or empty if unknown (new skill, or legacy manifest). ---
stored_hash() {
  [ -f "$MANIFEST" ] || return 0
  jq -r --arg n "$1" 'if type == "object" then (.[$n] // "") else "" end' \
    "$MANIFEST" 2>/dev/null || true
}

# --- Set name -> hash in the manifest being built. ---
record() {
  jq --arg n "$1" --arg h "$2" '.[$n] = $h' "$NEW_MANIFEST" > "$NEW_MANIFEST.next"
  mv "$NEW_MANIFEST.next" "$NEW_MANIFEST"
}

# Both live inside $DEST so the final `mv`s are same-filesystem renames.
NEW_MANIFEST="$(mktemp "$DEST/.ic-managed.json.XXXXXX")"
STAGING="$(mktemp -d "$DEST/.ic-staging.XXXXXX")"
echo '{}' > "$NEW_MANIFEST"

NEW_NAMES="$(jq -r '.skills[] | objects | .name | strings' "$TMP_INDEX")"
MANAGED="$(managed_names)"

# --- Prune: drop previously-managed skills that are no longer in the index. ---
removed=0
while IFS= read -r old; do
  [ -n "$old" ] || continue
  if grep -qxF -- "$old" <<<"$NEW_NAMES"; then
    continue
  fi
  if ! safe_name "$old"; then
    # Never feed an unvalidated manifest entry to rm -rf; just stop tracking it.
    echo "$TAG warning: ignoring unsafe manifest entry: $old" >&2
    continue
  fi
  rm -rf "${DEST:?}/$old"
  removed=$((removed + 1))
  echo "$TAG removed: $old" >&2
done <<<"$MANAGED"

# --- Sync: download only skills whose hash changed (new / hashless always download). ---
added=0; updated=0; unchanged=0
while IFS= read -r entry; do
  name="$(jq -r '.name | strings' <<<"$entry")"
  if ! safe_name "$name"; then
    [ -z "$name" ] || echo "$TAG warning: skipping skill with unsafe name: $name" >&2
    continue
  fi
  new_hash="$(jq -r '.hash // ""' <<<"$entry")"
  old_hash="$(stored_hash "$name")"

  # Skip when the hash is known, unchanged, and the files are already on disk.
  if [ -n "$new_hash" ] && [ "$new_hash" = "$old_hash" ] && [ -d "$DEST/$name" ]; then
    unchanged=$((unchanged + 1))
    record "$name" "$new_hash"
    continue
  fi

  # Otherwise download every file for this skill into staging first.
  stage="$STAGING/$name"
  rm -rf "$stage"
  mkdir -p "$stage"
  ok=1
  while IFS= read -r f; do
    [ -n "$f" ] || continue
    if ! safe_rel_path "$f"; then
      echo "$TAG warning: refusing unsafe file path $name/$f" >&2
      ok=0
      continue
    fi
    mkdir -p "$(dirname "$stage/$f")"   # files may live in subdirs (e.g. scripts/)
    if ! curl -fsSL --max-time 20 "$BASE/$name/$f" -o "$stage/$f"; then
      echo "$TAG warning: failed to fetch $name/$f" >&2
      ok=0
    fi
  done < <(jq -r '.files[]?' <<<"$entry")

  if [ "$ok" -eq 1 ]; then
    # Complete download: replace the installed copy wholesale so files removed
    # upstream disappear locally too.
    rm -rf "${DEST:?}/$name"
    mv "$stage" "$DEST/$name"
    # Record the new hash so the next run can skip this skill. A hashless server
    # records an empty hash, which never passes the skip check -> always re-downloads.
    record "$name" "$new_hash"
    if grep -qxF -- "$name" <<<"$MANAGED"; then
      updated=$((updated + 1))
    else
      added=$((added + 1))
    fi
  else
    # Incomplete download: discard staging and leave the installed copy exactly as
    # it was. A skill we already manage keeps its old hash so the next run retries;
    # a skill we never installed stays unrecorded and is treated as new next run.
    rm -rf "$stage"
    if grep -qxF -- "$name" <<<"$MANAGED"; then
      record "$name" "$old_hash"
    fi
  fi
done < <(jq -c '.skills[] | objects' "$TMP_INDEX")

# --- Swap in the updated manifest (same-directory rename). ---
mv "$NEW_MANIFEST" "$MANIFEST"

# --- Report only when something changed; stay silent on a no-op sync. ---
# SessionStart hook stdout/stderr is NOT shown in the Claude Code UI — only JSON
# fields are surfaced. We emit a single JSON object on stdout:
#   - systemMessage     -> rendered to the USER as a visible system notice
#   - additionalContext -> injected into Claude's context so it can mention it too
# NOTE(review): `reloadSkills` is carried over from the follow-up change "Make sure
# skills get reloaded on change"; confirm against the hook output schema that it is
# honored in this position.
if [ $((added + updated + removed)) -gt 0 ]; then
  summary="$TAG ${added} added, ${updated} updated, ${removed} removed (${unchanged} unchanged) in $DEST"
  jq -n --arg msg "$summary" '{
    systemMessage: $msg,
    hookSpecificOutput: {
      reloadSkills: true,
      hookEventName: "SessionStart",
      additionalContext: $msg
    }
  }'
fi