From ecfb90ff25c9577eb950a36dc73a54e9a1ae7b2c Mon Sep 17 00:00:00 2001 From: Deven Ducommun Date: Sat, 27 Jun 2026 17:15:19 -0700 Subject: [PATCH 1/3] Sync KAI 7.7.0 release artifacts --- CHANGELOG.md | 4 +- MEMORY/README.md | 2 +- PAI/ACTIONS.md | 2 +- PAI/ACTIONS/README.md | 2 +- PAI/AISTEERINGRULES.md | 2 +- PAI/CLI.md | 2 +- PAI/FLOWS.md | 2 +- PAI/FLOWS/README.md | 2 +- PAI/PIPELINES.md | 2 +- PAI/PIPELINES/README.md | 2 +- PAI/THEDELEGATIONSYSTEM.md | 20 + PAI/Tools/Banner.ts | 2 +- PAI/Tools/BuildCLAUDE.ts | 2 +- PAI/Tools/MemoryCurate.ts | 152 ++++-- PAI/Tools/Orchestrator.ts | 140 ++++++ .../orchestrator/adapters/claude-local.ts | 16 + .../orchestrator/adapters/codex-local.ts | 16 + PAI/Tools/orchestrator/adapters/command.ts | 282 +++++++++++ PAI/Tools/orchestrator/comments.ts | 92 ++++ PAI/Tools/orchestrator/github.ts | 149 ++++++ PAI/Tools/orchestrator/ledger.ts | 169 +++++++ PAI/Tools/orchestrator/policy.ts | 159 +++++++ PAI/Tools/orchestrator/schema.ts | 376 +++++++++++++++ .../workflows/adversarial-review.ts | 295 ++++++++++++ PAI/Tools/orchestrator/workflows/pr-review.ts | 442 ++++++++++++++++++ PAI/Tools/pai.ts | 15 + README.md | 4 +- config/hooks.jsonc | 11 + config/preferences.jsonc | 2 +- config/spinner-tips.json | 2 +- docs/QUICKSTART.md | 6 +- docs/WHATS-DIFFERENT.md | 6 +- fixtures/orchestrator/adversarial-review.json | 39 ++ fixtures/orchestrator/pr-review.json | 44 ++ hooks/AgentExecutionGuard.hook.ts | 37 +- hooks/AgentMemoryCapture.hook.ts | 62 ++- hooks/GitHubWriteGuard.hook.ts | 21 +- hooks/InstructionsLoaded.hook.ts | 58 +++ hooks/LoadContext.hook.ts | 6 +- hooks/MemoryRecall.hook.ts | 10 +- hooks/SessionCleanup.hook.ts | 6 + hooks/SkillDiscoveryRecommender.hook.ts | 181 +++++++ hooks/TaskCompleted.hook.ts | 27 +- hooks/TeammateIdle.hook.ts | 23 + hooks/handlers/WikiCurrency.ts | 3 +- hooks/lib/agent-context-handoff.ts | 147 ++++++ hooks/lib/algorithm-state.ts | 16 + hooks/lib/memory-disclosure.ts | 47 +- hooks/lib/memory-telemetry.ts | 
12 +- hooks/lib/payload-schema.ts | 10 + install.sh | 2 +- manifest.json | 9 +- scripts/bun-test-gate.ts | 103 ++++ scripts/deploy.ts | 4 +- scripts/docs-spec-consistency.ts | 18 +- scripts/hooks/pre-push | 19 +- scripts/kai-artifact-fidelity.ts | 125 +++++ scripts/lib/sync-classification.ts | 53 +++ scripts/memory-telemetry-report.ts | 56 ++- scripts/release.sh | 44 +- scripts/sync-ci-gate.ts | 178 ++----- scripts/sync-drift.ts | 99 ++-- scripts/sync-manifest-schema.json | 37 ++ scripts/sync-manifest.json | 41 +- scripts/version-targets.json | 2 +- scripts/weekly-maintenance.ts | 4 +- skills-lock.json | 24 +- skills/PAI/ACTIONS.md | 2 +- skills/PAI/BROWSERAUTOMATION.md | 2 +- skills/PAI/CLI.md | 2 +- skills/PAI/DEPLOYMENT.md | 2 +- skills/PAI/FLOWS.md | 2 +- skills/PAI/FLOWS/README.md | 2 +- skills/PAI/MEMORYSYSTEM.md | 2 +- skills/PAI/PIPELINES.md | 2 +- skills/PAI/PIPELINES/README.md | 2 +- skills/PAI/THEHOOKSYSTEM.md | 2 +- skills/SpecKit/SKILL.md | 38 ++ skills/SpecKit/Workflows/Clarify.md | 12 + skills/SpecKit/Workflows/Implement.md | 12 + skills/SpecKit/Workflows/Plan.md | 11 + skills/SpecKit/Workflows/Spec.md | 26 ++ skills/SpecKit/Workflows/Tasks.md | 10 + skills/SpecKit/Workflows/Validate.md | 11 + tests/BumpVersionManifest.test.ts | 2 - tests/BunTestGate.test.ts | 66 +++ tests/DocsSpecConsistency.test.ts | 4 +- tests/GitHubWriteGuard.integration.test.ts | 17 + tests/HookEventCoverage.test.ts | 4 +- tests/InstallRelocation.test.ts | 30 ++ tests/InstructionsLoaded.test.ts | 41 ++ tests/KaiArtifactFidelity.test.ts | 88 ++++ tests/MemoryCuratePromotion.test.ts | 138 ++++++ tests/MemoryDisclosure.test.ts | 39 ++ tests/MemoryFunctions.test.ts | 9 +- tests/MemoryTelemetryHooks.test.ts | 18 +- tests/NativeEventMigration.test.ts | 199 ++++++++ tests/OrchestratorAdapters.test.ts | 213 +++++++++ tests/OrchestratorAdversarialReview.test.ts | 142 ++++++ tests/OrchestratorCli.test.ts | 64 +++ tests/OrchestratorComments.test.ts | 90 ++++ 
tests/OrchestratorLedger.test.ts | 207 ++++++++ tests/OrchestratorPolicy.test.ts | 195 ++++++++ tests/OrchestratorPrReview.test.ts | 182 ++++++++ tests/OrchestratorSchema.test.ts | 135 ++++++ tests/PayloadSchema.test.ts | 22 + tests/RedactAndGate.test.ts | 11 + tests/ReleaseScript.test.ts | 17 + tests/SettingsReproducibility.test.ts | 23 +- tests/SkillDiscoveryRecommender.test.ts | 68 +++ tests/SyncCIGate.test.ts | 26 +- tests/SyncDrift.test.ts | 32 ++ 112 files changed, 5809 insertions(+), 360 deletions(-) create mode 100644 PAI/Tools/Orchestrator.ts create mode 100644 PAI/Tools/orchestrator/adapters/claude-local.ts create mode 100644 PAI/Tools/orchestrator/adapters/codex-local.ts create mode 100644 PAI/Tools/orchestrator/adapters/command.ts create mode 100644 PAI/Tools/orchestrator/comments.ts create mode 100644 PAI/Tools/orchestrator/github.ts create mode 100644 PAI/Tools/orchestrator/ledger.ts create mode 100644 PAI/Tools/orchestrator/policy.ts create mode 100644 PAI/Tools/orchestrator/schema.ts create mode 100644 PAI/Tools/orchestrator/workflows/adversarial-review.ts create mode 100644 PAI/Tools/orchestrator/workflows/pr-review.ts create mode 100644 fixtures/orchestrator/adversarial-review.json create mode 100644 fixtures/orchestrator/pr-review.json create mode 100644 hooks/InstructionsLoaded.hook.ts create mode 100644 hooks/SkillDiscoveryRecommender.hook.ts create mode 100644 hooks/lib/agent-context-handoff.ts create mode 100644 scripts/bun-test-gate.ts create mode 100644 scripts/kai-artifact-fidelity.ts create mode 100644 scripts/lib/sync-classification.ts create mode 100644 scripts/sync-manifest-schema.json create mode 100644 skills/SpecKit/SKILL.md create mode 100644 skills/SpecKit/Workflows/Clarify.md create mode 100644 skills/SpecKit/Workflows/Implement.md create mode 100644 skills/SpecKit/Workflows/Plan.md create mode 100644 skills/SpecKit/Workflows/Spec.md create mode 100644 skills/SpecKit/Workflows/Tasks.md create mode 100644 
skills/SpecKit/Workflows/Validate.md create mode 100644 tests/BunTestGate.test.ts create mode 100644 tests/InstallRelocation.test.ts create mode 100644 tests/InstructionsLoaded.test.ts create mode 100644 tests/KaiArtifactFidelity.test.ts create mode 100644 tests/MemoryCuratePromotion.test.ts create mode 100644 tests/OrchestratorAdapters.test.ts create mode 100644 tests/OrchestratorAdversarialReview.test.ts create mode 100644 tests/OrchestratorCli.test.ts create mode 100644 tests/OrchestratorComments.test.ts create mode 100644 tests/OrchestratorLedger.test.ts create mode 100644 tests/OrchestratorPolicy.test.ts create mode 100644 tests/OrchestratorPrReview.test.ts create mode 100644 tests/OrchestratorSchema.test.ts create mode 100644 tests/ReleaseScript.test.ts create mode 100644 tests/SkillDiscoveryRecommender.test.ts create mode 100644 tests/SyncDrift.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 28fc6fbd..61d8efcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -172,7 +172,7 @@ Progressive learning, adaptive memory, and research expansion. - `hooks/SecurityValidator.hook.ts`, `hooks/SecretScanner.hook.ts` — security prefix rebranded to `[KAI SECURITY]` - `hooks/lib/change-detection.ts` — system label rebranded to `KAI System` - `PAI/Tools/RoutingAudit.ts` — MEM_PREFIX derived from HOME (portable, no hardcoded paths) -- Manifest updated: 70 skills, 59 hooks, 20 agents +- Manifest updated: 71 skills, 61 hooks, 20 agents - VERSION 5.2.0 → 5.6.0 - README, QUICKSTART, WHATS-DIFFERENT, releases/README updated to KAI 5.6.0 @@ -354,7 +354,7 @@ Initial public release of KAI (Kaizen AI). 
### Features - **Algorithm v3.13.0** — Parallelization gate, phantom capability prune, version centralization -- **70 skills** — Research, Security, Analysis, Writing, Engineering Manager workflows, and more +- **71 skills** — Research, Security, Analysis, Writing, Engineering Manager workflows, and more - **47 hooks** — Lifecycle automation including SecretScanner, GitHubWriteGuard, RatingCapture, BuildSettings - **18 named agents** — Architect, Engineer, researchers, Pentester, and domain specialists - **Memory system** — Cross-project knowledge distillation, staging, curation diff --git a/MEMORY/README.md b/MEMORY/README.md index 3788e446..5793dc47 100755 --- a/MEMORY/README.md +++ b/MEMORY/README.md @@ -1,6 +1,6 @@ # MEMORY - Unified Memory System -**Version:** 7.4.2 (Projects-native architecture, 2026-01-12) +**Version:** 7.7.0 (Projects-native architecture, 2026-01-12) Full documentation: `~/.claude/skills/PAI/MEMORYSYSTEM.md` diff --git a/PAI/ACTIONS.md b/PAI/ACTIONS.md index 6e34f737..d9733c54 100644 --- a/PAI/ACTIONS.md +++ b/PAI/ACTIONS.md @@ -1,6 +1,6 @@ # Actions -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. **Atomic, Composable Units of Work** diff --git a/PAI/ACTIONS/README.md b/PAI/ACTIONS/README.md index ae9d228e..34a78bfc 100644 --- a/PAI/ACTIONS/README.md +++ b/PAI/ACTIONS/README.md @@ -1,6 +1,6 @@ # PAI Actions -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. Atomic, composable units of work. Each action does one thing, takes JSON in, returns JSON out. diff --git a/PAI/AISTEERINGRULES.md b/PAI/AISTEERINGRULES.md index b548ee89..3b701399 100644 --- a/PAI/AISTEERINGRULES.md +++ b/PAI/AISTEERINGRULES.md @@ -51,4 +51,4 @@ Personal overrides in `USER/AISTEERINGRULES.md`. Full examples in `AISTEERINGRUL **Memcarry capture — offer to capture a durable lesson when {PRINCIPAL.NAME} establishes one.** The forward half of the cross-project cycle (capture; backflow above is the B→A half). 
When {PRINCIPAL.NAME} states a durable, reusable rule during the work ("remember this", "from now on…", "the lesson here is…", or clearly articulates a cross-project gotcha), OFFER to capture it as a lesson atom: draft the `WHEN → DO → BECAUSE` (the `because` should carry a dated evidence anchor) and preview it via `memcarry capture-lesson --when "…" --do "…" --because "…" --trigger "a,b" [--scope global|project:]` (no `--apply`, writes nothing — it also dup-checks and reports any `similar` existing lesson or exact-id `collision`). If it surfaces a `similar`/`collision` hit, prefer `refine` of that atom over a new one. Default `--scope global` when the rule is cross-project; `project:` when it's specific to one project. Only AFTER explicit confirmation, add `--apply`. NEVER pass `--apply` without confirmation — same anti-circular-loop gate as backflow (a lesson gains `human-confirmed` authority from {PRINCIPAL.NAME}, never my own assertion). Don't auto-capture, don't pump the store with marginal lessons — capture only what {PRINCIPAL.NAME} confirms is worth keeping. CLI/store paths same as backflow above. -**Checkpoint after a subagent returns (capture-loss guard).** Subagents (the Agent/Task tool) CANNOT persist memory themselves — no CLAUDE.md, no memory hooks fire inside them. The parent is the ONLY actor that can save what a subagent learned; if the parent doesn't, the learning is lost (this is the rayhunter loss class amplified). When a Task subagent returns having established something durable — a cross-project lesson, a project fact, a resolved gotcha, a verified state — checkpoint it before moving on: a `memcarry capture-lesson` (per the rule above) for a reusable lesson, or a project-memory note for a project fact. The `AgentMemoryCapture` hook surfaces a `` on substantive returns as a prompt — act on it when the subagent's work is worth keeping, ignore it when nothing durable was learned. 
Don't capture marginal subagent output; do capture what you'd be annoyed to re-derive next session. +**Checkpoint after an Agent subagent returns (capture-loss guard).** Subagents cannot persist memory themselves — no parent memory hooks fire inside them. The parent is the ONLY actor that can save what a subagent learned; if the parent doesn't, the learning is lost (this is the rayhunter loss class amplified). When an Agent subagent returns having established something durable — a cross-project lesson, a project fact, a resolved gotcha, a verified state, or a `Durable findings for parent checkpoint:` section — checkpoint it before moving on: a `memcarry capture-lesson` (per the rule above) for a reusable lesson, or a project-memory note for a project fact. The `AgentMemoryCapture` hook surfaces a `` on substantive or explicitly marked durable returns as a prompt — act on it when the subagent's work is worth keeping, ignore it when nothing durable was learned. Don't capture marginal subagent output; do capture what you'd be annoyed to re-derive next session. diff --git a/PAI/CLI.md b/PAI/CLI.md index 7a470a95..b9a52e46 100644 --- a/PAI/CLI.md +++ b/PAI/CLI.md @@ -1,4 +1,4 @@ -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. # PAI Command-Line Tools diff --git a/PAI/FLOWS.md b/PAI/FLOWS.md index 1f873c0b..d5f9b09f 100644 --- a/PAI/FLOWS.md +++ b/PAI/FLOWS.md @@ -1,6 +1,6 @@ # Flows -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. **Connecting Sources to Pipelines on a Schedule** diff --git a/PAI/FLOWS/README.md b/PAI/FLOWS/README.md index 03dd44a4..d4a87be7 100644 --- a/PAI/FLOWS/README.md +++ b/PAI/FLOWS/README.md @@ -1,6 +1,6 @@ # PAI Flows -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. Flows connect **sources** to **pipelines** on a **schedule**. 
A flow fetches content from an external source (RSS feed, API, etc.), pipes it through a pipeline of actions, and delivers results to a destination (email, webhook, etc.). diff --git a/PAI/PIPELINES.md b/PAI/PIPELINES.md index 9495bcee..7a9944b2 100755 --- a/PAI/PIPELINES.md +++ b/PAI/PIPELINES.md @@ -1,6 +1,6 @@ # Pipelines -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. **Chaining Actions into Sequential Workflows** diff --git a/PAI/PIPELINES/README.md b/PAI/PIPELINES/README.md index 626d1a33..de65a218 100644 --- a/PAI/PIPELINES/README.md +++ b/PAI/PIPELINES/README.md @@ -1,6 +1,6 @@ # PAI Pipelines -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. Pipelines chain actions together. A pipeline is just a list of actions executed in order using the **pipe model** — the output of each action becomes the input of the next. diff --git a/PAI/THEDELEGATIONSYSTEM.md b/PAI/THEDELEGATIONSYSTEM.md index b3fce80b..7eab1d30 100755 --- a/PAI/THEDELEGATIONSYSTEM.md +++ b/PAI/THEDELEGATIONSYSTEM.md @@ -249,6 +249,26 @@ claude --attach - ❌ No callback/RPC from background to foreground (use file-based coordination) - ❌ Don't rely on background agents to report back synchronously (they may outlive parent) +### Context Envelope for Background / SDK Sessions + +Background and SDK-launched sessions do not share the parent conversation. Prepend the same narrow +PAI handoff used for context-sensitive `Explore` / `Plan` delegation: + +```text + +- Follow current repo rules before acting: branch-only workflow, read before modifying, minimal scope, verify before reporting done. +- Do not mutate public KAI, remotes, PRs, or release artifacts unless the parent prompt includes explicit current-turn approval. +- Preserve user and memory content. Never delete, move, or rewrite memory based on format judgment. +- Subagents cannot persist durable memory. 
If you learn something the parent should save, return a section named "Durable findings for parent checkpoint:" with only the facts worth keeping. +- ADA pointer: if the task touches ambient domain activation or delegation reach, read docs/planning/ambient-domain-activation-design.md and docs/planning/ROADMAP-7.x.md before changing behavior. + + +- Background/SDK sessions do not share parent conversation state. Include only task-relevant paths, constraints, approvals, and this handoff block. +- Do not include private conversation history, unrelated memories, credentials, tokens, or public-KAI-prohibited content. +- Coordinate through commits, PR comments, or explicit task/status files; do not assume synchronous return to the parent session. + +``` + ### Model Selection for Background Sessions Background sessions have different cost/capability tradeoffs than intra-session agents: diff --git a/PAI/Tools/Banner.ts b/PAI/Tools/Banner.ts index d590f1c3..39eb9665 100755 --- a/PAI/Tools/Banner.ts +++ b/PAI/Tools/Banner.ts @@ -40,7 +40,7 @@ interface Stats { function getStats(): Stats { let name = "PAI"; - let paiVersion = "7.4.2"; + let paiVersion = "7.7.0"; let algorithmVersion = "3.14.0"; let skills = 0, workflows = 0, hooks = 0, learnings = 0, userFiles = 0; let connection = "API"; diff --git a/PAI/Tools/BuildCLAUDE.ts b/PAI/Tools/BuildCLAUDE.ts index ebccbcd3..240df8fe 100644 --- a/PAI/Tools/BuildCLAUDE.ts +++ b/PAI/Tools/BuildCLAUDE.ts @@ -48,7 +48,7 @@ function loadVariables(): Record { "{DAIDENTITY.DISPLAYNAME}": settings.daidentity?.displayName || "Assistant", "{PRINCIPAL.NAME}": settings.principal?.name || "User", "{PRINCIPAL.TIMEZONE}": settings.principal?.timezone || "UTC", - "{{PAI_VERSION}}": settings.pai?.version || "7.4.2", + "{{PAI_VERSION}}": settings.pai?.version || "7.7.0", "{{PRODUCT_NAME}}": settings.pai?.productName || "PAI", "{{ALGO_VERSION}}": algoVersion, "{{ALGO_PATH}}": `PAI/Algorithm/${algoVersion}.md`, diff --git a/PAI/Tools/MemoryCurate.ts 
b/PAI/Tools/MemoryCurate.ts index 3dd7c058..88a672be 100644 --- a/PAI/Tools/MemoryCurate.ts +++ b/PAI/Tools/MemoryCurate.ts @@ -323,7 +323,8 @@ function approveDraft(index: number): { success: boolean; error?: string } { const targetPath = join(targetMemoryDir, draft.targetFilename); try { - // Write the draft content (without frontmatter) to target + // Write new targets with frontmatter; existing targets are append-only so repeated fixed-name + // feedback drafts cannot clobber earlier approved memory or hand edits. const memContent = `--- type: ${draft.type.includes('success') ? 'feedback' : 'project'} description: ${draft.title} @@ -333,7 +334,14 @@ source_session: ${draft.sourceSession} ${draft.content} `; - writeFileSync(targetPath, memContent); + if (!existsSync(targetPath)) { + writeFileSync(targetPath, memContent); + } else { + const existing = readFileSync(targetPath, 'utf-8'); + const approvedDate = new Date().toISOString().split('T')[0]; + const entry = `\n## ${draft.title}\n_source: auto-generated (approved ${approvedDate}) · source_session: ${draft.sourceSession}_\n\n${draft.content.trim()}\n`; + writeFileSync(targetPath, existing.trimEnd() + '\n' + entry); + } // Update MEMORY.md index const memoryMd = join(targetMemoryDir, 'MEMORY.md'); @@ -409,6 +417,20 @@ function parseInsight(content: string): { title: string; category: string; confi }; } +function listCandidateInsightFiles(): string[] { + if (!existsSync(INSIGHTS_DIR)) return []; + return readdirSync(INSIGHTS_DIR) + .filter(f => f.endsWith('.md')) + .filter(f => { + try { + return readFileSync(join(INSIGHTS_DIR, f), 'utf-8').includes('status: candidate'); + } catch { + return false; + } + }) + .sort(); +} + /** * Promote an INSIGHTS candidate into a consolidated per-project memory file. 
* Appends the lesson under a dated bullet to `insights_promoted.md`, updates the @@ -463,6 +485,50 @@ function promoteInsight(filename: string, projectOverride?: string): { success: } } +interface InsightPromotionRequest { + filename: string; + project?: string; +} + +interface InsightPromotionBatchResult { + promoted: number; + failed: { filename: string; error: string }[]; + targets: string[]; +} + +function parseInsightPromotionManifest(manifestPath: string): InsightPromotionRequest[] { + const lines = readFileSync(manifestPath, 'utf-8') + .split('\n') + .map(line => line.trim()) + .filter(line => line && !line.startsWith('#')); + + return lines.map(line => { + const [filename, project] = line.split('\t').map(part => part.trim()); + if (!filename) throw new Error(`Invalid promotion manifest row: ${line}`); + return { filename, project: project || undefined }; + }); +} + +function promoteInsightBatch(requests: InsightPromotionRequest[], projectOverride?: string): InsightPromotionBatchResult { + const result: InsightPromotionBatchResult = { promoted: 0, failed: [], targets: [] }; + for (const request of requests) { + const promoted = promoteInsight(request.filename, projectOverride || request.project); + if (promoted.success) { + result.promoted += 1; + if (promoted.target && !result.targets.includes(promoted.target)) result.targets.push(promoted.target); + } else { + result.failed.push({ filename: request.filename, error: promoted.error || 'unknown error' }); + } + } + return result; +} + +function printBatchPromotionResult(result: InsightPromotionBatchResult): void { + if (result.promoted > 0) console.log(green(` ✓ Promoted ${result.promoted} insight(s).`)); + for (const target of result.targets) console.log(dim(` → ${target}`)); + for (const failure of result.failed) console.log(red(` ✗ ${failure.filename}: ${failure.error}`)); +} + // ============================================================================ // Interactive single-key prompt // 
============================================================================ @@ -663,31 +729,33 @@ async function interactiveInsights(dryRun: boolean): Promise { console.log(dim(' Run `pai curate drafts` after those sessions to review generated memories.')); } - // Show insight candidates from InsightExtractor - const insightsDir = join(paiDir, 'MEMORY', 'LEARNING', 'INSIGHTS'); - if (existsSync(insightsDir)) { - const insightFiles = readdirSync(insightsDir).filter(f => f.endsWith('.md')); - const candidates = insightFiles.filter(f => { + const candidates = listCandidateInsightFiles(); + if (candidates.length > 0) { + console.log(`\n ${bold(String(candidates.length))} insight candidate(s) pending review:`); + for (const f of dryRun ? candidates.slice(0, 5) : candidates) { try { - const content = readFileSync(join(insightsDir, f), 'utf-8'); - return content.includes('status: candidate'); - } catch { return false; } - }); - if (candidates.length > 0) { - console.log(`\n ${bold(String(candidates.length))} insight candidate(s) pending review:`); - for (const f of candidates.slice(0, 5)) { - try { - const content = readFileSync(join(insightsDir, f), 'utf-8'); - const titleMatch = content.match(/title:\s*"([^"]+)"/); - const title = titleMatch ? titleMatch[1] : f; - console.log(` • ${title}`); - } catch { console.log(` • ${f}`); } - } - if (candidates.length > 5) { - console.log(dim(` ... 
and ${candidates.length - 5} more`)); + const content = readFileSync(join(INSIGHTS_DIR, f), 'utf-8'); + const insight = parseInsight(content); + console.log(` • ${insight.title || f}`); + } catch { console.log(` • ${f}`); } + + if (!dryRun) { + const key = await prompt( + ` Action: ${green('[p]')}romote to kai ${dim('[s]')}kip ${dim('[q]')}uit → `, + ['p', 's', 'q'] + ); + if (key === 'q') { console.log(dim(' (quit)\n')); return; } + if (key === 'p') { + const result = promoteInsight(f); + if (result.success) console.log(green(` ✓ Promoted → ${result.target}`)); + else console.log(red(` ✗ Failed: ${result.error}`)); + } } - console.log(dim(' Promote to project memory with `pai curate promote `')); } + if (dryRun && candidates.length > 5) { + console.log(dim(` ... and ${candidates.length - 5} more`)); + } + console.log(dim(' Batch promote with `pai curate promote --from-manifest ` or `pai curate promote --all `.')); } console.log(); @@ -913,6 +981,8 @@ if (flags.includes('--help') || subcommand === 'help') { ${cyan('pai curate reject ')} Reject draft #n ${cyan('pai curate restore ')} Restore archived file ${cyan('pai curate promote [--project

]')} Promote an INSIGHTS candidate to project memory + ${cyan('pai curate promote --from-manifest ')} Batch promote rows + ${cyan('pai curate promote --all ')} Promote every candidate insight to one project ${cyan('pai curate check')} Validate knowledge + detect contradictions ${cyan('pai curate approve-all')} Auto-approve eligible drafts (≥14d, conf≥0.8) ${cyan('pai curate approve-all --dry-run')} Preview what would be promoted @@ -972,13 +1042,43 @@ switch (subcommand) { break; } case 'promote': { + const projIdx = process.argv.indexOf('--project'); + const projectOverride = projIdx !== -1 ? process.argv[projIdx + 1] : undefined; + if (flags.includes('--from-manifest')) { + const manifestPath = subArg; + if (!manifestPath) { + console.log(red(' Usage: pai curate promote --from-manifest [--project ]')); + process.exit(1); + } + try { + const requests = parseInsightPromotionManifest(manifestPath); + const result = promoteInsightBatch(requests, projectOverride); + printBatchPromotionResult(result); + if (result.failed.length > 0) process.exit(1); + } catch (e: unknown) { + console.log(red(` ✗ ${e}`)); + process.exit(1); + } + break; + } + if (flags.includes('--all')) { + const project = projectOverride || subArg; + if (!project) { + console.log(red(' Usage: pai curate promote --all ')); + process.exit(1); + } + const requests = listCandidateInsightFiles().map(filename => ({ filename, project })); + const result = promoteInsightBatch(requests); + printBatchPromotionResult(result); + if (result.failed.length > 0) process.exit(1); + break; + } + const filename = subArg; if (!filename) { console.log(red(' Usage: pai curate promote [--project ]')); process.exit(1); } - const projIdx = process.argv.indexOf('--project'); - const projectOverride = projIdx !== -1 ? 
process.argv[projIdx + 1] : undefined; const result = promoteInsight(filename, projectOverride); if (result.success) { console.log(green(` ✓ Promoted ${basename(filename)} → ${result.target}`)); diff --git a/PAI/Tools/Orchestrator.ts b/PAI/Tools/Orchestrator.ts new file mode 100644 index 00000000..1509746c --- /dev/null +++ b/PAI/Tools/Orchestrator.ts @@ -0,0 +1,140 @@ +#!/usr/bin/env bun + +import { existsSync, readFileSync } from 'fs'; +import { validateWorkItem, type WorkItem } from './orchestrator/schema'; +import { runAdversarialReview } from './orchestrator/workflows/adversarial-review'; +import { runPrReview } from './orchestrator/workflows/pr-review'; + +interface CliResult { + status: 'complete' | 'fixed' | 'blocked' | 'failed'; + workItemId?: string; + workflow?: string; + reason: string; + artifacts?: string[]; + packets?: string[]; + findings?: number; + checkState?: string; + commentsWritten?: number; + errors?: string[]; + warnings?: string[]; +} + +function printHelp(): void { + console.log(`PAI Orchestrator + +Usage: + pai orchestrator run [--dry-run] + pai orchestrator status + pai orchestrator resume + +Current 7.7 slice: + - validates durable work-item schemas + - fixture-backed dry-run only + - no live agent, GitHub, push, merge, or public KAI mutation paths +`); +} + +function printResult(result: CliResult): void { + console.log(JSON.stringify(result, null, 2)); +} + +function readWorkItem(path: string): { workItem?: WorkItem; errors: string[]; warnings: string[] } { + if (!existsSync(path)) return { errors: [`Work item file not found: ${path}`], warnings: [] }; + try { + const parsed = JSON.parse(readFileSync(path, 'utf-8')); + const validation = validateWorkItem(parsed); + return { workItem: validation.value, errors: validation.errors, warnings: validation.warnings }; + } catch (error) { + return { errors: [`Failed to parse work item JSON: ${error instanceof Error ? 
error.message : String(error)}`], warnings: [] }; + } +} + +export async function runCli(args: string[]): Promise { + const [command, firstArg, ...rest] = args; + if (!command || command === '--help' || command === '-h' || command === 'help') { + printHelp(); + return 0; + } + + if (command === 'run') { + const dryRun = rest.includes('--dry-run'); + if (!firstArg) { + printResult({ status: 'failed', reason: 'Missing work-item path.', errors: ['Usage: run [--dry-run]'] }); + return 2; + } + const loaded = readWorkItem(firstArg); + if (!loaded.workItem) { + printResult({ status: 'failed', reason: 'Work item validation failed.', errors: loaded.errors, warnings: loaded.warnings }); + return 2; + } + if (!dryRun) { + printResult({ + status: 'blocked', + workItemId: loaded.workItem.id, + workflow: loaded.workItem.type, + reason: 'Workflow execution is not implemented in this 7.7 foundation slice. Re-run with --dry-run to validate only.', + warnings: loaded.warnings, + }); + return 2; + } + if (loaded.workItem.type === 'adversarial-review') { + const result = await runAdversarialReview(loaded.workItem, { dryRun: true }); + printResult({ + status: result.status === 'failed' ? 'failed' : result.status === 'blocked' ? 'blocked' : 'complete', + workItemId: result.workItemId, + workflow: result.workflow, + reason: result.decision.reason, + artifacts: result.artifacts.map((artifact) => artifact.path ?? artifact.id), + packets: result.packets.map((packet) => packet.id), + findings: result.findings.length, + warnings: result.warnings, + }); + return result.status === 'failed' ? 2 : 0; + } + if (loaded.workItem.type === 'pr-review') { + const result = await runPrReview(loaded.workItem, { dryRun: true }); + printResult({ + status: result.status === 'failed' ? 'failed' : result.status === 'blocked' ? 'blocked' : result.status === 'fixed' ? 
'fixed' : 'complete', + workItemId: result.workItemId, + workflow: result.workflow, + reason: result.decision.reason, + artifacts: result.artifacts.map((artifact) => artifact.path ?? artifact.id), + packets: result.packets.map((packet) => packet.id), + findings: result.findings.length, + checkState: result.checkState, + commentsWritten: result.commentsWritten, + warnings: result.warnings, + }); + return result.status === 'failed' ? 2 : 0; + } + printResult({ + status: 'blocked', + workItemId: loaded.workItem.id, + workflow: loaded.workItem.type, + reason: 'Dry-run validated the work item. Execution engines are intentionally not active in this foundation slice.', + artifacts: [], + warnings: loaded.warnings, + }); + return 0; + } + + if (command === 'status' || command === 'resume') { + if (!firstArg) { + printResult({ status: 'failed', reason: `Missing work-item id for ${command}.`, errors: [`Usage: ${command} `] }); + return 2; + } + printResult({ + status: 'blocked', + workItemId: firstArg, + reason: `The ${command} command is reserved for the ledger slice and is not implemented yet.`, + }); + return 2; + } + + printResult({ status: 'failed', reason: `Unknown orchestrator command: ${command}`, errors: ['Use --help for usage.'] }); + return 2; +} + +if (import.meta.main) { + process.exit(await runCli(process.argv.slice(2))); +} diff --git a/PAI/Tools/orchestrator/adapters/claude-local.ts b/PAI/Tools/orchestrator/adapters/claude-local.ts new file mode 100644 index 00000000..4f242563 --- /dev/null +++ b/PAI/Tools/orchestrator/adapters/claude-local.ts @@ -0,0 +1,16 @@ +import { type CommandAdapterConfig } from './command'; + +export function claudeLocalAdapterConfig(overrides: Partial = {}): CommandAdapterConfig { + return { + id: 'claude-local', + engine: 'claude-local', + command: process.env.PAI_ORCHESTRATOR_CLAUDE_COMMAND ?? 
'claude', + args: ['-p', '{{prompt}}', '--output-format', 'text'], + timeoutMs: 600_000, + requireStructuredOutput: true, + supportedRoles: ['reviewer', 'red-team', 'judge', 'validator'], + supportedCapabilities: ['review', 'red-team', 'judge', 'validate'], + envAllowlist: ['HOME', 'PATH', 'TMPDIR', 'LANG', 'LC_ALL', 'ANTHROPIC_API_KEY'], + ...overrides, + }; +} diff --git a/PAI/Tools/orchestrator/adapters/codex-local.ts b/PAI/Tools/orchestrator/adapters/codex-local.ts new file mode 100644 index 00000000..81ac31fb --- /dev/null +++ b/PAI/Tools/orchestrator/adapters/codex-local.ts @@ -0,0 +1,16 @@ +import { type CommandAdapterConfig } from './command'; + +export function codexLocalAdapterConfig(overrides: Partial = {}): CommandAdapterConfig { + return { + id: 'codex-local', + engine: 'codex-local', + command: process.env.PAI_ORCHESTRATOR_CODEX_COMMAND ?? 'codex', + args: ['exec', '{{prompt}}'], + timeoutMs: 600_000, + requireStructuredOutput: true, + supportedRoles: ['implementer', 'fixer', 'judge', 'validator'], + supportedCapabilities: ['implement', 'judge', 'validate'], + envAllowlist: ['HOME', 'PATH', 'TMPDIR', 'LANG', 'LC_ALL', 'OPENAI_API_KEY'], + ...overrides, + }; +} diff --git a/PAI/Tools/orchestrator/adapters/command.ts b/PAI/Tools/orchestrator/adapters/command.ts new file mode 100644 index 00000000..58ee4406 --- /dev/null +++ b/PAI/Tools/orchestrator/adapters/command.ts @@ -0,0 +1,282 @@ +import { spawn, type ChildProcess } from 'child_process'; +import { + type AgentCapability, + type AgentResult, + type WorkArtifact, + type WorkPacket, + type WorkRoleName, + validateAgentResult, +} from '../schema'; + +export interface CommandAdapterConfig { + id: string; + engine: string; + command: string; + args?: string[]; + cwd?: string; + timeoutMs?: number; + killGraceMs?: number; + maxOutputBytes?: number; + envAllowlist?: string[]; + env?: Record; + requireStructuredOutput?: boolean; + supportedRoles?: WorkRoleName[]; + supportedCapabilities?: 
AgentCapability[]; +} + +export interface CommandAdapterRun { + command: string; + args: string[]; + cwd?: string; + timeoutMs: number; + envKeys: string[]; +} + +const DEFAULT_TIMEOUT_MS = 120_000; +const DEFAULT_KILL_GRACE_MS = 1_000; +const DEFAULT_MAX_OUTPUT_BYTES = 1024 * 1024; +const DEFAULT_ENV_ALLOWLIST = ['HOME', 'PATH', 'TMPDIR', 'TEMP', 'LANG', 'LC_ALL']; + +function nowIso(): string { + return new Date().toISOString(); +} + +function commandOutputArtifact( + packet: WorkPacket, + config: CommandAdapterConfig, + content: string, + metadata: Record, +): WorkArtifact { + return { + id: `${packet.id}-${config.id}-command-output`, + type: 'agent-result', + source: config.engine, + createdAt: nowIso(), + content, + metadata, + }; +} + +function result( + status: AgentResult['status'], + summary: string, + artifact: WorkArtifact, +): AgentResult { + return { status, summary, artifacts: [artifact] }; +} + +function blocked(packet: WorkPacket, config: CommandAdapterConfig, summary: string, metadata: Record): AgentResult { + return result('blocked', summary, commandOutputArtifact(packet, config, '', metadata)); +} + +function renderTemplate(value: string, packet: WorkPacket): string { + const prompt = structuredPrompt(packet); + const replacements: Record = { + '{{packet_json}}': JSON.stringify(packet), + '{{prompt}}': prompt, + '{{packet.id}}': packet.id, + '{{packet.workItemId}}': packet.workItemId, + '{{packet.type}}': packet.type, + '{{packet.role.role}}': packet.role.role, + '{{packet.role.engine}}': packet.role.engine, + }; + return Object.entries(replacements).reduce((rendered, [token, replacement]) => rendered.replaceAll(token, replacement), value); +} + +function buildEnv(config: CommandAdapterConfig): Record { + const env: Record = {}; + for (const key of config.envAllowlist ?? DEFAULT_ENV_ALLOWLIST) { + const value = process.env[key]; + if (value !== undefined) env[key] = value; + } + return { ...env, ...(config.env ?? 
{}) }; +} + +function killProcessTree(child: ChildProcess, signal: NodeJS.Signals): void { + if (!child.pid) return; + try { + process.kill(-child.pid, signal); + } catch { + try { + child.kill(signal); + } catch { + // Process may have already exited between timeout and cleanup. + } + } +} + +function supportsPacket(packet: WorkPacket, config: CommandAdapterConfig): string | null { + if (packet.role.engine !== config.engine) { + return `Packet role engine ${packet.role.engine} does not match adapter ${config.engine}.`; + } + if (config.supportedRoles && !config.supportedRoles.includes(packet.role.role)) { + return `Role ${packet.role.role} is not supported by adapter ${config.engine}.`; + } + const supportedCapabilities = config.supportedCapabilities; + if (supportedCapabilities) { + const unsupported = packet.role.capabilities.find((capability) => !supportedCapabilities.includes(capability)); + if (unsupported) return `Capability ${unsupported} is not supported by adapter ${config.engine}.`; + } + return null; +} + +export function structuredPrompt(packet: WorkPacket): string { + return [ + 'You are running as a PAI orchestration agent.', + 'Return ONLY JSON matching this AgentResult shape:', + '{"status":"pass|findings|fixed|blocked|error","summary":"string","artifacts":[],"findings":[]}', + 'Do not include markdown fences or prose outside the JSON object.', + '', + 'Work packet:', + JSON.stringify(packet, null, 2), + ].join('\n'); +} + +export function describeCommandRun(packet: WorkPacket, config: CommandAdapterConfig): CommandAdapterRun { + const args = (config.args ?? []).map((arg) => renderTemplate(arg, packet)); + const env = buildEnv(config); + return { + command: renderTemplate(config.command, packet), + args, + cwd: config.cwd ? renderTemplate(config.cwd, packet) : undefined, + timeoutMs: config.timeoutMs ?? 
DEFAULT_TIMEOUT_MS, + envKeys: Object.keys(env).sort(), + }; +} + +export async function runCommandAdapter(packet: WorkPacket, config: CommandAdapterConfig): Promise { + const unsupported = supportsPacket(packet, config); + if (unsupported) { + return blocked(packet, config, unsupported, { blocked: true, reason: unsupported }); + } + + const command = renderTemplate(config.command, packet); + const args = (config.args ?? []).map((arg) => renderTemplate(arg, packet)); + const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const killGraceMs = config.killGraceMs ?? DEFAULT_KILL_GRACE_MS; + const maxOutputBytes = config.maxOutputBytes ?? DEFAULT_MAX_OUTPUT_BYTES; + const startedAt = Date.now(); + const env = buildEnv(config); + + return await new Promise((resolve) => { + let settled = false; + let stdout = ''; + let stderr = ''; + let stdoutBytes = 0; + let stderrBytes = 0; + let timedOut = false; + let outputExceeded = false; + let timer: ReturnType | undefined; + let killTimer: ReturnType | undefined; + + const child = spawn(command, args, { + cwd: config.cwd ? 
renderTemplate(config.cwd, packet) : undefined, + env, + stdio: ['pipe', 'pipe', 'pipe'], + detached: true, + }); + + const finish = (agentResult: AgentResult): void => { + if (settled) return; + settled = true; + if (timer) clearTimeout(timer); + if (killTimer) clearTimeout(killTimer); + resolve(agentResult); + }; + + timer = setTimeout(() => { + timedOut = true; + killProcessTree(child, 'SIGTERM'); + killTimer = setTimeout(() => { + killProcessTree(child, 'SIGKILL'); + }, killGraceMs); + }, timeoutMs); + + const appendOutput = (stream: 'stdout' | 'stderr', chunk: string): void => { + if (outputExceeded) return; + const bytes = Buffer.byteLength(chunk, 'utf8'); + const currentTotal = stdoutBytes + stderrBytes; + if (currentTotal + bytes > maxOutputBytes) { + outputExceeded = true; + const remaining = Math.max(0, maxOutputBytes - currentTotal); + if (remaining > 0) { + const partial = Buffer.from(chunk, 'utf8').subarray(0, remaining).toString('utf8'); + if (stream === 'stdout') { + stdout += partial; + stdoutBytes += Buffer.byteLength(partial, 'utf8'); + } else { + stderr += partial; + stderrBytes += Buffer.byteLength(partial, 'utf8'); + } + } + killProcessTree(child, 'SIGTERM'); + killTimer ??= setTimeout(() => { + killProcessTree(child, 'SIGKILL'); + }, killGraceMs); + return; + } + if (stream === 'stdout') { + stdout += chunk; + stdoutBytes += bytes; + } else { + stderr += chunk; + stderrBytes += bytes; + } + }; + + child.stdout?.setEncoding('utf-8'); + child.stderr?.setEncoding('utf-8'); + child.stdout?.on('data', (chunk) => { appendOutput('stdout', chunk); }); + child.stderr?.on('data', (chunk) => { appendOutput('stderr', chunk); }); + child.stdin?.end(JSON.stringify(packet)); + + child.on('error', (error) => { + const elapsedMs = Date.now() - startedAt; + finish(result('error', `Adapter command failed to start: ${error.message}`, commandOutputArtifact(packet, config, stdout, { + error: error.message, + stderr, + elapsedMs, + command, + args, + }))); + }); + + 
child.on('close', (exitCode, signal) => { + const elapsedMs = Date.now() - startedAt; + const metadata = { exitCode, signal, timedOut, outputExceeded, timeoutMs, killGraceMs, maxOutputBytes, stdoutBytes, stderrBytes, elapsedMs, stderr, command, args }; + if (outputExceeded) { + finish(result('blocked', `Adapter command exceeded max output of ${maxOutputBytes} bytes.`, commandOutputArtifact(packet, config, stdout, metadata))); + return; + } + if (timedOut) { + finish(result('blocked', `Adapter command timed out after ${timeoutMs}ms.`, commandOutputArtifact(packet, config, stdout, metadata))); + return; + } + if (exitCode !== 0) { + finish(result('error', `Adapter command exited with code ${exitCode}.`, commandOutputArtifact(packet, config, stdout, metadata))); + return; + } + if (config.requireStructuredOutput === false) { + finish(result('pass', 'Adapter command completed without structured-output validation.', commandOutputArtifact(packet, config, stdout, metadata))); + return; + } + let parsed: unknown; + try { + parsed = JSON.parse(stdout); + } catch (error) { + finish(result('error', `Adapter output was not valid JSON: ${error instanceof Error ? 
error.message : String(error)}`, commandOutputArtifact(packet, config, stdout, metadata))); + return; + } + const validation = validateAgentResult(parsed); + if (!validation.value) { + finish(result('error', `Adapter output failed AgentResult validation: ${validation.errors.join('; ')}`, commandOutputArtifact(packet, config, stdout, metadata))); + return; + } + const commandArtifact = commandOutputArtifact(packet, config, stdout, metadata); + finish({ + ...validation.value, + artifacts: [...validation.value.artifacts, commandArtifact], + }); + }); + }); +} diff --git a/PAI/Tools/orchestrator/comments.ts b/PAI/Tools/orchestrator/comments.ts new file mode 100644 index 00000000..54a547da --- /dev/null +++ b/PAI/Tools/orchestrator/comments.ts @@ -0,0 +1,92 @@ +import { + type Decision, + type Finding, + validateDecision, + validateFinding, +} from './schema'; + +export const COMMENT_MARKER_START = ''; +export const COMMENT_MARKER_END = ''; + +export interface OrchestratorCommentState { + version: 1; + workItemId: string; + source: string; + updatedAt: string; + findings: Finding[]; + decision: Decision; +} + +export interface CommentParseResult { + found: boolean; + state?: OrchestratorCommentState; + errors: string[]; +} + +function nowIso(): string { + return new Date().toISOString(); +} + +function normalizeState(state: Omit & Partial>): OrchestratorCommentState { + return { + version: 1, + workItemId: state.workItemId, + source: state.source, + updatedAt: state.updatedAt ?? 
nowIso(), + findings: state.findings, + decision: state.decision, + }; +} + +export function emitOrchestratorComment(state: Omit & Partial>): string { + const normalized = normalizeState(state); + const blockerCount = normalized.findings.filter((finding) => + finding.status === 'open' && (finding.severity === 'blocker' || finding.severity === 'high') + ).length; + const summary = [ + '### PAI Orchestration Review', + '', + `Work item: \`${normalized.workItemId}\``, + `Source: \`${normalized.source}\``, + `Decision: \`${normalized.decision.status}\` - ${normalized.decision.reason}`, + `Open blocker/high findings: ${blockerCount}`, + '', + COMMENT_MARKER_START, + '```json', + JSON.stringify(normalized, null, 2), + '```', + COMMENT_MARKER_END, + ]; + return `${summary.join('\n')}\n`; +} + +export function parseOrchestratorComment(markdown: string): CommentParseResult { + const start = markdown.indexOf(COMMENT_MARKER_START); + const end = markdown.indexOf(COMMENT_MARKER_END); + if (start < 0 || end < 0 || end <= start) return { found: false, errors: [] }; + + const block = markdown.slice(start + COMMENT_MARKER_START.length, end); + const fence = block.match(/```json\s*([\s\S]*?)\s*```/); + const jsonText = fence ? 
fence[1] : block.trim(); + try { + const parsed = JSON.parse(jsonText) as Record; + const errors: string[] = []; + if (parsed.version !== 1) errors.push('version: must be 1'); + if (typeof parsed.workItemId !== 'string' || parsed.workItemId.length === 0) errors.push('workItemId: must be a non-empty string'); + if (typeof parsed.source !== 'string' || parsed.source.length === 0) errors.push('source: must be a non-empty string'); + if (typeof parsed.updatedAt !== 'string' || parsed.updatedAt.length === 0) errors.push('updatedAt: must be a non-empty string'); + if (!Array.isArray(parsed.findings)) { + errors.push('findings: must be an array'); + } else { + parsed.findings.forEach((finding, index) => { + errors.push(...validateFinding(finding, `findings[${index}]`).errors); + }); + } + const decision = validateDecision(parsed.decision, 'decision'); + errors.push(...decision.errors); + if (errors.length > 0) return { found: true, errors }; + return { found: true, state: parsed as unknown as OrchestratorCommentState, errors: [] }; + } catch (error) { + return { found: true, errors: [`Failed to parse orchestrator marker JSON: ${error instanceof Error ? 
error.message : String(error)}`] }; + } +} diff --git a/PAI/Tools/orchestrator/github.ts b/PAI/Tools/orchestrator/github.ts new file mode 100644 index 00000000..5ea504f1 --- /dev/null +++ b/PAI/Tools/orchestrator/github.ts @@ -0,0 +1,149 @@ +import { type CheckState } from './policy'; +import { parseOrchestratorComment, type OrchestratorCommentState } from './comments'; +import { type WorkInput, type WorkItem } from './schema'; + +export interface PullRequestRef { + repo: string; + number: number; + url: string; + head: string; + base: string; + labels: string[]; +} + +export interface PullRequestContext { + pullRequest: PullRequestRef; + diff: string; + changedPaths: string[]; + checkState: CheckState; + comments: string[]; + paiComment?: OrchestratorCommentState; +} + +export interface GitHubClient { + getPullRequest(workItem: WorkItem): Promise; + upsertComment(markdown: string, options?: GitHubWriteOptions): Promise; + pushFixes(options?: GitHubWriteOptions): Promise; + mergePullRequest(options?: GitHubWriteOptions): Promise; +} + +export interface GitHubFixtureOptions { + diff?: string; + checkState?: CheckState; + labels?: string[]; + comments?: string[]; + allowLiveWrites?: boolean; + liveWriteToken?: string; +} + +export interface GitHubWriteOptions { + allowLiveWrites?: boolean; + liveWriteToken?: string; +} + +export const REQUIRED_LIVE_WRITE_TOKEN = 'PAI_ORCHESTRATOR_LIVE_WRITE'; + +function githubInput(workItem: WorkItem): WorkInput | undefined { + return workItem.inputs.find((input) => input.type === 'github-pr'); +} + +function metadataString(metadata: Record | undefined, key: string, fallback: string): string { + const value = metadata?.[key]; + return typeof value === 'string' && value.length > 0 ? value : fallback; +} + +function metadataNumber(metadata: Record | undefined, key: string, fallback: number): number { + const value = metadata?.[key]; + return typeof value === 'number' && Number.isInteger(value) ? 
value : fallback; +} + +function parsePaiComment(comments: string[]): OrchestratorCommentState | undefined { + for (const comment of comments) { + const parsed = parseOrchestratorComment(comment); + if (parsed.state) return parsed.state; + } + return undefined; +} + +function requireLiveWrite(action: string, options: GitHubWriteOptions | undefined): void { + if (options?.allowLiveWrites !== true || options.liveWriteToken !== REQUIRED_LIVE_WRITE_TOKEN) { + throw new Error(`${action} requires explicit live GitHub write approval.`); + } +} + +export function changedPathsFromDiff(diff: string): string[] { + const paths = new Set(); + for (const line of diff.split('\n')) { + const diffMatch = line.match(/^diff --git a\/(.+?) b\/(.+)$/); + if (diffMatch) { + const [, oldPath, newPath] = diffMatch; + paths.add(newPath === '/dev/null' ? oldPath : newPath); + continue; + } + const fileMatch = line.match(/^(\+\+\+|---) (?:[ab]\/)?(.+)$/); + if (fileMatch && fileMatch[2] !== '/dev/null') paths.add(fileMatch[2]); + } + return [...paths].sort(); +} + +export class FixtureGitHubClient implements GitHubClient { + readonly writes: string[] = []; + private readonly options: GitHubFixtureOptions; + + constructor(options: GitHubFixtureOptions = {}) { + this.options = options; + } + + async getPullRequest(workItem: WorkItem): Promise { + const input = githubInput(workItem); + if (!input) throw new Error('Work item requires a github-pr input.'); + const metadata = input.metadata; + const comments = this.options.comments ?? []; + const repo = metadataString(metadata, 'repo', 'example/project'); + const number = metadataNumber(metadata, 'number', 0); + const diff = this.options.diff ?? 
[ + 'diff --git a/src/example.ts b/src/example.ts', + 'index 1111111..2222222 100644', + '--- a/src/example.ts', + '+++ b/src/example.ts', + '@@ -1,3 +1,4 @@', + ' export function example() {', + '+ return true;', + ' }', + ].join('\n'); + return { + pullRequest: { + repo, + number, + url: input.source, + head: metadataString(metadata, 'head', 'feature/example'), + base: metadataString(metadata, 'base', 'main'), + labels: this.options.labels ?? [], + }, + diff, + changedPaths: changedPathsFromDiff(diff), + checkState: this.options.checkState ?? 'green', + comments, + paiComment: parsePaiComment(comments), + }; + } + + async upsertComment(markdown: string, options?: GitHubWriteOptions): Promise { + requireLiveWrite('upsertComment', options); + this.writes.push(markdown); + } + + async pushFixes(options?: GitHubWriteOptions): Promise { + requireLiveWrite('pushFixes', options); + this.writes.push('pushFixes'); + } + + async mergePullRequest(options?: GitHubWriteOptions): Promise { + requireLiveWrite('mergePullRequest', options); + this.writes.push('mergePullRequest'); + } +} + +export function fixtureGitHubClient(options: GitHubFixtureOptions = {}): FixtureGitHubClient { + return new FixtureGitHubClient(options); +} diff --git a/PAI/Tools/orchestrator/ledger.ts b/PAI/Tools/orchestrator/ledger.ts new file mode 100644 index 00000000..04a843c0 --- /dev/null +++ b/PAI/Tools/orchestrator/ledger.ts @@ -0,0 +1,169 @@ +import { createHash, randomUUID } from 'crypto'; +import { existsSync, mkdirSync, readFileSync } from 'fs'; +import { join, relative } from 'path'; +import { atomicWriteJSON, atomicWriteText } from '../../../hooks/lib/atomic'; +import { + type WorkArtifact, + type WorkItem, + validateArtifact, + validateWorkItem, +} from './schema'; + +export interface LedgerEvent { + id: string; + workItemId: string; + type: 'work-item-saved' | 'artifact-written' | 'status-changed'; + timestamp: string; + message: string; + artifactId?: string; + metadata?: Record; +} + 
+export interface LedgerPaths { + root: string; + workDir: string; + workItemPath: string; + eventsPath: string; + artifactsDir: string; +} + +function safeSegment(value: string): string { + const segment = value.replace(/[^A-Za-z0-9._-]/g, '-').replace(/-+/g, '-').replace(/^-|-$/g, ''); + if (!segment || /^\.+$/.test(segment)) return 'item'; + return segment === value ? segment : `${segment}-${hashSegment(value)}`; +} + +function hashSegment(value: string): string { + return createHash('sha256').update(value, 'utf8').digest('hex').slice(0, 10); +} + +function nowIso(): string { + return new Date().toISOString(); +} + +function eventId(type: string): string { + return `${Date.now()}-${process.pid}-${type}-${randomUUID()}`; +} + +function appendJsonLine(path: string, line: string): void { + const existing = existsSync(path) ? readFileSync(path, 'utf-8') : ''; + const prefix = existing && !existing.endsWith('\n') ? `${existing}\n` : existing; + atomicWriteText(path, `${prefix}${line}\n`); +} + +function isLedgerEvent(value: unknown): value is LedgerEvent { + if (!value || typeof value !== 'object') return false; + const event = value as Partial; + return ( + typeof event.id === 'string' && + typeof event.workItemId === 'string' && + typeof event.timestamp === 'string' && + typeof event.message === 'string' && + (event.type === 'work-item-saved' || event.type === 'artifact-written' || event.type === 'status-changed') + ); +} + +export function ledgerPaths(root: string, workItemId: string): LedgerPaths { + const workDir = join(root, safeSegment(workItemId)); + return { + root, + workDir, + workItemPath: join(workDir, 'work-item.json'), + eventsPath: join(workDir, 'events.jsonl'), + artifactsDir: join(workDir, 'artifacts'), + }; +} + +export function ensureLedger(root: string, workItemId: string): LedgerPaths { + const paths = ledgerPaths(root, workItemId); + mkdirSync(paths.artifactsDir, { recursive: true }); + return paths; +} + +export function 
appendLedgerEvent(root: string, event: Omit): LedgerEvent { + const paths = ensureLedger(root, event.workItemId); + const fullEvent: LedgerEvent = { + ...event, + id: eventId(event.type), + timestamp: nowIso(), + }; + appendJsonLine(paths.eventsPath, JSON.stringify(fullEvent)); + return fullEvent; +} + +export function readLedgerEvents(root: string, workItemId: string): LedgerEvent[] { + const paths = ledgerPaths(root, workItemId); + if (!existsSync(paths.eventsPath)) return []; + const events: LedgerEvent[] = []; + for (const line of readFileSync(paths.eventsPath, 'utf-8').split('\n').filter(Boolean)) { + try { + const parsed = JSON.parse(line); + if (isLedgerEvent(parsed)) events.push(parsed); + } catch { + // Preserve readable history even if one JSONL line is corrupt. + } + } + return events; +} + +export function saveWorkItem(root: string, workItem: WorkItem): WorkItem { + const validation = validateWorkItem(workItem); + if (!validation.value) { + throw new Error(`Invalid work item: ${validation.errors.join('; ')}`); + } + const paths = ensureLedger(root, workItem.id); + atomicWriteJSON(paths.workItemPath, validation.value); + appendLedgerEvent(root, { + workItemId: workItem.id, + type: 'work-item-saved', + message: `Saved work item ${workItem.id}`, + metadata: { status: workItem.status, type: workItem.type }, + }); + return validation.value; +} + +export function loadWorkItem(root: string, workItemId: string): WorkItem | null { + const paths = ledgerPaths(root, workItemId); + if (!existsSync(paths.workItemPath)) return null; + const parsed = JSON.parse(readFileSync(paths.workItemPath, 'utf-8')); + const validation = validateWorkItem(parsed); + if (!validation.value) { + throw new Error(`Stored work item is invalid: ${validation.errors.join('; ')}`); + } + return validation.value; +} + +export function writeArtifact(root: string, workItemId: string, artifact: WorkArtifact): WorkArtifact { + const validation = validateArtifact(artifact); + if 
(!validation.value) { + throw new Error(`Invalid artifact: ${validation.errors.join('; ')}`); + } + const paths = ensureLedger(root, workItemId); + const artifactPath = join(paths.artifactsDir, `${safeSegment(artifact.id)}.json`); + const artifactWithPath: WorkArtifact = { + ...validation.value, + createdAt: validation.value.createdAt ?? nowIso(), + path: validation.value.path ?? relative(paths.workDir, artifactPath), + }; + atomicWriteJSON(artifactPath, artifactWithPath); + appendLedgerEvent(root, { + workItemId, + type: 'artifact-written', + message: `Wrote artifact ${artifact.id}`, + artifactId: artifact.id, + metadata: { artifactType: artifact.type, source: artifact.source }, + }); + return artifactWithPath; +} + +export function readArtifact(root: string, workItemId: string, artifactId: string): WorkArtifact | null { + const paths = ledgerPaths(root, workItemId); + const artifactPath = join(paths.artifactsDir, `${safeSegment(artifactId)}.json`); + if (!existsSync(artifactPath)) return null; + const parsed = JSON.parse(readFileSync(artifactPath, 'utf-8')); + const validation = validateArtifact(parsed); + if (!validation.value) { + throw new Error(`Stored artifact is invalid: ${validation.errors.join('; ')}`); + } + return validation.value; +} diff --git a/PAI/Tools/orchestrator/policy.ts b/PAI/Tools/orchestrator/policy.ts new file mode 100644 index 00000000..47971bc7 --- /dev/null +++ b/PAI/Tools/orchestrator/policy.ts @@ -0,0 +1,159 @@ +import { + type AgentResult, + type Decision, + type Finding, + type PolicyResult, + type WorkPolicy, + validateAgentResult, + validatePolicy, +} from './schema'; +import { posix } from 'path'; + +export type CheckState = 'green' | 'red' | 'pending' | 'missing' | 'unknown'; +export type RequestedAction = 'advise' | 'fix' | 'iterate' | 'push' | 'merge'; + +export interface PolicyEvaluationInput { + policy: WorkPolicy; + requestedAction: RequestedAction; + changedPaths?: string[]; + findings?: Finding[]; + checkState?: 
CheckState; + privateBoundaryRisk?: boolean; + agentResult?: AgentResult | unknown; + malformedAgentOutput?: boolean; +} + +function block(reason: string, reasons: string[], requiredActions?: string[]): PolicyResult { + const decision: Decision = { status: 'blocked', reason, actions: requiredActions }; + return { allowed: false, decision, reasons, requiredActions }; +} + +function pass(reason: string, reasons: string[]): PolicyResult { + return { + allowed: true, + decision: { status: 'complete', reason }, + reasons, + }; +} + +function globToRegExp(glob: string): RegExp { + const globstar = '\u0000'; + const escaped = glob + .replace(/\*\*/g, globstar) + .replace(/[.+^${}()|[\]\\]/g, '\\$&'); + const pattern = escaped + .replace(/\*/g, '[^/]*') + .replaceAll(globstar, '.*'); + return new RegExp(`^${pattern}$`); +} + +function normalizePolicyPath(path: string): string { + return posix.normalize(path.replace(/\\/g, '/').replace(/^[ab]\//, '').replace(/^\/+/, '')); +} + +function matchesAny(path: string, patterns: string[]): boolean { + const normalized = normalizePolicyPath(path); + return patterns.some((pattern) => pattern === '**' || globToRegExp(pattern).test(normalizePolicyPath(normalized))); +} + +function openBlockingFindings(findings: Finding[]): Finding[] { + return findings.filter((finding) => + finding.status === 'open' && + (finding.severity === 'blocker' || finding.severity === 'high') + ); +} + +export function evaluatePolicy(input: PolicyEvaluationInput): PolicyResult { + const reasons: string[] = []; + const policyValidation = validatePolicy(input.policy); + if (!policyValidation.value) { + return block('Policy is invalid.', [`Policy validation failed: ${policyValidation.errors.join('; ')}`], ['fix-policy']); + } + const policy = policyValidation.value; + reasons.push(...policyValidation.warnings); + + if (input.privateBoundaryRisk && policy.stopOnPrivateBoundaryRisk) { + return block('Private/public boundary risk detected.', [...reasons, 
'Private/public boundary risk detected.'], ['remove-private-boundary-risk']); + } + + const changedPaths = (input.changedPaths ?? []).map(normalizePolicyPath); + const blockedPath = changedPaths.find((path) => matchesAny(path, policy.blockedPaths)); + if (blockedPath) { + return block(`Changed path is blocked by policy: ${blockedPath}`, [...reasons, `Blocked path changed: ${blockedPath}`], ['remove-blocked-path-change']); + } + + const disallowedPath = changedPaths.find((path) => !matchesAny(path, policy.allowedPaths)); + if (disallowedPath) { + return block(`Changed path is outside allowed paths: ${disallowedPath}`, [...reasons, `Path outside allowedPaths: ${disallowedPath}`], ['narrow-changes-to-allowed-paths']); + } + + if (policy.requireGreenCI && input.checkState !== 'green') { + return block(`CI is not green: ${input.checkState ?? 'missing'}`, [...reasons, `CI state is ${input.checkState ?? 'missing'}`], ['wait-for-green-ci']); + } + + const findings = input.findings ?? []; + const blockingFindings = openBlockingFindings(findings); + if (blockingFindings.length > 0) { + return block( + `Open blocker/high findings remain: ${blockingFindings.map((finding) => finding.id).join(', ')}`, + [...reasons, `Open blocker/high findings: ${blockingFindings.map((finding) => finding.id).join(', ')}`], + ['fix-or-triage-blocking-findings'], + ); + } + + if (input.malformedAgentOutput) { + return block('Agent output is malformed.', [...reasons, 'Agent output is malformed.'], ['rerun-or-inspect-agent-output']); + } + if (input.agentResult !== undefined) { + const result = validateAgentResult(input.agentResult); + if (!result.value) { + return block('Agent result failed schema validation.', [...reasons, `Agent result invalid: ${result.errors.join('; ')}`], ['fix-agent-output']); + } + if (result.value.status === 'error' || result.value.status === 'blocked') { + return block(`Agent result is ${result.value.status}.`, [...reasons, `Agent result is ${result.value.status}.`], 
['inspect-agent-result']); + } + } + + if (input.requestedAction === 'merge') { + if (policy.autonomy !== 'merge') { + return block('Merge requested but policy autonomy is not merge.', [...reasons, `Policy autonomy is ${policy.autonomy}.`], ['raise-autonomy-if-intended']); + } + if (!policy.allowMerge) { + return block('Merge requested but allowMerge is false.', [...reasons, 'allowMerge is false.'], ['enable-allowMerge-if-intended']); + } + if (input.checkState !== 'green') { + return block( + `Merge requested but CI is not green: ${input.checkState ?? 'missing'}`, + [...reasons, `Merge requires green CI; state is ${input.checkState ?? 'missing'}.`], + ['wait-for-green-ci'], + ); + } + return { + allowed: true, + decision: { status: 'merge-ready', reason: 'Policy permits merge.' }, + reasons, + }; + } + + if (input.requestedAction === 'push') { + if (policy.autonomy === 'advise') { + return block('Push requested but policy autonomy is advise.', [...reasons, 'Policy autonomy is advise.'], ['raise-autonomy-if-intended']); + } + if (!policy.allowPush) { + return block('Push requested but allowPush is false.', [...reasons, 'allowPush is false.'], ['enable-allowPush-if-intended']); + } + if (input.checkState !== 'green') { + return block( + `Push requested but CI is not green: ${input.checkState ?? 'missing'}`, + [...reasons, `Push requires green CI; state is ${input.checkState ?? 
'missing'}.`], + ['wait-for-green-ci'], + ); + } + } + + if ((input.requestedAction === 'fix' || input.requestedAction === 'iterate') && policy.autonomy === 'advise') { + return block(`${input.requestedAction} requested but policy autonomy is advise.`, [...reasons, 'Policy autonomy is advise.'], ['raise-autonomy-if-intended']); + } + + return pass(`Policy permits ${input.requestedAction}.`, reasons); +} diff --git a/PAI/Tools/orchestrator/schema.ts b/PAI/Tools/orchestrator/schema.ts new file mode 100644 index 00000000..ad392880 --- /dev/null +++ b/PAI/Tools/orchestrator/schema.ts @@ -0,0 +1,376 @@ +export type WorkItemType = + | 'pr-review' + | 'adversarial-review' + | 'plan-validation' + | 'roadmap-review' + | 'idea-stress-test' + | 'implementation' + | 'release-readiness'; + +export type WorkItemStatus = 'queued' | 'running' | 'blocked' | 'complete' | 'failed'; +export type WorkInputType = 'github-pr' | 'git-diff' | 'file' | 'markdown' | 'prompt' | 'test-output' | 'transcript'; +export type WorkRoleName = 'reviewer' | 'red-team' | 'implementer' | 'fixer' | 'judge' | 'validator'; +export type AgentCapability = 'plan' | 'review' | 'red-team' | 'implement' | 'judge' | 'validate'; +export type AutonomyLevel = 'advise' | 'fix' | 'iterate' | 'merge'; +export type ArtifactType = 'packet' | 'agent-result' | 'finding-report' | 'patch' | 'commit' | 'test-log' | 'decision' | 'pr-comment'; +export type AgentResultStatus = 'pass' | 'findings' | 'fixed' | 'blocked' | 'error'; +export type FindingSeverity = 'blocker' | 'high' | 'medium' | 'low' | 'note'; +export type FindingCategory = 'bug' | 'test' | 'security' | 'privacy' | 'design' | 'docs' | 'process'; +export type FindingConfidence = 'low' | 'medium' | 'high'; +export type FindingStatus = 'open' | 'fixed' | 'wontfix' | 'needs-human'; +export type DecisionStatus = 'complete' | 'fixed' | 'blocked' | 'failed' | 'merge-ready'; + +export interface WorkInput { + id?: string; + type: WorkInputType; + source: string; + content?: 
string; + metadata?: Record; +} + +export interface WorkRole { + id?: string; + role: WorkRoleName; + engine: string; + capabilities: AgentCapability[]; +} + +export interface WorkPolicy { + autonomy: AutonomyLevel; + maxRounds: number; + allowedPaths: string[]; + blockedPaths: string[]; + requireGreenCI: boolean; + allowPush: boolean; + allowMerge: boolean; + stopOnPrivateBoundaryRisk: boolean; +} + +export interface WorkArtifact { + id: string; + type: ArtifactType; + source: string; + createdAt?: string; + path?: string; + content?: string; + metadata?: Record; +} + +export interface Finding { + id: string; + severity: FindingSeverity; + category: FindingCategory; + location?: { path: string; line?: number }; + issue: string; + recommendation: string; + confidence: FindingConfidence; + status: FindingStatus; +} + +export interface AgentResult { + status: AgentResultStatus; + artifacts: WorkArtifact[]; + findings?: Finding[]; + summary: string; +} + +export interface Decision { + status: DecisionStatus; + reason: string; + actions?: string[]; + metadata?: Record; +} + +export interface PolicyResult { + allowed: boolean; + decision: Decision; + reasons: string[]; + requiredActions?: string[]; +} + +export interface WorkItem { + id: string; + type: WorkItemType; + objective: string; + inputs: WorkInput[]; + roles: WorkRole[]; + policy: WorkPolicy; + artifacts: WorkArtifact[]; + status: WorkItemStatus; +} + +export interface WorkPacket { + id: string; + workItemId: string; + type: WorkItemType; + objective: string; + role: WorkRole; + inputs: WorkInput[]; + policy: WorkPolicy; + artifacts: WorkArtifact[]; + findings?: Finding[]; +} + +export interface SchemaValidationResult { + valid: boolean; + value?: T; + errors: string[]; + warnings: string[]; +} + +const WORK_ITEM_TYPES: WorkItemType[] = [ + 'pr-review', + 'adversarial-review', + 'plan-validation', + 'roadmap-review', + 'idea-stress-test', + 'implementation', + 'release-readiness', +]; +const WORK_STATUSES: 
WorkItemStatus[] = ['queued', 'running', 'blocked', 'complete', 'failed']; +const INPUT_TYPES: WorkInputType[] = ['github-pr', 'git-diff', 'file', 'markdown', 'prompt', 'test-output', 'transcript']; +const ROLE_NAMES: WorkRoleName[] = ['reviewer', 'red-team', 'implementer', 'fixer', 'judge', 'validator']; +const CAPABILITIES: AgentCapability[] = ['plan', 'review', 'red-team', 'implement', 'judge', 'validate']; +const AUTONOMY_LEVELS: AutonomyLevel[] = ['advise', 'fix', 'iterate', 'merge']; +const ARTIFACT_TYPES: ArtifactType[] = ['packet', 'agent-result', 'finding-report', 'patch', 'commit', 'test-log', 'decision', 'pr-comment']; +const AGENT_STATUSES: AgentResultStatus[] = ['pass', 'findings', 'fixed', 'blocked', 'error']; +const DECISION_STATUSES: DecisionStatus[] = ['complete', 'fixed', 'blocked', 'failed', 'merge-ready']; +const FINDING_SEVERITIES: FindingSeverity[] = ['blocker', 'high', 'medium', 'low', 'note']; +const FINDING_CATEGORIES: FindingCategory[] = ['bug', 'test', 'security', 'privacy', 'design', 'docs', 'process']; +const FINDING_CONFIDENCES: FindingConfidence[] = ['low', 'medium', 'high']; +const FINDING_STATUSES: FindingStatus[] = ['open', 'fixed', 'wontfix', 'needs-human']; + +function isObject(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function isString(value: unknown): value is string { + return typeof value === 'string' && value.trim().length > 0; +} + +function isStringArray(value: unknown): value is string[] { + return Array.isArray(value) && value.every((entry) => typeof entry === 'string'); +} + +function enumValue(value: unknown, allowed: readonly T[]): value is T { + return typeof value === 'string' && (allowed as readonly string[]).includes(value); +} + +function error(errors: string[], path: string, message: string): void { + errors.push(`${path}: ${message}`); +} + +function validateInput(value: unknown, path: string, errors: string[]): value is WorkInput { + 
if (!isObject(value)) { + error(errors, path, 'must be an object'); + return false; + } + if (!enumValue(value.type, INPUT_TYPES)) error(errors, `${path}.type`, `must be one of ${INPUT_TYPES.join(', ')}`); + if (!isString(value.source)) error(errors, `${path}.source`, 'must be a non-empty string'); + if (value.content !== undefined && typeof value.content !== 'string') error(errors, `${path}.content`, 'must be a string when present'); + if (value.metadata !== undefined && !isObject(value.metadata)) error(errors, `${path}.metadata`, 'must be an object when present'); + return true; +} + +function validateRole(value: unknown, path: string, errors: string[]): value is WorkRole { + if (!isObject(value)) { + error(errors, path, 'must be an object'); + return false; + } + if (!enumValue(value.role, ROLE_NAMES)) error(errors, `${path}.role`, `must be one of ${ROLE_NAMES.join(', ')}`); + if (!isString(value.engine)) error(errors, `${path}.engine`, 'must be a non-empty string'); + if (!Array.isArray(value.capabilities) || !value.capabilities.every((entry) => enumValue(entry, CAPABILITIES))) { + error(errors, `${path}.capabilities`, `must be an array of ${CAPABILITIES.join(', ')}`); + } + return true; +} + +export function conservativeDefaultPolicy(): WorkPolicy { + return { + autonomy: 'advise', + maxRounds: 1, + allowedPaths: ['**'], + blockedPaths: ['docs/planning/**', 'specs/**', 'MEMORY/**', 'USER/**', 'Plans/**'], + requireGreenCI: true, + allowPush: false, + allowMerge: false, + stopOnPrivateBoundaryRisk: true, + }; +} + +export function validatePolicy(value: unknown): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + + if (!isObject(value)) { + return { valid: false, errors: ['policy: must be an object'], warnings }; + } + if (!enumValue(value.autonomy, AUTONOMY_LEVELS)) error(errors, 'policy.autonomy', `must be one of ${AUTONOMY_LEVELS.join(', ')}`); + if (typeof value.maxRounds !== 'number' || 
!Number.isInteger(value.maxRounds) || value.maxRounds < 1) { + error(errors, 'policy.maxRounds', 'must be a positive integer'); + } + if (!isStringArray(value.allowedPaths)) error(errors, 'policy.allowedPaths', 'must be an array of strings'); + if (!isStringArray(value.blockedPaths)) error(errors, 'policy.blockedPaths', 'must be an array of strings'); + for (const key of ['requireGreenCI', 'allowPush', 'allowMerge', 'stopOnPrivateBoundaryRisk'] as const) { + if (typeof value[key] !== 'boolean') error(errors, `policy.${key}`, 'must be a boolean'); + } + if (value.autonomy === 'merge' && value.allowMerge !== true) { + warnings.push('policy.autonomy is merge, but allowMerge is false; merge will remain blocked.'); + } + if (value.allowMerge === true && value.requireGreenCI !== true) { + warnings.push('allowMerge without requireGreenCI is unsafe; policy evaluation must still fail closed.'); + } + if (value.stopOnPrivateBoundaryRisk !== true) { + warnings.push('stopOnPrivateBoundaryRisk is false; default policy should keep it true.'); + } + + return { valid: errors.length === 0, value: errors.length === 0 ? 
value as WorkPolicy : undefined, errors, warnings }; +} + +export function validateArtifact(value: unknown, path = 'artifact'): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + if (!isObject(value)) return { valid: false, errors: [`${path}: must be an object`], warnings }; + if (!isString(value.id)) error(errors, `${path}.id`, 'must be a non-empty string'); + if (!enumValue(value.type, ARTIFACT_TYPES)) error(errors, `${path}.type`, `must be one of ${ARTIFACT_TYPES.join(', ')}`); + if (!isString(value.source)) error(errors, `${path}.source`, 'must be a non-empty string'); + if (value.path !== undefined && typeof value.path !== 'string') error(errors, `${path}.path`, 'must be a string when present'); + if (value.content !== undefined && typeof value.content !== 'string') error(errors, `${path}.content`, 'must be a string when present'); + if (value.metadata !== undefined && !isObject(value.metadata)) error(errors, `${path}.metadata`, 'must be an object when present'); + return { valid: errors.length === 0, value: errors.length === 0 ? 
value as WorkArtifact : undefined, errors, warnings }; +} + +export function validateFinding(value: unknown, path = 'finding'): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + if (!isObject(value)) return { valid: false, errors: [`${path}: must be an object`], warnings }; + if (!isString(value.id)) error(errors, `${path}.id`, 'must be a non-empty string'); + if (!enumValue(value.severity, FINDING_SEVERITIES)) error(errors, `${path}.severity`, `must be one of ${FINDING_SEVERITIES.join(', ')}`); + if (!enumValue(value.category, FINDING_CATEGORIES)) error(errors, `${path}.category`, `must be one of ${FINDING_CATEGORIES.join(', ')}`); + if (!isString(value.issue)) error(errors, `${path}.issue`, 'must be a non-empty string'); + if (!isString(value.recommendation)) error(errors, `${path}.recommendation`, 'must be a non-empty string'); + if (!enumValue(value.confidence, FINDING_CONFIDENCES)) error(errors, `${path}.confidence`, `must be one of ${FINDING_CONFIDENCES.join(', ')}`); + if (!enumValue(value.status, FINDING_STATUSES)) error(errors, `${path}.status`, `must be one of ${FINDING_STATUSES.join(', ')}`); + if (value.location !== undefined) { + if (!isObject(value.location)) { + error(errors, `${path}.location`, 'must be an object when present'); + } else { + if (!isString(value.location.path)) error(errors, `${path}.location.path`, 'must be a non-empty string'); + if (value.location.line !== undefined && (typeof value.location.line !== 'number' || value.location.line < 1)) { + error(errors, `${path}.location.line`, 'must be a positive number when present'); + } + } + } + return { valid: errors.length === 0, value: errors.length === 0 ? 
value as Finding : undefined, errors, warnings }; +} + +export function validateAgentResult(value: unknown): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + if (!isObject(value)) return { valid: false, errors: ['agentResult: must be an object'], warnings }; + if (!enumValue(value.status, AGENT_STATUSES)) error(errors, 'agentResult.status', `must be one of ${AGENT_STATUSES.join(', ')}`); + if (!isString(value.summary)) error(errors, 'agentResult.summary', 'must be a non-empty string'); + if (!Array.isArray(value.artifacts)) { + error(errors, 'agentResult.artifacts', 'must be an array'); + } else { + value.artifacts.forEach((artifact, index) => errors.push(...validateArtifact(artifact, `agentResult.artifacts[${index}]`).errors)); + } + if (value.findings !== undefined) { + if (!Array.isArray(value.findings)) { + error(errors, 'agentResult.findings', 'must be an array when present'); + } else { + value.findings.forEach((finding, index) => errors.push(...validateFinding(finding, `agentResult.findings[${index}]`).errors)); + } + } + return { valid: errors.length === 0, value: errors.length === 0 ? 
value as AgentResult : undefined, errors, warnings }; +} + +export function validateDecision(value: unknown, path = 'decision'): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + if (!isObject(value)) return { valid: false, errors: [`${path}: must be an object`], warnings }; + if (!enumValue(value.status, DECISION_STATUSES)) error(errors, `${path}.status`, `must be one of ${DECISION_STATUSES.join(', ')}`); + if (!isString(value.reason)) error(errors, `${path}.reason`, 'must be a non-empty string'); + if (value.actions !== undefined && !isStringArray(value.actions)) error(errors, `${path}.actions`, 'must be an array of strings when present'); + if (value.metadata !== undefined && !isObject(value.metadata)) error(errors, `${path}.metadata`, 'must be an object when present'); + return { valid: errors.length === 0, value: errors.length === 0 ? value as Decision : undefined, errors, warnings }; +} + +export function validatePolicyResult(value: unknown): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + if (!isObject(value)) return { valid: false, errors: ['policyResult: must be an object'], warnings }; + if (typeof value.allowed !== 'boolean') error(errors, 'policyResult.allowed', 'must be a boolean'); + const decision = validateDecision(value.decision, 'policyResult.decision'); + errors.push(...decision.errors); + warnings.push(...decision.warnings); + if (!isStringArray(value.reasons)) error(errors, 'policyResult.reasons', 'must be an array of strings'); + if (value.requiredActions !== undefined && !isStringArray(value.requiredActions)) { + error(errors, 'policyResult.requiredActions', 'must be an array of strings when present'); + } + if (value.allowed === true && decision.value?.status === 'blocked') { + warnings.push('policyResult allows a blocked decision; callers should treat this as fail-closed.'); + } + return { valid: errors.length === 0, value: errors.length === 0 ? 
value as PolicyResult : undefined, errors, warnings }; +} + +export function validateWorkPacket(value: unknown): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + if (!isObject(value)) return { valid: false, errors: ['workPacket: must be an object'], warnings }; + if (!isString(value.id)) error(errors, 'workPacket.id', 'must be a non-empty string'); + if (!isString(value.workItemId)) error(errors, 'workPacket.workItemId', 'must be a non-empty string'); + if (!enumValue(value.type, WORK_ITEM_TYPES)) error(errors, 'workPacket.type', `must be one of ${WORK_ITEM_TYPES.join(', ')}`); + if (!isString(value.objective)) error(errors, 'workPacket.objective', 'must be a non-empty string'); + validateRole(value.role, 'workPacket.role', errors); + if (!Array.isArray(value.inputs) || value.inputs.length === 0) { + error(errors, 'workPacket.inputs', 'must be a non-empty array'); + } else { + value.inputs.forEach((input, index) => validateInput(input, `workPacket.inputs[${index}]`, errors)); + } + const policyResult = validatePolicy(value.policy); + errors.push(...policyResult.errors); + warnings.push(...policyResult.warnings); + if (!Array.isArray(value.artifacts)) { + error(errors, 'workPacket.artifacts', 'must be an array'); + } else { + value.artifacts.forEach((artifact, index) => errors.push(...validateArtifact(artifact, `workPacket.artifacts[${index}]`).errors)); + } + if (value.findings !== undefined) { + if (!Array.isArray(value.findings)) { + error(errors, 'workPacket.findings', 'must be an array when present'); + } else { + value.findings.forEach((finding, index) => errors.push(...validateFinding(finding, `workPacket.findings[${index}]`).errors)); + } + } + return { valid: errors.length === 0, value: errors.length === 0 ? 
value as WorkPacket : undefined, errors, warnings }; +} + +export function validateWorkItem(value: unknown): SchemaValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + if (!isObject(value)) return { valid: false, errors: ['workItem: must be an object'], warnings }; + if (!isString(value.id)) error(errors, 'workItem.id', 'must be a non-empty string'); + if (!enumValue(value.type, WORK_ITEM_TYPES)) error(errors, 'workItem.type', `must be one of ${WORK_ITEM_TYPES.join(', ')}`); + if (!isString(value.objective)) error(errors, 'workItem.objective', 'must be a non-empty string'); + if (!Array.isArray(value.inputs) || value.inputs.length === 0) { + error(errors, 'workItem.inputs', 'must be a non-empty array'); + } else { + value.inputs.forEach((input, index) => validateInput(input, `workItem.inputs[${index}]`, errors)); + } + if (!Array.isArray(value.roles) || value.roles.length === 0) { + error(errors, 'workItem.roles', 'must be a non-empty array'); + } else { + value.roles.forEach((role, index) => validateRole(role, `workItem.roles[${index}]`, errors)); + } + const policyResult = validatePolicy(value.policy); + errors.push(...policyResult.errors); + warnings.push(...policyResult.warnings); + if (!Array.isArray(value.artifacts)) { + error(errors, 'workItem.artifacts', 'must be an array'); + } else { + value.artifacts.forEach((artifact, index) => errors.push(...validateArtifact(artifact, `workItem.artifacts[${index}]`).errors)); + } + if (!enumValue(value.status, WORK_STATUSES)) error(errors, 'workItem.status', `must be one of ${WORK_STATUSES.join(', ')}`); + return { valid: errors.length === 0, value: errors.length === 0 ? 
value as WorkItem : undefined, errors, warnings }; +} diff --git a/PAI/Tools/orchestrator/workflows/adversarial-review.ts b/PAI/Tools/orchestrator/workflows/adversarial-review.ts new file mode 100644 index 00000000..744d5bac --- /dev/null +++ b/PAI/Tools/orchestrator/workflows/adversarial-review.ts @@ -0,0 +1,295 @@ +import { writeArtifact, saveWorkItem } from '../ledger'; +import { evaluatePolicy, type CheckState } from '../policy'; +import { + type AgentResult, + type Decision, + type Finding, + type PolicyResult, + type WorkArtifact, + type WorkItem, + type WorkItemStatus, + type WorkPacket, + type WorkRole, + validateAgentResult, + validateWorkItem, +} from '../schema'; + +export type AgentRunner = (packet: WorkPacket) => Promise; + +export interface AdversarialReviewOptions { + dryRun?: boolean; + runAgent?: AgentRunner; + ledgerRoot?: string; + checkState?: CheckState; +} + +export interface AdversarialReviewResult { + status: Decision['status']; + workItemId: string; + workflow: 'adversarial-review'; + packets: WorkPacket[]; + agentResults: AgentResult[]; + findings: Finding[]; + policyResult: PolicyResult; + decision: Decision; + artifacts: WorkArtifact[]; + warnings: string[]; +} + +function nowIso(): string { + return new Date().toISOString(); +} + +function roleId(role: WorkRole): string { + return role.id ?? 
`${role.engine}-${role.role}`; +} + +function packetForRole(workItem: WorkItem, role: WorkRole, suffix: string, artifacts: WorkArtifact[], findings?: Finding[]): WorkPacket { + return { + id: `${workItem.id}-${roleId(role)}-${suffix}`, + workItemId: workItem.id, + type: workItem.type, + objective: workItem.objective, + role, + inputs: workItem.inputs, + policy: workItem.policy, + artifacts, + findings, + }; +} + +function packetArtifact(packet: WorkPacket): WorkArtifact { + return { + id: `${packet.id}-packet`, + type: 'packet', + source: 'orchestrator', + createdAt: nowIso(), + content: JSON.stringify(packet, null, 2), + metadata: { + role: packet.role.role, + engine: packet.role.engine, + workItemId: packet.workItemId, + }, + }; +} + +function findingReportArtifact(workItem: WorkItem, findings: Finding[]): WorkArtifact { + return { + id: `${workItem.id}-findings`, + type: 'finding-report', + source: 'orchestrator', + createdAt: nowIso(), + content: JSON.stringify({ findings }, null, 2), + metadata: { + findingCount: findings.length, + openBlockingCount: findings.filter((finding) => + finding.status === 'open' && (finding.severity === 'blocker' || finding.severity === 'high') + ).length, + }, + }; +} + +function decisionArtifact(workItem: WorkItem, policyResult: PolicyResult, agentResults: AgentResult[]): WorkArtifact { + return { + id: `${workItem.id}-decision`, + type: 'decision', + source: 'orchestrator', + createdAt: nowIso(), + content: JSON.stringify({ policyResult, agentSummaries: agentResults.map(({ status, summary }) => ({ status, summary })) }, null, 2), + metadata: { + status: policyResult.decision.status, + allowed: policyResult.allowed, + reasonCount: policyResult.reasons.length, + }, + }; +} + +function artifactResult(status: AgentResult['status'], summary: string, packet: WorkPacket, finding?: Finding): AgentResult { + return { + status, + summary, + artifacts: [{ + id: `${packet.id}-fixture-agent-result`, + type: 'agent-result', + source: 
packet.role.engine, + createdAt: nowIso(), + content: JSON.stringify({ role: packet.role.role, summary }, null, 2), + metadata: { + dryRunFixture: true, + role: packet.role.role, + engine: packet.role.engine, + }, + }], + findings: finding ? [finding] : undefined, + }; +} + +export async function fixtureAdversarialAgentRunner(packet: WorkPacket): Promise { + if (packet.role.role === 'red-team') { + return artifactResult('findings', 'Fixture red-team found one medium design risk.', packet, { + id: `${packet.id}-rollback-risk`, + severity: 'medium', + category: 'design', + issue: 'The plan does not name a rollback checkpoint.', + recommendation: 'Add a rollback checkpoint before unattended execution is enabled.', + confidence: 'high', + status: 'open', + }); + } + if (packet.role.role === 'reviewer') { + return artifactResult('pass', 'Fixture reviewer found no blocking issues.', packet); + } + if (packet.role.role === 'judge') { + return artifactResult('pass', `Fixture judge synthesized ${packet.findings?.length ?? 
0} finding(s).`, packet); + } + return artifactResult('blocked', `Unsupported adversarial-review role: ${packet.role.role}`, packet); +} + +function workflowStatus(decision: Decision): WorkItemStatus { + if (decision.status === 'complete' || decision.status === 'fixed' || decision.status === 'merge-ready') return 'complete'; + if (decision.status === 'failed') return 'failed'; + return 'blocked'; +} + +function workItemPolicyPaths(workItem: WorkItem): string[] { + const paths = new Set(); + for (const input of workItem.inputs) { + if (!/^[a-z][a-z0-9+.-]*:\/\//i.test(input.source)) paths.add(input.source); + } + for (const artifact of workItem.artifacts) { + if (artifact.path) paths.add(artifact.path); + } + return [...paths].sort(); +} + +function blockedResult(workItem: WorkItem, reason: string, artifacts: WorkArtifact[] = [], warnings: string[] = []): AdversarialReviewResult { + const policyResult: PolicyResult = { + allowed: false, + decision: { status: 'blocked', reason, actions: ['fix-work-item'] }, + reasons: [reason], + requiredActions: ['fix-work-item'], + }; + return { + status: 'blocked', + workItemId: workItem.id, + workflow: 'adversarial-review', + packets: [], + agentResults: [], + findings: [], + policyResult, + decision: policyResult.decision, + artifacts: [...artifacts, decisionArtifact(workItem, policyResult, [])], + warnings, + }; +} + +export async function runAdversarialReview(workItem: WorkItem, options: AdversarialReviewOptions = {}): Promise { + const validation = validateWorkItem(workItem); + if (!validation.value) { + return blockedResult(workItem, `Work item validation failed: ${validation.errors.join('; ')}`, [], validation.warnings); + } + if (validation.value.type !== 'adversarial-review') { + return blockedResult(validation.value, `Unsupported workflow for adversarial-review runner: ${validation.value.type}`, [], validation.warnings); + } + + const runner = options.runAgent ?? (options.dryRun ? 
fixtureAdversarialAgentRunner : undefined); + if (!runner) { + return blockedResult(validation.value, 'No agent runner configured; live adversarial-review execution is disabled by default.', [], validation.warnings); + } + + const reviewRoles = validation.value.roles.filter((role) => role.role === 'reviewer' || role.role === 'red-team'); + const judgeRole = validation.value.roles.find((role) => role.role === 'judge'); + if (reviewRoles.length === 0) return blockedResult(validation.value, 'Adversarial review requires at least one reviewer or red-team role.', [], validation.warnings); + if (!judgeRole) return blockedResult(validation.value, 'Adversarial review requires a judge role.', [], validation.warnings); + + const artifacts: WorkArtifact[] = [...validation.value.artifacts]; + const packets: WorkPacket[] = []; + const agentResults: AgentResult[] = []; + const malformedReasons: string[] = []; + + const recordArtifact = (artifact: WorkArtifact): WorkArtifact => { + const written = options.ledgerRoot ? writeArtifact(options.ledgerRoot, validation.value.id, artifact) : artifact; + artifacts.push(written); + return written; + }; + + for (const role of reviewRoles) { + const packet = packetForRole(validation.value, role, 'review', artifacts); + packets.push(packet); + recordArtifact(packetArtifact(packet)); + try { + const rawResult = await runner(packet); + const result = validateAgentResult(rawResult); + if (!result.value) { + malformedReasons.push(`${packet.id}: ${result.errors.join('; ')}`); + const errorResult = artifactResult('error', `Agent result failed validation: ${result.errors.join('; ')}`, packet); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } else { + agentResults.push(result.value); + result.value.artifacts.forEach(recordArtifact); + } + } catch (error) { + const errorResult = artifactResult('error', `Agent runner threw: ${error instanceof Error ? 
error.message : String(error)}`, packet); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } + } + + const reviewFindings = agentResults.flatMap((result) => result.findings ?? []); + recordArtifact(findingReportArtifact(validation.value, reviewFindings)); + + const judgePacket = packetForRole(validation.value, judgeRole, 'judge', artifacts, reviewFindings); + packets.push(judgePacket); + recordArtifact(packetArtifact(judgePacket)); + try { + const rawJudgeResult = await runner(judgePacket); + const judgeValidation = validateAgentResult(rawJudgeResult); + if (!judgeValidation.value) { + malformedReasons.push(`${judgePacket.id}: ${judgeValidation.errors.join('; ')}`); + const errorResult = artifactResult('error', `Judge result failed validation: ${judgeValidation.errors.join('; ')}`, judgePacket); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } else { + agentResults.push(judgeValidation.value); + judgeValidation.value.artifacts.forEach(recordArtifact); + } + } catch (error) { + const errorResult = artifactResult('error', `Judge runner threw: ${error instanceof Error ? error.message : String(error)}`, judgePacket); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } + + const findings = agentResults.flatMap((result) => result.findings ?? []); + const badAgentResult = agentResults.find((result) => result.status === 'error' || result.status === 'blocked'); + const policyResult = evaluatePolicy({ + policy: validation.value.policy, + requestedAction: 'advise', + findings, + checkState: options.checkState ?? 'green', + changedPaths: workItemPolicyPaths(validation.value), + agentResult: badAgentResult, + malformedAgentOutput: malformedReasons.length > 0, + }); + const finalDecisionArtifact = recordArtifact(decisionArtifact(validation.value, policyResult, agentResults)); + const finalArtifacts = artifacts.includes(finalDecisionArtifact) ? 
artifacts : [...artifacts, finalDecisionArtifact]; + const updatedWorkItem: WorkItem = { + ...validation.value, + artifacts: finalArtifacts, + status: workflowStatus(policyResult.decision), + }; + if (options.ledgerRoot) saveWorkItem(options.ledgerRoot, updatedWorkItem); + + return { + status: policyResult.decision.status, + workItemId: validation.value.id, + workflow: 'adversarial-review', + packets, + agentResults, + findings, + policyResult, + decision: policyResult.decision, + artifacts: finalArtifacts, + warnings: [...validation.warnings, ...malformedReasons], + }; +} diff --git a/PAI/Tools/orchestrator/workflows/pr-review.ts b/PAI/Tools/orchestrator/workflows/pr-review.ts new file mode 100644 index 00000000..b563a4a4 --- /dev/null +++ b/PAI/Tools/orchestrator/workflows/pr-review.ts @@ -0,0 +1,442 @@ +import { emitOrchestratorComment } from '../comments'; +import { type CheckState, evaluatePolicy } from '../policy'; +import { fixtureGitHubClient, type GitHubClient, type PullRequestContext } from '../github'; +import { saveWorkItem, writeArtifact } from '../ledger'; +import { + type AgentResult, + type Decision, + type Finding, + type PolicyResult, + type WorkArtifact, + type WorkInput, + type WorkItem, + type WorkItemStatus, + type WorkPacket, + type WorkRole, + validateAgentResult, + validateWorkItem, +} from '../schema'; + +export type PrAgentRunner = (packet: WorkPacket) => Promise; + +export interface PrReviewOptions { + dryRun?: boolean; + runAgent?: PrAgentRunner; + github?: GitHubClient; + ledgerRoot?: string; + ciWaitTimeoutMs?: number; + emitComment?: boolean; + allowGithubWrites?: boolean; + liveWriteToken?: string; +} + +export interface PrReviewResult { + status: Decision['status']; + workItemId: string; + workflow: 'pr-review'; + packets: WorkPacket[]; + agentResults: AgentResult[]; + findings: Finding[]; + policyResult: PolicyResult; + decision: Decision; + artifacts: WorkArtifact[]; + checkState: CheckState; + commentsWritten: number; + 
warnings: string[]; +} + +function nowIso(): string { + return new Date().toISOString(); +} + +function roleId(role: WorkRole): string { + return role.id ?? `${role.engine}-${role.role}`; +} + +function workflowStatus(decision: Decision): WorkItemStatus { + if (decision.status === 'complete' || decision.status === 'fixed' || decision.status === 'merge-ready') return 'complete'; + if (decision.status === 'failed') return 'failed'; + return 'blocked'; +} + +function packetForRole(workItem: WorkItem, role: WorkRole, suffix: string, inputs: WorkInput[], artifacts: WorkArtifact[], findings?: Finding[]): WorkPacket { + return { + id: `${workItem.id}-${roleId(role)}-${suffix}`, + workItemId: workItem.id, + type: workItem.type, + objective: workItem.objective, + role, + inputs, + policy: workItem.policy, + artifacts, + findings, + }; +} + +function packetArtifact(packet: WorkPacket): WorkArtifact { + return { + id: `${packet.id}-packet`, + type: 'packet', + source: 'orchestrator', + createdAt: nowIso(), + content: JSON.stringify(packet, null, 2), + metadata: { + role: packet.role.role, + engine: packet.role.engine, + workItemId: packet.workItemId, + }, + }; +} + +function findingReportArtifact(workItem: WorkItem, findings: Finding[], idSuffix: string): WorkArtifact { + return { + id: `${workItem.id}-${idSuffix}`, + type: 'finding-report', + source: 'orchestrator', + createdAt: nowIso(), + content: JSON.stringify({ findings }, null, 2), + metadata: { + findingCount: findings.length, + openBlockingCount: findings.filter((finding) => + finding.status === 'open' && (finding.severity === 'blocker' || finding.severity === 'high') + ).length, + }, + }; +} + +function decisionArtifact(workItem: WorkItem, policyResult: PolicyResult, agentResults: AgentResult[], checkState: CheckState): WorkArtifact { + return { + id: `${workItem.id}-decision`, + type: 'decision', + source: 'orchestrator', + createdAt: nowIso(), + content: JSON.stringify({ policyResult, checkState, agentSummaries: 
agentResults.map(({ status, summary }) => ({ status, summary })) }, null, 2), + metadata: { + status: policyResult.decision.status, + allowed: policyResult.allowed, + checkState, + reasonCount: policyResult.reasons.length, + }, + }; +} + +function finalReportArtifact(workItem: WorkItem, policyResult: PolicyResult, findings: Finding[]): WorkArtifact { + return { + id: `${workItem.id}-final-report`, + type: 'finding-report', + source: 'orchestrator', + createdAt: nowIso(), + content: JSON.stringify({ decision: policyResult.decision, findings }, null, 2), + metadata: { + status: policyResult.decision.status, + findingCount: findings.length, + }, + }; +} + +function checkStateArtifact(workItem: WorkItem, checkState: CheckState, timeoutMs: number | undefined): WorkArtifact { + return { + id: `${workItem.id}-ci-state`, + type: 'test-log', + source: 'github-fixture', + createdAt: nowIso(), + content: JSON.stringify({ checkState, timeoutMs, blocked: checkState !== 'green' }, null, 2), + metadata: { checkState, timeoutMs }, + }; +} + +function prInputs(workItem: WorkItem, context: PullRequestContext): WorkInput[] { + return [ + ...workItem.inputs, + { + id: 'fixture-pr-diff', + type: 'git-diff', + source: context.pullRequest.url, + content: context.diff, + metadata: { + repo: context.pullRequest.repo, + number: context.pullRequest.number, + head: context.pullRequest.head, + base: context.pullRequest.base, + labels: context.pullRequest.labels, + changedPaths: context.changedPaths, + }, + }, + { + id: 'fixture-pr-check-state', + type: 'test-output', + source: 'github-checks', + content: `checkState=${context.checkState}`, + metadata: { checkState: context.checkState }, + }, + ...(context.paiComment ? 
[{ + id: 'existing-pai-comment', + type: 'markdown' as const, + source: 'github-comment', + content: JSON.stringify(context.paiComment, null, 2), + metadata: { parsedPaiMarker: true }, + }] : []), + ]; +} + +function fixtureAgentResult(status: AgentResult['status'], summary: string, packet: WorkPacket, findings?: Finding[]): AgentResult { + return { + status, + summary, + artifacts: [{ + id: `${packet.id}-fixture-agent-result`, + type: 'agent-result', + source: packet.role.engine, + createdAt: nowIso(), + content: JSON.stringify({ role: packet.role.role, summary }, null, 2), + metadata: { + dryRunFixture: true, + role: packet.role.role, + engine: packet.role.engine, + }, + }], + findings, + }; +} + +export async function fixturePrAgentRunner(packet: WorkPacket): Promise { + if (packet.role.role === 'reviewer') { + return fixtureAgentResult('findings', 'Fixture reviewer found one high test issue.', packet, [{ + id: `${packet.id}-missing-test`, + severity: 'high', + category: 'test', + location: { path: 'tests/example.test.ts', line: 1 }, + issue: 'The PR changes behavior without a regression test.', + recommendation: 'Add a focused regression test before merge.', + confidence: 'high', + status: 'open', + }]); + } + if (packet.role.role === 'fixer') { + const fixedFindings = (packet.findings ?? 
[]).map((finding) => ({ + ...finding, + status: 'fixed' as const, + recommendation: `${finding.recommendation} Fixture fix added.`, + })); + return fixtureAgentResult('fixed', `Fixture fixer addressed ${fixedFindings.length} finding(s).`, packet, fixedFindings); + } + return fixtureAgentResult('blocked', `Unsupported pr-review role: ${packet.role.role}`, packet); +} + +function reconcileFindings(current: Finding[], updates: Finding[]): Finding[] { + const byId = new Map(current.map((finding) => [finding.id, finding])); + for (const update of updates) byId.set(update.id, update); + return [...byId.values()]; +} + +function openBlockingFindings(findings: Finding[]): Finding[] { + return findings.filter((finding) => + finding.status === 'open' && (finding.severity === 'blocker' || finding.severity === 'high') + ); +} + +function blockedResult(workItem: WorkItem, reason: string, artifacts: WorkArtifact[] = [], warnings: string[] = []): PrReviewResult { + const policyResult: PolicyResult = { + allowed: false, + decision: { status: 'blocked', reason, actions: ['fix-work-item'] }, + reasons: [reason], + requiredActions: ['fix-work-item'], + }; + return { + status: 'blocked', + workItemId: workItem.id, + workflow: 'pr-review', + packets: [], + agentResults: [], + findings: [], + policyResult, + decision: policyResult.decision, + artifacts: [...artifacts, decisionArtifact(workItem, policyResult, [], 'unknown')], + checkState: 'unknown', + commentsWritten: 0, + warnings, + }; +} + +export async function runPrReview(workItem: WorkItem, options: PrReviewOptions = {}): Promise { + const validation = validateWorkItem(workItem); + if (!validation.value) { + return blockedResult(workItem, `Work item validation failed: ${validation.errors.join('; ')}`, [], validation.warnings); + } + if (validation.value.type !== 'pr-review') { + return blockedResult(validation.value, `Unsupported workflow for pr-review runner: ${validation.value.type}`, [], validation.warnings); + } + + const 
github = options.github ?? fixtureGitHubClient(); + const runner = options.runAgent ?? (options.dryRun ? fixturePrAgentRunner : undefined); + if (!runner) { + return blockedResult(validation.value, 'No agent runner configured; live pr-review execution is disabled by default.', [], validation.warnings); + } + + const reviewer = validation.value.roles.find((role) => role.role === 'reviewer'); + const fixer = validation.value.roles.find((role) => role.role === 'fixer'); + if (!reviewer) return blockedResult(validation.value, 'PR review requires a reviewer role.', [], validation.warnings); + if (!fixer) return blockedResult(validation.value, 'PR review requires a fixer role.', [], validation.warnings); + + const artifacts: WorkArtifact[] = [...validation.value.artifacts]; + const packets: WorkPacket[] = []; + const agentResults: AgentResult[] = []; + const malformedReasons: string[] = []; + let commentsWritten = 0; + + const recordArtifact = (artifact: WorkArtifact): WorkArtifact => { + const written = options.ledgerRoot ? writeArtifact(options.ledgerRoot, validation.value.id, artifact) : artifact; + artifacts.push(written); + return written; + }; + + let context: PullRequestContext; + try { + context = await github.getPullRequest(validation.value); + } catch (error) { + return blockedResult(validation.value, `Failed to load PR fixture context: ${error instanceof Error ? 
error.message : String(error)}`, artifacts, validation.warnings); + } + + recordArtifact(checkStateArtifact(validation.value, context.checkState, options.ciWaitTimeoutMs)); + const inputs = prInputs(validation.value, context); + const reviewPacket = packetForRole(validation.value, reviewer, 'review', inputs, artifacts); + packets.push(reviewPacket); + recordArtifact(packetArtifact(reviewPacket)); + + let findings: Finding[] = []; + try { + const rawReviewResult = await runner(reviewPacket); + const reviewResult = validateAgentResult(rawReviewResult); + if (!reviewResult.value) { + malformedReasons.push(`${reviewPacket.id}: ${reviewResult.errors.join('; ')}`); + const errorResult = fixtureAgentResult('error', `Review result failed validation: ${reviewResult.errors.join('; ')}`, reviewPacket); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } else { + agentResults.push(reviewResult.value); + reviewResult.value.artifacts.forEach(recordArtifact); + findings = reconcileFindings(findings, reviewResult.value.findings ?? []); + } + } catch (error) { + const errorResult = fixtureAgentResult('error', `Review runner threw: ${error instanceof Error ? 
error.message : String(error)}`, reviewPacket); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } + + recordArtifact(findingReportArtifact(validation.value, findings, 'review-findings')); + let policyResult = evaluatePolicy({ + policy: validation.value.policy, + requestedAction: 'iterate', + findings, + checkState: context.checkState, + changedPaths: context.changedPaths, + malformedAgentOutput: malformedReasons.length > 0, + agentResult: agentResults.find((result) => result.status === 'error' || result.status === 'blocked'), + }); + + const fixableFindings = openBlockingFindings(findings); + if (fixableFindings.length > 0 && validation.value.policy.maxRounds > 1) { + const fixPacket = packetForRole(validation.value, fixer, 'fix', inputs, artifacts, findings); + packets.push(fixPacket); + recordArtifact(packetArtifact(fixPacket)); + try { + const rawFixResult = await runner(fixPacket); + const fixResult = validateAgentResult(rawFixResult); + if (!fixResult.value) { + malformedReasons.push(`${fixPacket.id}: ${fixResult.errors.join('; ')}`); + const errorResult = fixtureAgentResult('error', `Fix result failed validation: ${fixResult.errors.join('; ')}`, fixPacket); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } else { + agentResults.push(fixResult.value); + fixResult.value.artifacts.forEach(recordArtifact); + findings = reconcileFindings(findings, fixResult.value.findings ?? []); + } + } catch (error) { + const errorResult = fixtureAgentResult('error', `Fix runner threw: ${error instanceof Error ? 
error.message : String(error)}`, fixPacket); + agentResults.push(errorResult); + errorResult.artifacts.forEach(recordArtifact); + } + recordArtifact(findingReportArtifact(validation.value, findings, 'post-fix-findings')); + policyResult = evaluatePolicy({ + policy: validation.value.policy, + requestedAction: 'iterate', + findings, + checkState: context.checkState, + changedPaths: context.changedPaths, + malformedAgentOutput: malformedReasons.length > 0, + agentResult: agentResults.find((result) => result.status === 'error' || result.status === 'blocked'), + }); + if (policyResult.allowed) { + policyResult = { + ...policyResult, + decision: { + ...policyResult.decision, + status: 'fixed', + reason: 'Fix round completed and policy permits iterate.', + }, + }; + } + } + + if (options.emitComment) { + const markdown = emitOrchestratorComment({ + workItemId: validation.value.id, + source: context.pullRequest.url, + findings, + decision: policyResult.decision, + }); + const commentArtifact: WorkArtifact = { + id: `${validation.value.id}-pr-comment`, + type: 'pr-comment', + source: 'orchestrator', + createdAt: nowIso(), + content: markdown, + metadata: { dryRun: options.dryRun === true }, + }; + recordArtifact(commentArtifact); + try { + await github.upsertComment(markdown, { + allowLiveWrites: options.allowGithubWrites, + liveWriteToken: options.liveWriteToken, + }); + commentsWritten += 1; + } catch (error) { + policyResult = { + allowed: false, + decision: { + status: 'blocked', + reason: `GitHub comment write blocked: ${error instanceof Error ? 
error.message : String(error)}`, + actions: ['rerun-with-explicit-live-write-approval'], + }, + reasons: [...policyResult.reasons, 'GitHub comment write blocked.'], + requiredActions: ['rerun-with-explicit-live-write-approval'], + }; + } + } + + const decision = recordArtifact(decisionArtifact(validation.value, policyResult, agentResults, context.checkState)); + const final = recordArtifact(finalReportArtifact(validation.value, policyResult, findings)); + const finalArtifacts = artifacts.includes(final) && artifacts.includes(decision) ? artifacts : [...artifacts, decision, final]; + const updatedWorkItem: WorkItem = { + ...validation.value, + artifacts: finalArtifacts, + status: workflowStatus(policyResult.decision), + }; + if (options.ledgerRoot) saveWorkItem(options.ledgerRoot, updatedWorkItem); + + return { + status: policyResult.decision.status, + workItemId: validation.value.id, + workflow: 'pr-review', + packets, + agentResults, + findings, + policyResult, + decision: policyResult.decision, + artifacts: finalArtifacts, + checkState: context.checkState, + commentsWritten, + warnings: [...validation.warnings, ...malformedReasons], + }; +} diff --git a/PAI/Tools/pai.ts b/PAI/Tools/pai.ts index 4b058734..f032ae7a 100755 --- a/PAI/Tools/pai.ts +++ b/PAI/Tools/pai.ts @@ -638,6 +638,7 @@ COMMANDS: k mcp list List all available MCPs k mcp set Set MCP profile permanently k prompt "" One-shot prompt execution + k orchestrator Run PAI orchestration work items k curate Review and approve/reject staged memory drafts k -w, --wallpaper List/switch wallpapers (Kitty + macOS) k help, -h Show this help @@ -744,6 +745,11 @@ async function main() { wallpaperArgs = args.slice(i + 1); i = args.length; break; + case "orchestrator": + command = "orchestrator"; + wallpaperArgs = args.slice(i + 1); + i = args.length; + break; case "harvest": command = "harvest"; wallpaperArgs = args.slice(i + 1); @@ -814,6 +820,15 @@ async function main() { process.exit(curateProc.exitCode ?? 
0); break; } + case "orchestrator": { + const orchestratorProc = spawnSync(['bun', 'run', paiPath('PAI', 'Tools', 'Orchestrator.ts'), ...wallpaperArgs], { + stdin: 'inherit', + stdout: 'inherit', + stderr: 'inherit', + }); + process.exit(orchestratorProc.exitCode ?? 0); + break; + } case "harvest": { const harvestArgs = wallpaperArgs; const harvestProc = spawnSync(['bun', 'run', paiPath('PAI', 'Tools', 'ReflectionHarvester.ts'), ...harvestArgs], { diff --git a/README.md b/README.md index 039703d2..f32294cf 100755 --- a/README.md +++ b/README.md @@ -26,8 +26,8 @@ The installer symlinks `~/.claude/` to your repo, walks you through identity set | Directory | Contents | |-----------|----------| | `PAI/` | Core system: Algorithm v3.14.0, context routing, system docs | -| `skills/` | 70 skill modules (Research, Security, Writing, Analysis, EM/PLM workflows) | -| `hooks/` | 59 lifecycle hooks (security guards, formatters, analytics, cleanup) | +| `skills/` | 71 skill modules (Research, Security, Writing, Analysis, EM/PLM workflows) | +| `hooks/` | 61 lifecycle hooks (security guards, formatters, analytics, cleanup) | | `agents/` | 20 named agents (Architect, Engineer, 5 researchers, Pentester, etc.) 
| | `config/` | 7 domain config files that generate settings.json | | `scripts/` | KAI Board dashboard, deployment packager | diff --git a/config/hooks.jsonc b/config/hooks.jsonc index 313f5d72..1d05b591 100644 --- a/config/hooks.jsonc +++ b/config/hooks.jsonc @@ -151,6 +151,8 @@ { "type": "command", "command": "${PAI_DIR}/hooks/lib/run-hook.sh MemRecall.hook.ts" }, // Local context injection — outputs additionalContext { "type": "command", "command": "${PAI_DIR}/hooks/lib/run-hook.sh LocalContextFirst.hook.ts" }, + // Skill discovery nudge — deterministic, local-only suggestion + { "type": "command", "command": "${PAI_DIR}/hooks/lib/run-hook.sh SkillDiscoveryRecommender.hook.ts" }, // Classify effort tier and inject context modifications { "type": "command", "command": "${PAI_DIR}/hooks/lib/run-hook.sh ModeClassifier.hook.ts" }, // Detect PAI output format violations and inject correction hint @@ -239,6 +241,15 @@ } ], + // InstructionsLoaded — native instruction observability; telemetry only + "InstructionsLoaded": [ + { + "hooks": [ + { "type": "command", "command": "${PAI_DIR}/hooks/lib/run-hook.sh InstructionsLoaded.hook.ts", "async": true } + ] + } + ], + // UserPromptExpansion — direct slash command telemetry (does not gate prompts) "UserPromptExpansion": [ { diff --git a/config/preferences.jsonc b/config/preferences.jsonc index 65943497..6a951a65 100644 --- a/config/preferences.jsonc +++ b/config/preferences.jsonc @@ -62,7 +62,7 @@ // PAI version information "pai": { "repoUrl": "github.com/kai-cli/kai", - "version": "7.4.2", + "version": "7.7.0", "algorithmVersion": "3.14.0", "productName": "KAI" }, diff --git a/config/spinner-tips.json b/config/spinner-tips.json index 74a25260..16c932e6 100644 --- a/config/spinner-tips.json +++ b/config/spinner-tips.json @@ -124,7 +124,7 @@ "Phase discipline: 7 separate phases. BUILD is not EXECUTE. 
Never merge or skip.", "spinnerVerbs in settings.json customizes the animated verb shown while working.", "The meaning of PAI: magnifying human capabilities through general problem-solving.", - "KAI v.4.2 with Algorithm v3.14.0 \u2014 the latest system architecture.", + "KAI v.7.0 with Algorithm v3.14.0 \u2014 the latest system architecture.", "The counts section in settings.json tracks skills, signals, files, and sessions.", "v4.0: contextFiles is empty. Core context in CLAUDE.md (native). Dynamic context via LoadContext.hook.ts.", "The Splitting Test checks every ISC: and/with test, independent failure, scope words, domain boundaries.", diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index 56c46eca..f304f4f9 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -76,7 +76,7 @@ All configuration lives in `~/.claude/config/*.jsonc` (JSON with comments): | File | What it controls | |------|-----------------| | `identity.jsonc` | Your name, assistant name, timezone | -| `hooks.jsonc` | 59 lifecycle hooks (security, formatting, etc.) | +| `hooks.jsonc` | 61 lifecycle hooks (security, formatting, etc.) | | `permissions.jsonc` | Tool permissions (allow/deny/ask rules) | | `preferences.jsonc` | Environment variables, MCP servers, tech stack | | `notifications.jsonc` | Alert routing (disabled by default) | @@ -91,7 +91,7 @@ bun ~/.claude/hooks/handlers/BuildSettings.ts ## What You Get -### Skills (70 modules) +### Skills (71 modules) KAI includes specialized skills invoked with slash commands or automatically: - **Research** — multi-agent parallel research with dedup - **Security** — recon, web assessment, prompt injection testing @@ -112,7 +112,7 @@ For complex tasks, KAI uses a 7-phase execution framework: Each task gets Ideal State Criteria (ISC) — verifiable checkboxes that must all pass. This is what makes KAI systematically reliable rather than just "AI that tries." 
-### Hooks (59 lifecycle events) +### Hooks (61 lifecycle events) Automated behaviors that fire on specific events: - **SecretScanner** — warns if you're about to commit secrets - **GitHubWriteGuard** — requires confirmation before git push diff --git a/docs/WHATS-DIFFERENT.md b/docs/WHATS-DIFFERENT.md index 766c31bb..c3aa21db 100644 --- a/docs/WHATS-DIFFERENT.md +++ b/docs/WHATS-DIFFERENT.md @@ -10,8 +10,8 @@ KAI diverges with production hardening, multi-model debate/research, self-learni | | Daniel's Original (v4.0.3) | This Fork (KAI) | |---|---|---| -| **Skills** | 63 (12 categories) | 70 (streamlined, no dead skills) | -| **Hooks** | 21 | 59 (all through stderr wrapper) | +| **Skills** | 63 (12 categories) | 71 (streamlined, no dead skills) | +| **Hooks** | 21 | 61 (all through stderr wrapper) | | **Agents** | ~6 generic | 20 specialized (named personas) | | **Algorithm** | v3.5.0 | v3.14.0 | | **Context footprint** | ~19% at startup | Optimized with lazy loading | @@ -68,7 +68,7 @@ Instead of one monolithic `settings.json`, configuration is split into purpose-s ### 4. 
Hook System Hardening -All 59 hooks now go through `run-hook.sh`, which: +All 61 hooks now go through `run-hook.sh`, which: - **Redirects stderr to `/tmp/pai-hooks/`** — prevents "hook error" messages in Claude Code UI - **Async flags on analytics hooks** — PromptAnalysis, RatingCapture, StopOrchestrator, and other inference-heavy hooks run in the background instead of blocking the UI for 15+ seconds diff --git a/fixtures/orchestrator/adversarial-review.json b/fixtures/orchestrator/adversarial-review.json new file mode 100644 index 00000000..279c4818 --- /dev/null +++ b/fixtures/orchestrator/adversarial-review.json @@ -0,0 +1,39 @@ +{ + "id": "fixture-adversarial-review-001", + "type": "adversarial-review", + "objective": "Stress-test a local architecture plan and produce a structured decision artifact.", + "inputs": [ + { + "id": "input-plan", + "type": "markdown", + "source": "docs/planning/example-plan.md", + "content": "# Example Plan\n\nBuild a local-only fixture workflow for orchestration tests." 
+ } + ], + "roles": [ + { + "id": "claude-red-team", + "role": "red-team", + "engine": "claude-local", + "capabilities": ["red-team", "review"] + }, + { + "id": "codex-judge", + "role": "judge", + "engine": "codex-local", + "capabilities": ["judge", "validate"] + } + ], + "policy": { + "autonomy": "advise", + "maxRounds": 1, + "allowedPaths": ["docs/**", "specs/**"], + "blockedPaths": ["MEMORY/**", "USER/**", "Plans/**"], + "requireGreenCI": true, + "allowPush": false, + "allowMerge": false, + "stopOnPrivateBoundaryRisk": true + }, + "artifacts": [], + "status": "queued" +} diff --git a/fixtures/orchestrator/pr-review.json b/fixtures/orchestrator/pr-review.json new file mode 100644 index 00000000..26c8faae --- /dev/null +++ b/fixtures/orchestrator/pr-review.json @@ -0,0 +1,44 @@ +{ + "id": "fixture-pr-review-001", + "type": "pr-review", + "objective": "Review a fixture pull request, capture structured findings, and decide whether it is safe to continue.", + "inputs": [ + { + "id": "input-pr", + "type": "github-pr", + "source": "https://github.com/example/project/pull/123", + "metadata": { + "repo": "example/project", + "number": 123, + "head": "feature/example", + "base": "main" + } + } + ], + "roles": [ + { + "id": "claude-reviewer", + "role": "reviewer", + "engine": "claude-local", + "capabilities": ["review"] + }, + { + "id": "codex-fixer", + "role": "fixer", + "engine": "codex-local", + "capabilities": ["implement", "validate"] + } + ], + "policy": { + "autonomy": "iterate", + "maxRounds": 2, + "allowedPaths": ["src/**", "tests/**", "docs/**"], + "blockedPaths": ["docs/planning/**", "specs/**", "MEMORY/**", "USER/**", "Plans/**"], + "requireGreenCI": true, + "allowPush": false, + "allowMerge": false, + "stopOnPrivateBoundaryRisk": true + }, + "artifacts": [], + "status": "queued" +} diff --git a/hooks/AgentExecutionGuard.hook.ts b/hooks/AgentExecutionGuard.hook.ts index 97258c12..0a889296 100755 --- a/hooks/AgentExecutionGuard.hook.ts +++ 
b/hooks/AgentExecutionGuard.hook.ts @@ -13,14 +13,19 @@ * - run_in_background: true → PASS (correct usage) * - run_in_background: false/missing AND model is "haiku" → PASS (fast-tier inline) * - run_in_background: false/missing AND subagent_type is "Explore" → PASS (quick lookups) - * - All other cases → WARNING (inject system-reminder) + * - All other foreground cases → WARNING (inject system-reminder) + * - Non-lean Agent prompts without the KAI context handoff envelope → WARNING * * PERFORMANCE: * - Non-blocking: Yes (warning only, never blocks) * - Typical execution: <10ms (pure JSON parsing, no I/O) */ +import { agentCallId, decideAgentContextHandoff, missingAgentContextMessage } from './lib/agent-context-handoff'; +import { emitMemoryTelemetry } from './lib/memory-telemetry'; interface HookInput { + session_id?: string; + cwd?: string; tool_name: string; tool_input: { run_in_background?: boolean; @@ -58,33 +63,53 @@ async function main() { const data: HookInput = JSON.parse(input); const toolInput = data.tool_input || {}; - // Already using background — correct usage, pass silently + const reminders: string[] = []; + const contextReminder = missingAgentContextMessage(toolInput); + if (contextReminder) reminders.push(contextReminder); + const contextDecision = decideAgentContextHandoff(toolInput); + emitMemoryTelemetry('agent.spawn', { + session_id: data.session_id, + project: projectName(data), + agent_call_id: agentCallId(toolInput, data.session_id), + agent_type: toolInput.subagent_type || '', + description: toolInput.description || '', + run_in_background: toolInput.run_in_background === true, + context_tier: contextDecision.tier, + context_handoff_present: contextDecision.hasHandoff, + context_handoff_missing: contextDecision.tier !== 'none' && !contextDecision.hasHandoff, + }); + + // Already using background — correct usage for execution mode, but still validate context handoff. 
if (toolInput.run_in_background === true) { + if (reminders.length > 0) console.log(reminders.join('\n\n')); process.exit(0); } // Fast-tier agents don't need background (quick lookups) const agentType = toolInput.subagent_type || ''; if (FAST_AGENT_TYPES.includes(agentType)) { + if (reminders.length > 0) console.log(reminders.join('\n\n')); process.exit(0); } // Haiku model indicates fast-tier — inline is acceptable const model = toolInput.model || ''; if (FAST_MODELS.includes(model)) { + if (reminders.length > 0) console.log(reminders.join('\n\n')); process.exit(0); } // Check if prompt contains ## Scope with FAST timing const prompt = toolInput.prompt || ''; if (/##\s*Scope[\s\S]*?Timing:\s*FAST/i.test(prompt)) { + if (reminders.length > 0) console.log(reminders.join('\n\n')); process.exit(0); } // VIOLATION: Non-fast agent spawned without run_in_background: true const desc = toolInput.description || agentType || 'unknown'; - console.log(` + reminders.push(` WARNING: FOREGROUND AGENT DETECTED — "${desc}" (${agentType}) run_in_background is NOT set to true. This will BLOCK the user interface. @@ -99,6 +124,7 @@ The Algorithm requires ALL non-fast agents to run in background: Only exceptions: Explore agents, haiku-model agents, and agents with ## Scope FAST. `); + console.log(reminders.join('\n\n')); process.exit(0); } catch (err) { // On any error, pass silently — don't block agent execution @@ -109,3 +135,8 @@ Only exceptions: Explore agents, haiku-model agents, and agents with ## Scope FA main().catch((err) => { console.error(`[AgentExecutionGuard] Error:`, err); process.exit(0); }); export {}; + +function projectName(input: HookInput): string { + const dir = process.env.CLAUDE_PROJECT_DIR ?? input.cwd ?? process.cwd(); + return dir.split('/').filter(Boolean).pop() ?? 
'unknown'; +} diff --git a/hooks/AgentMemoryCapture.hook.ts b/hooks/AgentMemoryCapture.hook.ts index 2267a8ea..1fc41d28 100644 --- a/hooks/AgentMemoryCapture.hook.ts +++ b/hooks/AgentMemoryCapture.hook.ts @@ -16,6 +16,7 @@ * * NON-NEGOTIABLE: exits 0 on every path; any error is swallowed (mirrors ReadActivity hygiene). */ +import { agentCallId } from './lib/agent-context-handoff'; import { emitMemoryTelemetry } from './lib/memory-telemetry'; interface HookInput { @@ -50,6 +51,46 @@ function resultSize(resp: unknown): number { } } +function resultText(resp: unknown): string { + try { + if (typeof resp === 'string') return resp; + if (resp == null) return ''; + return JSON.stringify(resp); + } catch { + return ''; + } +} + +function hasDurableCheckpointMarker(resp: unknown): boolean { + return /Durable findings for parent checkpoint\s*:/i.test(resultText(resp)); +} + +type AgentReturnStatus = 'ok' | 'partial' | 'failed' | 'malformed'; + +function statusField(resp: unknown): string { + if (!resp || typeof resp !== 'object' || Array.isArray(resp)) return ''; + const obj = resp as Record; + for (const key of ['status', 'state', 'exit_status', 'conclusion']) { + if (typeof obj[key] === 'string') return obj[key].toLowerCase(); + } + return ''; +} + +function classifyReturnStatus(resp: unknown): AgentReturnStatus { + if (resp === undefined) return 'malformed'; + const explicit = statusField(resp); + if (/^(failed|failure|error|errored|timeout|timed_out|cancelled|canceled|aborted)$/.test(explicit)) { + return 'failed'; + } + if (/^(partial|incomplete|truncated|stalled)$/.test(explicit)) return 'partial'; + + const text = resultText(resp); + if (!text.trim()) return 'malformed'; + if (/"error"\s*:|"exception"\s*:|timed out|timeout|failed|aborted|cancelled|canceled/i.test(text)) return 'failed'; + if (/partial result|partially complete|incomplete|truncated|stalled/i.test(text)) return 'partial'; + return 'ok'; +} + function main(): void { try { const input = readStdin(); @@ 
-65,36 +106,51 @@ function main(): void { const description = input.tool_input?.description ?? ''; const size = resultSize(input.tool_response); const project = projectName(input); + const returnStatus = classifyReturnStatus(input.tool_response); + const durableMarker = hasDurableCheckpointMarker(input.tool_response); + const agent_call_id = agentCallId(input.tool_input, input.session_id); // Always record the return (the save-gap signal). Cheap, swallows errors. emitMemoryTelemetry('agent.return', { session_id: input.session_id, project, + agent_call_id, agent_type: agentType, description, result_chars: size, + return_status: returnStatus, + durable_marker: durableMarker, + run_in_background: input.tool_input?.run_in_background === true, }); // Background agents return asynchronously and the parent often isn't at a checkpoint boundary — // and trivially-small returns rarely carry durable lessons. Gate the *reminder* (not the telemetry) // to substantive, foreground returns to keep the already-busy context chain quiet. - const substantive = size >= 400 && input.tool_input?.run_in_background !== true; + const needsParentAttention = returnStatus !== 'ok' || size >= 400 || durableMarker; + const substantive = needsParentAttention && input.tool_input?.run_in_background !== true; if (!substantive) process.exit(0); emitMemoryTelemetry('agent.checkpoint', { session_id: input.session_id, project, + agent_call_id, agent_type: agentType, + return_status: returnStatus, + durable_marker: durableMarker, }); const label = agentType ? `${agentType} subagent` : 'a subagent'; + const failureClause = returnStatus !== 'ok' + ? ` It appears to have returned ${returnStatus} output; inspect the result before delegating more work.` + : ''; const reminder = `\n` + `🧠 ${label} just returned${description ? ` ("${description}")` : ''}. Subagents cannot persist ` + `memory themselves — YOU are the only actor that can. 
If it established anything durable ` + - `(a cross-project lesson, a project fact, a resolved gotcha), checkpoint it now: offer a ` + + `(a cross-project lesson, a project fact, a resolved gotcha, or a ` + + `"Durable findings for parent checkpoint:" section), checkpoint it now: offer a ` + `\`memcarry capture-lesson\` (per the steering rule) or a project-memory note before moving on. ` + - `If nothing durable was learned, ignore this.\n`; + `If nothing durable was learned, ignore this.${failureClause}\n`; // PostToolUse additionalContext contract (same shape MemoryRecall/LocalContextFirst use). console.log(JSON.stringify({ additionalContext: reminder })); diff --git a/hooks/GitHubWriteGuard.hook.ts b/hooks/GitHubWriteGuard.hook.ts index 79705834..e073c411 100755 --- a/hooks/GitHubWriteGuard.hook.ts +++ b/hooks/GitHubWriteGuard.hook.ts @@ -474,6 +474,8 @@ function cleanExpiredTokens(): void { } async function main() { + let command = ''; + let cwd = process.env.CLAUDE_PROJECT_DIR || process.cwd(); try { const raw = await Promise.race([ Bun.stdin.text(), @@ -481,8 +483,8 @@ async function main() { ]).catch(() => '{}'); const input: HookInput = JSON.parse(raw || '{}'); - const command = input.tool_input?.command || ''; - const cwd = input.cwd || process.env.CLAUDE_PROJECT_DIR || process.cwd(); + command = input.tool_input?.command || ''; + cwd = input.cwd || process.env.CLAUDE_PROJECT_DIR || process.cwd(); if (!command) { console.log(JSON.stringify({ continue: true })); @@ -506,6 +508,10 @@ async function main() { process.exit(0); } + if (process.env.PAI_GITHUB_WRITE_GUARD_TEST_THROW_AFTER_DETECT === '1') { + throw new Error('forced guard error after write detection'); + } + const adaBranch = checkAdaBranch(command, cwd); const adaProcedureCard = adaBranch ? 
formatProcedureCard(adaBranch.config) : []; if (adaBranch?.mismatch && adaBranch.hardBlock) { @@ -572,7 +578,16 @@ async function main() { process.exit(0); } catch (err) { console.error(`[GitHubWriteGuard] Error: ${err}`); - // Fail-open: don't block operations on hook errors + // Fail closed for detected GitHub writes: owner-access mutations must not pass just because + // a later guard check failed. Unknown/read-only commands still fail open to avoid wedging tooling. + const writeInfo = command ? isGitHubWriteCommand(command) : { write: false, description: '' }; + if (writeInfo.write) { + console.log(JSON.stringify({ + decision: 'block', + reason: `🔒 GITHUB WRITE BLOCKED: guard error while validating ${writeInfo.description}. Fail-closed; rerun after fixing the hook error.`, + })); + process.exit(0); + } console.log(JSON.stringify({ continue: true })); process.exit(0); } diff --git a/hooks/InstructionsLoaded.hook.ts b/hooks/InstructionsLoaded.hook.ts new file mode 100644 index 00000000..3a28d938 --- /dev/null +++ b/hooks/InstructionsLoaded.hook.ts @@ -0,0 +1,58 @@ +#!/usr/bin/env bun +/** + * InstructionsLoaded.hook.ts — native instruction-load observability. + * + * Telemetry only. This hook records the current event shape and source metadata + * so instruction churn can be analyzed without prompt-time file polling. 
+ */ +import { readFileSync } from 'node:fs'; +import { basename } from 'node:path'; +import { emitMemoryTelemetry } from './lib/memory-telemetry'; + +interface InstructionsLoadedInput { + session_id?: string; + hook_event_name?: string; + cwd?: string; + path?: string; + file_path?: string; + memory_type?: string; + reason?: string; + source?: string; + globs?: unknown; +} + +function readInput(): InstructionsLoadedInput { + try { + return JSON.parse(readFileSync(0, 'utf8') || '{}') as InstructionsLoadedInput; + } catch { + return {}; + } +} + +function projectName(input: InstructionsLoadedInput): string { + const dir = process.env.CLAUDE_PROJECT_DIR ?? input.cwd ?? process.cwd(); + return dir.split('/').filter(Boolean).pop() ?? 'unknown'; +} + +function main(): void { + try { + const input = readInput(); + const sourcePath = input.file_path ?? input.path; + emitMemoryTelemetry('instructions.loaded', { + session_id: input.session_id, + project: projectName(input), + hook_event_name: input.hook_event_name ?? 'InstructionsLoaded', + source: input.source, + reason: input.reason, + memory_type: input.memory_type, + path: sourcePath, + file: sourcePath ? basename(sourcePath) : undefined, + globs_count: Array.isArray(input.globs) ? input.globs.length : undefined, + }); + } catch { + // Observability only. Never disrupt instruction loading. 
+ } + process.exit(0); +} + +if (import.meta.main) main(); diff --git a/hooks/LoadContext.hook.ts b/hooks/LoadContext.hook.ts index 03d0f6f4..71336cee 100755 --- a/hooks/LoadContext.hook.ts +++ b/hooks/LoadContext.hook.ts @@ -35,7 +35,7 @@ */ import { readFileSync, existsSync, readdirSync, appendFileSync, mkdirSync } from 'fs'; -import { loadIndexMemory, initializeMeta, loadMeta } from './lib/memory-disclosure'; +import { loadIndexMemory, loadPromotedInsights, initializeMeta, loadMeta } from './lib/memory-disclosure'; import { decayInstincts, surfaceInstincts, formatInstinctContext } from './lib/instinct-store'; function ttyLog(msg: string): void { @@ -685,7 +685,7 @@ async function main() { } } } - indexMemory = loadIndexMemory(paiDir); + indexMemory = [loadIndexMemory(paiDir), loadPromotedInsights(paiDir)].filter(Boolean).join('\n\n'); if (indexMemory) { console.error(`[LoadContext] Feature A: loaded index memory (${indexMemory.length} chars)`); } @@ -813,7 +813,7 @@ Dynamic context loaded. Core identity, rules, and format are in CLAUDE.md. 
} catch { /* non-fatal */ } flushTty(); - console.error('✅ KAI session initialization complete (v7.4.2)'); + console.error('✅ KAI session initialization complete (v7.7.0)'); process.exit(0); } catch (error) { flushTty(); diff --git a/hooks/MemoryRecall.hook.ts b/hooks/MemoryRecall.hook.ts index bc3a2f34..bd715736 100755 --- a/hooks/MemoryRecall.hook.ts +++ b/hooks/MemoryRecall.hook.ts @@ -27,6 +27,7 @@ import { rankEntries, type MemoryEntry as ScorerEntry } from './lib/memory-score import { isCrossProjectBodyInjectionEnabled } from './lib/config-loader'; import { emitMemoryTelemetry } from './lib/memory-telemetry'; import { recordSurfaced } from './lib/recall-hit-ledger'; +import { redactSecrets } from './lib/redact'; interface MemoryEntry { title: string; @@ -356,6 +357,11 @@ function lookupCrossProject(promptKeywords: string[], currentMemDir: string): st /** * Find the best-matching memory file in a cross-project and return its body (capped). */ +export function sanitizeCrossProjectBody(body: string, maxChars = 1000): string { + const redacted = redactSecrets(body); + return redacted.length > maxChars ? redacted.substring(0, maxChars) + '…' : redacted; +} + function findAndReadBestMatch(projectSlug: string, promptKeywords: string[], projectsBase: string): string | null { // Find the project directory let projMemDir: string | null = null; @@ -401,8 +407,8 @@ function findAndReadBestMatch(projectSlug: string, promptKeywords: string[], pro const secondDash = raw.indexOf('---', 4); const body = secondDash > 0 ? raw.substring(secondDash + 4).trim() : raw.trim(); - // Cap at 1000 chars to control context budget - return body.length > 1000 ? body.substring(0, 1000) + '…' : body; + // Cap after redaction so credential-shaped content never enters model context. 
+ return sanitizeCrossProjectBody(body); } catch { return null; } } diff --git a/hooks/SessionCleanup.hook.ts b/hooks/SessionCleanup.hook.ts index 67d896f2..9d918a09 100755 --- a/hooks/SessionCleanup.hook.ts +++ b/hooks/SessionCleanup.hook.ts @@ -46,6 +46,10 @@ const MEMORY_DIR = paiPath('MEMORY'); const STATE_DIR = paiPath('MEMORY', 'STATE'); const WORK_DIR = paiPath('MEMORY', 'WORK'); +export function autoConsolidateEnabled(env: NodeJS.ProcessEnv = process.env): boolean { + return /^(1|true|yes)$/i.test(env.PAI_AUTO_CONSOLIDATE || ''); +} + // Session-scoped state file lookup with legacy fallback function findStateFile(sessionId?: string): string | null { if (sessionId) { @@ -528,6 +532,8 @@ export function maybeRunSynthesisBackstop(): void { */ export function maybeAutoConsolidate(): void { try { + if (!autoConsolidateEnabled()) return; + const consolidatePath = join(paiPath(), 'PAI', 'Tools', 'AutoConsolidate.ts'); if (!existsSync(consolidatePath)) return; diff --git a/hooks/SkillDiscoveryRecommender.hook.ts b/hooks/SkillDiscoveryRecommender.hook.ts new file mode 100644 index 00000000..f30be182 --- /dev/null +++ b/hooks/SkillDiscoveryRecommender.hook.ts @@ -0,0 +1,181 @@ +#!/usr/bin/env bun +/** + * SkillDiscoveryRecommender.hook.ts — lightweight skill discoverability nudge. + * + * Deterministic and local-only: reads SKILL.md frontmatter descriptions, scores + * USE WHEN phrases against the prompt, and injects one compact suggestion. 
+ */ +import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from 'node:fs'; +import { dirname, join } from 'node:path'; +import { readHookInput } from './lib/hook-io'; +import { paiPath } from './lib/paths'; + +export interface SkillCard { + name: string; + description: string; + triggers: string[]; +} + +export interface SkillRecommendation { + skill: string; + score: number; + trigger: string; +} + +interface SkillCacheFile { + version: 1; + skillsDir: string; + manifest: string; + cards: SkillCard[]; +} + +const STOPWORDS = new Set([ + 'a', 'an', 'and', 'as', 'for', 'from', 'in', 'into', 'of', 'on', 'or', 'the', 'to', 'use', 'when', + 'with', 'this', 'that', 'please', 'can', 'you', 'my', 'our', +]); + +const CACHE_PATH = '/tmp/pai-hooks/skill-discovery-cache.json'; + +function parseFrontmatter(raw: string): Record<string, string> { + const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/); + if (!match) return {}; + const out: Record<string, string> = {}; + for (const line of match[1].split(/\r?\n/)) { + const m = line.match(/^([\w-]+):\s*(.*)$/); + if (m) out[m[1]] = m[2].replace(/^["']|["']$/g, '').trim(); + } + return out; +} + +function extractTriggers(description: string): string[] { + const match = description.match(/USE WHEN\s+(.+)$/i); + const source = match ? 
match[1] : description; + return source + .split(/[,;|]/) + .map(part => part.trim().replace(/\.$/, '')) + .filter(Boolean); +} + +export function loadSkillCards(skillsDir = paiPath('skills')): SkillCard[] { + if (!existsSync(skillsDir)) return []; + return readdirSync(skillsDir, { withFileTypes: true }) + .filter(entry => entry.isDirectory() && !entry.name.startsWith('.')) + .map(entry => { + const path = join(skillsDir, entry.name, 'SKILL.md'); + if (!existsSync(path)) return null; + try { + const fm = parseFrontmatter(readFileSync(path, 'utf8')); + const name = fm.name || entry.name; + const description = fm.description || ''; + return { name, description, triggers: extractTriggers(description) }; + } catch { + return null; + } + }) + .filter((card): card is SkillCard => card !== null); +} + +function skillManifest(skillsDir: string): string { + if (!existsSync(skillsDir)) return ''; + const entries = readdirSync(skillsDir, { withFileTypes: true }) + .filter(entry => entry.isDirectory() && !entry.name.startsWith('.')) + .map(entry => { + const skillPath = join(skillsDir, entry.name, 'SKILL.md'); + if (!existsSync(skillPath)) return `${entry.name}:missing`; + try { + const stat = statSync(skillPath); + return `${entry.name}:${stat.mtimeMs}:${stat.size}`; + } catch { + return `${entry.name}:unreadable`; + } + }); + return entries.sort().join('|'); +} + +export function clearSkillCardCache(cachePath = CACHE_PATH): void { + try { + writeFileSync(cachePath, '', 'utf8'); + } catch { + // Test helper / best-effort cache invalidation. 
+ } +} + +export function loadSkillCardsCached(skillsDir = paiPath('skills'), cachePath = CACHE_PATH): SkillCard[] { + const manifest = skillManifest(skillsDir); + if (!manifest) return []; + try { + const cached = JSON.parse(readFileSync(cachePath, 'utf8')) as SkillCacheFile; + if (cached.version === 1 && cached.skillsDir === skillsDir && cached.manifest === manifest && Array.isArray(cached.cards)) { + return cached.cards; + } + } catch { + // Cache miss/corruption: rebuild from source. + } + const cards = loadSkillCards(skillsDir); + try { + mkdirSync(dirname(cachePath), { recursive: true }); + writeFileSync(cachePath, JSON.stringify({ version: 1, skillsDir, manifest, cards }), 'utf8'); + } catch { + // Recommendations are optional; cache write failures must not affect prompts. + } + return cards; +} + +function tokenize(text: string): string[] { + return text + .toLowerCase() + .replace(/[^a-z0-9/ -]/g, ' ') + .split(/\s+/) + .filter(token => token.length > 2 && !STOPWORDS.has(token)); +} + +function explicitlyInvoked(prompt: string, skill: string): boolean { + const escaped = skill.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(`(?:/|Skill\\(["'])${escaped}(?:\\b|["'])`, 'i').test(prompt); +} + +export function recommendSkills(prompt: string, cards: SkillCard[], limit = 1): SkillRecommendation[] { + const lower = prompt.toLowerCase(); + const promptTokens = new Set(tokenize(prompt)); + const recs: SkillRecommendation[] = []; + + for (const card of cards) { + if (explicitlyInvoked(prompt, card.name)) continue; + let best: SkillRecommendation | null = null; + for (const trigger of card.triggers) { + const triggerLower = trigger.toLowerCase(); + const triggerTokens = tokenize(triggerLower); + let score = 0; + if (triggerLower.length >= 4 && lower.includes(triggerLower)) score += 4; + for (const token of triggerTokens) { + if (promptTokens.has(token)) score += 1; + } + if (score >= 2 && (!best || score > best.score)) { + best = { skill: card.name, 
score, trigger }; + } + } + if (best) recs.push(best); + } + + return recs + .sort((a, b) => b.score - a.score || a.skill.localeCompare(b.skill)) + .slice(0, limit); +} + +async function main(): Promise<void> { + try { + const input = await readHookInput(); + const prompt = (input?.prompt ?? input?.user_prompt ?? '').trim(); + if (!prompt) process.exit(0); + const [rec] = recommendSkills(prompt, loadSkillCardsCached()); + if (!rec) process.exit(0); + console.log(JSON.stringify({ + additionalContext: `Relevant PAI capability: consider invoking /${rec.skill} (${rec.trigger}).`, + })); + } catch { + // Suggestions are optional and must never block prompt submission. + } + process.exit(0); +} + +if (import.meta.main) main(); diff --git a/hooks/TaskCompleted.hook.ts b/hooks/TaskCompleted.hook.ts index f74d5275..611570ae 100755 --- a/hooks/TaskCompleted.hook.ts +++ b/hooks/TaskCompleted.hook.ts @@ -25,6 +25,8 @@ * - A CLI command result (contains $ or exit code reference) * - A test result reference (pass/fail count) */ +import { agentUpdateStatus } from './lib/algorithm-state'; +import { emitMemoryTelemetry } from './lib/memory-telemetry'; interface HookInput { task_id?: string; @@ -33,6 +35,7 @@ interface HookInput { teammate_name?: string; team_name?: string; session_id?: string; + cwd?: string; } async function main() { @@ -58,7 +61,7 @@ async function main() { const isISCTask = /^ISC-/i.test(subject.trim()); if (!isISCTask) { - // Non-ISC task — pass through freely + emitCompletion(input, subject, false); process.exit(0); } @@ -85,6 +88,8 @@ async function main() { process.exit(2); } + emitCompletion(input, subject, true); + // Verification evidence found — allow completion process.exit(0); } catch (error) { @@ -97,3 +102,23 @@ async function main() { main().catch((err) => { console.error(`[TaskCompleted] Error:`, err); process.exit(0); }); export {}; + +function emitCompletion(input: HookInput, subject: string, iscTask: boolean): void { + 
emitMemoryTelemetry('agent.task_completed', { + session_id: input.session_id, + project: projectName(input), + task_id: input.task_id, + task_subject: subject, + teammate_name: input.teammate_name, + team_name: input.team_name, + isc_task: iscTask, + }); + if (input.session_id && input.teammate_name) { + agentUpdateStatus(input.session_id, input.teammate_name, 'completed'); + } +} + +function projectName(input: HookInput): string { + const dir = process.env.CLAUDE_PROJECT_DIR ?? input.cwd ?? process.cwd(); + return dir.split('/').filter(Boolean).pop() ?? 'unknown'; +} diff --git a/hooks/TeammateIdle.hook.ts b/hooks/TeammateIdle.hook.ts index 597117b6..5e7644fe 100755 --- a/hooks/TeammateIdle.hook.ts +++ b/hooks/TeammateIdle.hook.ts @@ -24,6 +24,8 @@ * - A completion signal ("completed", "done", "finished", "passing") * - Any non-trivial content (>100 chars) */ +import { agentUpdateStatus } from './lib/algorithm-state'; +import { emitMemoryTelemetry } from './lib/memory-telemetry'; interface HookInput { session_id?: string; @@ -31,6 +33,7 @@ interface HookInput { teammate_name?: string; team_name?: string; last_message?: string; // Legacy/future optional field; current Claude payload does not include it. + cwd?: string; } async function main() { @@ -64,6 +67,7 @@ async function main() { // If no message content is available, allow idle; current Claude payloads do not include it. 
if (!last_message) { + emitIdle(input, false); process.exit(0); } @@ -90,6 +94,7 @@ async function main() { } // Allow idle + emitIdle(input, true); process.exit(0); } catch (error) { // Never block on hook error @@ -101,3 +106,21 @@ async function main() { main().catch((err) => { console.error(`[TeammateIdle] Error:`, err); process.exit(0); }); export {}; + +function emitIdle(input: HookInput, hadMessage: boolean): void { + emitMemoryTelemetry('agent.idle', { + session_id: input.session_id, + project: projectName(input), + teammate_name: input.teammate_name, + team_name: input.team_name, + had_message: hadMessage, + }); + if (input.session_id && input.teammate_name) { + agentUpdateStatus(input.session_id, input.teammate_name, 'idle'); + } +} + +function projectName(input: HookInput): string { + const dir = process.env.CLAUDE_PROJECT_DIR ?? input.cwd ?? process.cwd(); + return dir.split('/').filter(Boolean).pop() ?? 'unknown'; +} diff --git a/hooks/handlers/WikiCurrency.ts b/hooks/handlers/WikiCurrency.ts index e23207d7..542eb288 100644 --- a/hooks/handlers/WikiCurrency.ts +++ b/hooks/handlers/WikiCurrency.ts @@ -41,8 +41,9 @@ export interface WikiProject { } const HOME = process.env.HOME || ''; +const PAI_REPO = process.env.PAI_REPO_DIR || getPaiDir(); const WIKI_PROJECTS: WikiProject[] = [ - { repo: join(HOME, 'Projects/kai'), name: 'PAI', wikiRepo: join(HOME, 'Projects/PAI-Wiki') }, + { repo: PAI_REPO, name: 'PAI', wikiRepo: join(HOME, 'Projects/PAI-Wiki') }, { repo: join(HOME, 'Projects/Du_tracking'), name: 'Du-tracking', wikiSubdir: 'wiki/' }, // YourCompany firmware code lives in many repos; its wiki is ~/Projects/YourCompany-Wiki — wire per-repo if needed. 
]; diff --git a/hooks/lib/agent-context-handoff.ts b/hooks/lib/agent-context-handoff.ts new file mode 100644 index 00000000..21594b06 --- /dev/null +++ b/hooks/lib/agent-context-handoff.ts @@ -0,0 +1,147 @@ +export type AgentContextTier = 'none' | 'rules' | 'full'; + +export interface AgentToolInput { + subagent_type?: string; + description?: string; + prompt?: string; + model?: string; + run_in_background?: boolean; +} + +export interface AgentContextDecision { + tier: AgentContextTier; + reason: string; + hasHandoff: boolean; +} + +const HANDOFF_OPEN_RE = /): string { + const shared = [ + 'Follow current repo rules before acting: branch-only workflow, read before modifying, minimal scope, verify before reporting done.', + 'Do not mutate public KAI, remotes, PRs, or release artifacts unless the parent prompt includes explicit current-turn approval.', + 'Preserve user and memory content. Never delete, move, or rewrite memory based on format judgment.', + 'Subagents cannot persist durable memory. If you learn something the parent should save, return a section named "Durable findings for parent checkpoint:" with only the facts worth keeping.', + 'ADA pointer: if the task touches ambient domain activation or delegation reach, read docs/planning/ambient-domain-activation-design.md and docs/planning/ROADMAP-7.x.md before changing behavior.', + ]; + + const full = [ + 'For broad design/research/security work, first inspect the relevant repo docs and nearby tests, then state assumptions and evidence in the result.', + 'When changing code, include the verification commands you ran and any residual risk the parent must decide on.', + ]; + + const lines = tier === 'full' ? [...shared, ...full] : shared; + return `\n${lines.map(line => `- ${line}`).join('\n')}\n`; +} + +export function buildBackgroundDelegationEnvelope(): string { + return [ + buildAgentContextHandoff('rules'), + ``, + `- Background/SDK sessions do not share parent conversation state. 
Include only task-relevant paths, constraints, approvals, and this handoff block.`, + `- Do not include private conversation history, unrelated memories, credentials, tokens, or public-KAI-prohibited content.`, + `- Coordinate through commits, PR comments, or explicit task/status files; do not assume synchronous return to the parent session.`, + ``, + ].join('\n'); +} + +export function missingAgentContextMessage(input: AgentToolInput = {}): string | null { + const decision = decideAgentContextHandoff(input); + if (decision.tier === 'none' || decision.hasHandoff) return null; + + const label = input.subagent_type || input.description || 'Agent'; + return [ + ``, + `Agent context handoff missing for ${label} (tier: ${decision.tier}; ${decision.reason}).`, + `Add this block near the top of the Agent prompt so delegated work receives the critical project rules:`, + buildAgentContextHandoff(decision.tier), + `Use PAI_CONTEXT_TIER:none only for intentionally lean lookup agents.`, + ``, + ].join('\n'); +} + +export function agentCallId(input: AgentToolInput = {}, sessionId = ''): string { + const stable = [ + sessionId, + input.subagent_type ?? '', + input.description ?? '', + (input.prompt ?? '').slice(0, 240), + ].join('\u001f'); + + let hash = 2166136261; + for (let i = 0; i < stable.length; i++) { + hash ^= stable.charCodeAt(i); + hash = Math.imul(hash, 16777619); + } + return `agent_${(hash >>> 0).toString(16).padStart(8, '0')}`; +} diff --git a/hooks/lib/algorithm-state.ts b/hooks/lib/algorithm-state.ts index 28460a50..926f82ed 100755 --- a/hooks/lib/algorithm-state.ts +++ b/hooks/lib/algorithm-state.ts @@ -406,6 +406,22 @@ export function agentAdd(sessionId: string, agent: { name: string; agentType: st writeState(state); } +/** + * Called by native team lifecycle hooks when a known teammate transitions state. + * Matching is intentionally exact to avoid attributing a team event to the wrong agent. 
+ */ +export function agentUpdateStatus(sessionId: string, agentName: string, status: AlgorithmAgent['status']): boolean { + const state = readState(sessionId); + if (!state) return false; + + const agent = state.agents.find(a => a.name === agentName); + if (!agent) return false; + + agent.status = status; + writeState(state); + return true; +} + /** * Called by Stop handler after response is complete. * Enriches state with data extracted from the full transcript. diff --git a/hooks/lib/memory-disclosure.ts b/hooks/lib/memory-disclosure.ts index beadad64..95fed4e5 100644 --- a/hooks/lib/memory-disclosure.ts +++ b/hooks/lib/memory-disclosure.ts @@ -34,6 +34,16 @@ function timelinePath(paiDir: string): string { return join(paiDir, 'MEMORY', 'STATE', 'timeline.jsonl'); } +function currentProjectMemoryPath(paiDir: string, filename: string): string { + const claudeProjectDir = process.env.CLAUDE_PROJECT_DIR || ''; + if (claudeProjectDir) { + const encoded = encodeProjectDir(claudeProjectDir); + const projectPath = join(paiDir, 'projects', encoded, 'memory', filename); + if (existsSync(projectPath)) return projectPath; + } + return join(paiDir, filename); +} + export function loadMeta(paiDir: string): MemoryMetaEntry[] { const path = metaPath(paiDir); if (!existsSync(path)) return []; @@ -81,16 +91,7 @@ export function applyAging(entries: MemoryMetaEntry[]): MemoryMetaEntry[] { * Updates last_accessed in metadata for every surfaced entry. 
*/ export function loadIndexMemory(paiDir: string): string { - // Find MEMORY.md via project dir convention (Claude Code encodes paths as -Users-x-Projects-y) - const claudeProjectDir = process.env.CLAUDE_PROJECT_DIR || ''; - let mdPath = ''; - if (claudeProjectDir) { - const encoded = encodeProjectDir(claudeProjectDir); - const projectMemPath = join(paiDir, 'projects', encoded, 'memory', 'MEMORY.md'); - if (existsSync(projectMemPath)) mdPath = projectMemPath; - } - if (!mdPath) mdPath = join(paiDir, 'MEMORY.md'); - if (!existsSync(mdPath)) return ''; + const mdPath = currentProjectMemoryPath(paiDir, 'MEMORY.md'); if (!existsSync(mdPath)) return ''; try { @@ -106,6 +107,32 @@ export function loadIndexMemory(paiDir: string): string { } } +/** + * Load the latest promoted insight sections for the current project. + * + * These are consolidated lessons written by `pai curate promote`; they are not + * guaranteed to fit in the first 50 lines of MEMORY.md, so they get a compact + * recall path here. + */ +export function loadPromotedInsights(paiDir: string, maxSections = 3): string { + const path = currentProjectMemoryPath(paiDir, 'insights_promoted.md'); + if (!existsSync(path)) return ''; + + try { + const content = readFileSync(path, 'utf-8'); + const sections = content + .split(/\n(?=## )/) + .filter(section => section.startsWith('## ')) + .slice(-maxSections) + .map(section => section.trim()); + + if (sections.length === 0) return ''; + return `## Promoted Insights (recent)\n\n${sections.join('\n\n')}`; + } catch { + return ''; + } +} + function updateIndexAccess(paiDir: string, indexLines: string[]): void { try { let entries = loadMeta(paiDir); diff --git a/hooks/lib/memory-telemetry.ts b/hooks/lib/memory-telemetry.ts index f87af820..e0a85609 100644 --- a/hooks/lib/memory-telemetry.ts +++ b/hooks/lib/memory-telemetry.ts @@ -17,14 +17,16 @@ * - recall.latency / capture.latency → wall-time of those paths (ms). Health + revert signal. 
* - coherence.drift → the D2 proceed-to-5 TRIGGER metric: facts diverging between the * PAI .md store and the atom store (correctness drift; see plan §6/§9 — sufficient-not-necessary). - * - agent.return / agent.checkpoint → Phase-0 capture-loss-guard signal (subagent returned; parent - * prompted to persist). + * - agent.spawn / agent.return / agent.checkpoint → Agent resilience + Phase-0 capture-loss-guard + * signal (subagent spawned/returned; parent prompted to persist). + * - agent.task_completed / agent.idle → native team-event observability for delegated work recovery. * - knowledge.sync → KnowledgeSync run/domain cost baseline: mode, status, facts, * output size, and wall time. Observability only; must not change SessionEnd behavior. * - session_end.composite → SessionEndComposite gate observability: metrics, selected/skipped * hooks, succeeded/failed counts, and wall time. This explains why KnowledgeSync did or did not run. * - turn.prompt → cheap UserPromptSubmit heartbeat. Metadata only; proves prompt * telemetry is wired and gives offline reports a per-turn anchor before model response latency. + * - instructions.loaded → native instruction-load event: which instruction source changed and why. 
*/ import { appendFileSync, mkdirSync, readFileSync } from 'node:fs'; import { join, dirname } from 'node:path'; @@ -36,11 +38,15 @@ export type MemoryTelemetryType = | 'recall.latency' | 'capture.latency' | 'coherence.drift' + | 'agent.spawn' | 'agent.return' | 'agent.checkpoint' + | 'agent.task_completed' + | 'agent.idle' | 'knowledge.sync' | 'session_end.composite' - | 'turn.prompt'; + | 'turn.prompt' + | 'instructions.loaded'; export interface MemoryTelemetryEvent { ts: string; diff --git a/hooks/lib/payload-schema.ts b/hooks/lib/payload-schema.ts index 01141c38..56655f1e 100644 --- a/hooks/lib/payload-schema.ts +++ b/hooks/lib/payload-schema.ts @@ -15,6 +15,7 @@ export type HookEventName = | 'PreToolUse' | 'PostToolUse' | 'PreCompact' + | 'InstructionsLoaded' | 'UserPromptExpansion' | 'ConfigChange' | 'WorktreeRemove' @@ -73,6 +74,15 @@ export const PAYLOAD_SCHEMAS: Record = { { field: 'trigger', type: 'string', required: false }, // "manual" | "auto" { field: 'custom_instructions', type: 'string', required: false }, ], + InstructionsLoaded: [ + ...BASE, + { field: 'path', type: 'string', required: false }, + { field: 'file_path', type: 'string', required: false }, + { field: 'memory_type', type: 'string', required: false }, + { field: 'reason', type: 'string', required: false }, + { field: 'source', type: 'string', required: false }, + { field: 'globs', type: 'object', required: false }, + ], UserPromptExpansion: [ ...BASE, { field: 'command_name', type: 'string', required: false }, // direct slash command name diff --git a/install.sh b/install.sh index 506a893e..8d190848 100755 --- a/install.sh +++ b/install.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # ═══════════════════════════════════════════════════════════ -# KAI Installer v7.4 — Bootstrap Script +# KAI Installer v7.7 — Bootstrap Script # Requirements: bash, curl # This script bootstraps the installer by ensuring Bun is # available, then hands off to the TypeScript installer. 
diff --git a/manifest.json b/manifest.json index 474019f3..39e830fd 100644 --- a/manifest.json +++ b/manifest.json @@ -1,10 +1,10 @@ { - "version": "7.4.2", + "version": "7.7.0", "productName": "KAI", "algorithmVersion": "v3.14.0", "counts": { - "skills": 70, - "hooks": 59, + "skills": 71, + "hooks": 61, "agents": 20 }, "hookInventory": [ @@ -22,6 +22,7 @@ "HealthCheck.hook", "InsightExtractor.hook", "InstinctCapture.hook", + "InstructionsLoaded.hook", "IntegrityCheck.hook", "KnowledgeSync.hook", "LastResponseCache.hook", @@ -50,6 +51,7 @@ "SessionEndComposite.hook", "SessionSummary.hook", "SetQuestionTab.hook", + "SkillDiscoveryRecommender.hook", "SkillGuard.hook", "SkillTracker.hook", "StartupGreeting.hook", @@ -126,6 +128,7 @@ "Security/Recon", "Security/SECUpdates", "Security/WebAssessment", + "SpecKit", "Telos", "Thinking", "Thinking/BeCreative", diff --git a/scripts/bun-test-gate.ts b/scripts/bun-test-gate.ts new file mode 100644 index 00000000..81db1c4d --- /dev/null +++ b/scripts/bun-test-gate.ts @@ -0,0 +1,103 @@ +#!/usr/bin/env bun +/** + * Parse Bun test output as a gate instead of grepping one substring. + * + * Fails closed on missing summary, non-zero fail count, zero tests, non-zero + * exit status, or known runtime crash signatures. + */ + +export interface BunTestSummary { + pass: number; + skip: number; + fail: number; + expectCalls: number | null; + testsRun: number; + filesRun: number; +} + +export interface BunTestGateResult { + ok: boolean; + summary?: BunTestSummary; + errors: string[]; +} + +const CRASH_PATTERNS = [ + /panic:/i, + /segmentation fault/i, + /\bSIG(?:ABRT|SEGV|BUS|ILL)\b/i, + /Illegal instruction/i, + /Bus error/i, + /core dumped/i, +]; + +function parseCount(output: string, label: string): number { + const match = output.match(new RegExp(`(?:^|\\n)\\s*(\\d+)\\s+${label}\\b`, 'i')); + return match ? 
Number(match[1]) : 0; +} + +export function parseBunTestOutput(output: string): BunTestSummary | null { + const ran = output.match(/Ran\s+(\d+)\s+tests?\s+across\s+(\d+)\s+files?/i); + if (!ran) return null; + + const expectMatch = output.match(/(?:^|\n)\s*(\d+)\s+expect\(\)\s+calls?/i); + return { + pass: parseCount(output, 'pass'), + skip: parseCount(output, 'skip'), + fail: parseCount(output, 'fail'), + expectCalls: expectMatch ? Number(expectMatch[1]) : null, + testsRun: Number(ran[1]), + filesRun: Number(ran[2]), + }; +} + +export function validateBunTestOutput(output: string, exitCode: number): BunTestGateResult { + const errors: string[] = []; + const summary = parseBunTestOutput(output); + + if (!summary) { + errors.push('Bun test output did not include a complete "Ran N tests across M files" summary.'); + } else { + if (summary.testsRun <= 0) errors.push('Bun reported zero tests run.'); + if (summary.filesRun <= 0) errors.push('Bun reported zero test files run.'); + if (summary.fail !== 0) errors.push(`Bun reported ${summary.fail} failing test(s).`); + if (summary.pass + summary.fail <= 0) errors.push('Bun reported no passing or failing tests.'); + } + + if (exitCode !== 0) errors.push(`Bun exited non-zero (${exitCode}).`); + for (const pattern of CRASH_PATTERNS) { + if (pattern.test(output)) { + errors.push(`Bun output matched crash signature: ${pattern}`); + break; + } + } + + return { ok: errors.length === 0, summary: summary ?? undefined, errors }; +} + +async function readStdin(): Promise<string> { + const chunks: Uint8Array[] = []; + for await (const chunk of Bun.stdin.stream()) chunks.push(chunk); + return Buffer.concat(chunks).toString('utf-8'); +} + +function argValue(name: string, fallback = ''): string { + const idx = process.argv.indexOf(name); + return idx >= 0 ? process.argv[idx + 1] ?? 
fallback : fallback; +} + +if (import.meta.main) { + const output = await readStdin(); + const exitCode = Number(argValue('--exit-code', '0')); + const label = argValue('--label', 'bun test'); + const result = validateBunTestOutput(output, Number.isFinite(exitCode) ? exitCode : 1); + + if (result.ok) { + const s = result.summary!; + console.log(`${label}: ${s.pass} pass, ${s.skip} skip, ${s.fail} fail across ${s.filesRun} files`); + process.exit(0); + } + + console.error(`${label}: Bun test gate failed`); + for (const error of result.errors) console.error(` - ${error}`); + process.exit(1); +} diff --git a/scripts/deploy.ts b/scripts/deploy.ts index 0bbfd337..4b6223ea 100644 --- a/scripts/deploy.ts +++ b/scripts/deploy.ts @@ -483,8 +483,8 @@ async function main() { | Component | Count | Description | |-----------|-------|-------------| | Algorithm | v3.14.0 | Core reasoning engine | -| Skills | 70 | Research, security, writing, analysis, etc. | -| Hooks | 59 | Pre/post tool guards, format enforcement, etc. | +| Skills | 72 | Research, security, writing, analysis, etc. | +| Hooks | 61 | Pre/post tool guards, format enforcement, etc. 
| | Agents | 20 | Specialized agent definitions | | Scripts | 4 | Board, Ralph Loop, deploy | | Config | 8 | Domain configuration files | diff --git a/scripts/docs-spec-consistency.ts b/scripts/docs-spec-consistency.ts index cbc895b4..3df692a9 100644 --- a/scripts/docs-spec-consistency.ts +++ b/scripts/docs-spec-consistency.ts @@ -57,20 +57,6 @@ export function mergedPrNumbersFromGit(root = repoRoot()): Set { } } -function isPublicKaiRepo(root = repoRoot()): boolean { - try { - const remote = execFileSync('git', ['remote', 'get-url', 'origin'], { - cwd: root, - encoding: 'utf8', - stdio: ['ignore', 'pipe', 'ignore'], - timeout: 5000, - }); - return remote.includes('kai-cli/kai'); - } catch { - return false; - } -} - export function prNumbersFromText(text: string): Set { const prs = new Set(); for (const match of text.matchAll(/#(\d+)/g)) { @@ -137,7 +123,7 @@ export function checkStaleTaskTerminology(files: Record): Findin } export function checkRoadmapVersionSequence(roadmap: string, file = 'ROADMAP-7.x.md'): Finding[] { - const required = ['7.4.1', '7.4.2', '7.5.0', '7.5.1', '7.5.2']; + const required = ['7.4.1', '7.4.2', '7.4.4', '7.5.0', '7.5.1', '7.5.2']; const positions = required.map(version => ({ version, index: roadmap.search(new RegExp(`^##\\s+${version.replace('.', '\\.')}`, 'm')), @@ -187,7 +173,7 @@ export function runDocsSpecConsistency(root = repoRoot()): ConsistencyResult { } const findings: Finding[] = [ - ...(isPublicKaiRepo(root) ? [] : checkShippedPrReferences(files, mergedPrNumbersFromGit(root))), + ...checkShippedPrReferences(files, mergedPrNumbersFromGit(root)), ...checkStaleTaskTerminology(files), ]; diff --git a/scripts/hooks/pre-push b/scripts/hooks/pre-push index ae6416ce..bf02fd1f 100755 --- a/scripts/hooks/pre-push +++ b/scripts/hooks/pre-push @@ -171,7 +171,7 @@ if [[ "$GATE_REMOTE_URL" == *"kai-cli/kai"* ]]; then # 0b. PII CONTENT scan. 
Load regexes from an external file if present (never committed to kai); # otherwise fall back to a minimal built-in check so the gate still functions. - GATE_PII_FILE="${KAI_PII_PATTERNS:-$HOME/Projects/pai-config/scripts/pii-patterns.json}" + GATE_PII_FILE="${KAI_PII_PATTERNS:-$REPO_ROOT/scripts/pii-patterns.json}" GATE_PII_REGEX="" if [[ -f "$GATE_PII_FILE" ]] && command -v jq >/dev/null 2>&1; then GATE_PII_REGEX=$(jq -r '.[]' "$GATE_PII_FILE" 2>/dev/null | paste -sd '|' -) @@ -204,11 +204,8 @@ if [[ "$GATE_REMOTE_URL" == *"kai-cli/kai"* ]]; then fi # ── 1. Test suite ──────────────────────────────────────────── -# Note: bun 1.3.x has a known crash-on-exit bug (SIGABRT after tests complete). -# We capture output and check for "0 fail" in the summary line rather than -# trusting the exit code alone. -# Integration tests that spawn subprocesses (e.g. GitHubWriteGuard) can produce -# a single flaky fail under parallel load — retry once before blocking. +# Parse Bun's test summary instead of grepping one substring. Integration tests that spawn +# subprocesses can produce a single flaky fail under parallel load — retry once before blocking. LOCAL_CHANGED_FILES="" if [[ -n "$SAFE_RANGE" ]]; then # SAFE_RANGE unquoted — may carry "$lsha --not --remotes=origin" for a new branch. @@ -238,14 +235,16 @@ fi if [[ "$LOCAL_RUN_FULL_TESTS" == "1" ]]; then echo "Running tests (CI simulation: PAI_DIR=\$REPO_ROOT)..." - TEST_OUTPUT=$(PAI_DIR="$REPO_ROOT" bun test --cwd "$REPO_ROOT" 2>&1) || true + TEST_OUTPUT=$(PAI_DIR="$REPO_ROOT" bun test --cwd "$REPO_ROOT" 2>&1) + TEST_STATUS=$? echo "$TEST_OUTPUT" - if echo "$TEST_OUTPUT" | grep -q "[1-9][0-9]* fail"; then + if ! printf '%s\n' "$TEST_OUTPUT" | bun "$REPO_ROOT/scripts/bun-test-gate.ts" --exit-code "$TEST_STATUS" --label "pre-push bun test"; then echo "" echo "⚠️ Test run had failures — retrying once (subprocess spawn flakiness)..." 
- TEST_OUTPUT=$(PAI_DIR="$REPO_ROOT" bun test --cwd "$REPO_ROOT" 2>&1) || true + TEST_OUTPUT=$(PAI_DIR="$REPO_ROOT" bun test --cwd "$REPO_ROOT" 2>&1) + TEST_STATUS=$? echo "$TEST_OUTPUT" - if echo "$TEST_OUTPUT" | grep -q "[1-9][0-9]* fail"; then + if ! printf '%s\n' "$TEST_OUTPUT" | bun "$REPO_ROOT/scripts/bun-test-gate.ts" --exit-code "$TEST_STATUS" --label "pre-push bun test retry"; then echo "" echo "❌ Pre-push blocked: test suite has failures (failed on retry too)." echo " Fix failing tests, then push again." diff --git a/scripts/kai-artifact-fidelity.ts b/scripts/kai-artifact-fidelity.ts new file mode 100644 index 00000000..c6591821 --- /dev/null +++ b/scripts/kai-artifact-fidelity.ts @@ -0,0 +1,125 @@ +#!/usr/bin/env bun +/** + * Compare a verified staged KAI artifact against the live KAI checkout after + * apply. This catches partial copies and unexpected live-only drift while + * ignoring repo/runtime surfaces that sync intentionally excludes. + */ + +import { createHash } from 'crypto'; +import { existsSync, readdirSync, readFileSync, statSync } from 'fs'; +import { join } from 'path'; + +export interface FidelityResult { + ok: boolean; + missing: string[]; + extra: string[]; + different: string[]; +} + +const DEFAULT_EXCLUDES = [ + '.git/', + 'node_modules/', + '.DS_Store', + 'settings.json', + 'settings.json.backup*', + 'sessions/', + 'projects/', + 'file-history/', + 'debug/', + 'todos/', + 'tasks/', + 'session-env/', + 'shell-snapshots/', + 'paste-cache/', + 'chrome/', + 'history.jsonl', + 'telemetry/', + 'backups/', + 'cache/', + 'plugins/', + 'daemon/', + '.last-update-result.json', + 'stats-cache.json', + '.env', + '.env.local', + '.env.*.local', +]; + +function isExcluded(rel: string, excludes = DEFAULT_EXCLUDES): boolean { + return excludes.some((pattern) => { + if (pattern.endsWith('/')) { + const dir = pattern.slice(0, -1); + return rel === dir || rel.startsWith(`${dir}/`) || rel.endsWith(`/${dir}`) || rel.includes(`/${dir}/`); + } + if 
(pattern.endsWith('*')) return rel.startsWith(pattern.slice(0, -1)); + return rel === pattern || rel.startsWith(`${pattern}.`); + }); +} + +function walk(root: string, rel = '', out: string[] = []): string[] { + if (rel && isExcluded(rel)) return out; + const full = join(root, rel); + if (!existsSync(full)) return out; + const stat = statSync(full); + if (stat.isFile()) { + out.push(rel); + return out; + } + if (!stat.isDirectory()) return out; + + for (const entry of readdirSync(full)) { + const next = rel ? `${rel}/${entry}` : entry; + if (!isExcluded(next)) walk(root, next, out); + } + return out; +} + +function hash(path: string): string { + return createHash('sha256').update(readFileSync(path)).digest('hex'); +} + +export function compareKaiArtifacts(expectedDir: string, actualDir: string): FidelityResult { + const expected = new Set(walk(expectedDir).sort()); + const actual = new Set(walk(actualDir).sort()); + const missing = [...expected].filter((file) => !actual.has(file)); + const extra = [...actual].filter((file) => !expected.has(file)); + const different: string[] = []; + + for (const file of expected) { + if (!actual.has(file)) continue; + if (hash(join(expectedDir, file)) !== hash(join(actualDir, file))) different.push(file); + } + + return { ok: missing.length === 0 && extra.length === 0 && different.length === 0, missing, extra, different }; +} + +function usage(): never { + console.error('Usage: bun scripts/kai-artifact-fidelity.ts --expected --actual '); + process.exit(2); +} + +function arg(name: string): string { + const idx = process.argv.indexOf(name); + return idx >= 0 ? process.argv[idx + 1] ?? 
/** Sync classes a path can fall into; `unclassified` means no manifest pattern matched. */
export type SyncClass = 'private' | 'kai-only' | 'public' | 'unclassified';

/** The subset of the sync manifest that classification reads. */
export interface SyncManifestLike {
  private?: string[];
  kai_only?: string[];
  public?: string[];
}

/**
 * Canonicalise a path for matching: backslashes become `/`, one leading `./`
 * is dropped, and all leading slashes are removed.
 */
export function normalizeSyncPath(path: string): string {
  return path
    .replace(/\\/g, '/')
    .replace(/^\.\//, '')
    .replace(/^\/+/, '');
}

/**
 * Translate a sync glob into an anchored RegExp.
 *
 *   `**` matches any run of characters, including `/`
 *   `*`  matches any run of characters within one path segment
 *   `?`  matches exactly one character within a path segment
 *
 * Everything else is matched literally. The expression also accepts any path
 * below a match (`(?:/.*)?`), so a glob that names a directory covers the
 * files inside it, the same way a literal pattern does in `matchesSyncPattern`.
 */
function globToRegExp(glob: string): RegExp {
  let body = '';
  for (let i = 0; i < glob.length; i++) {
    const ch = glob.charAt(i);
    if (ch === '*') {
      if (glob.charAt(i + 1) === '*') {
        body += '.*';
        i++; // consume the second '*'
      } else {
        body += '[^/]*';
      }
    } else if (ch === '?') {
      body += '[^/]';
    } else {
      body += ch.replace(/[.+^${}()|[\]\\]/g, '\\$&');
    }
  }
  return new RegExp(`^${body}(?:/.*)?$`);
}

/**
 * Test a repo-relative file path against one manifest pattern.
 *
 * A leading `/` on the pattern is dropped (paths are already root-relative).
 * A trailing `/` marks a directory: the directory itself and everything below
 * it match. Patterns containing `*` or `?` are globs (see `globToRegExp`).
 * Any other pattern matches the exact path or anything beneath it.
 *
 * @param filePath Path to classify; normalised with `normalizeSyncPath`.
 * @param pattern  Manifest pattern.
 * @returns True when the pattern covers the path.
 */
export function matchesSyncPattern(filePath: string, pattern: string): boolean {
  const file = normalizeSyncPath(filePath);
  let normalizedPattern = pattern.replace(/\\/g, '/').replace(/^\.\//, '');
  if (normalizedPattern.startsWith('/')) normalizedPattern = normalizedPattern.slice(1);

  if (normalizedPattern.endsWith('/')) {
    const dir = normalizeSyncPath(normalizedPattern.slice(0, -1));
    return file === dir || file.startsWith(`${dir}/`);
  }

  const candidate = normalizeSyncPath(normalizedPattern);
  if (/[*?]/.test(candidate)) return globToRegExp(candidate).test(file);
  return file === candidate || file.startsWith(`${candidate}/`);
}

/**
 * Classify a path using the manifest. Precedence is private, then kai-only,
 * then public, so a path matched by several lists takes the most restrictive
 * class.
 *
 * @returns The first matching class, or `unclassified` when nothing matches.
 */
export function classifyBySyncManifest(filePath: string, manifest: SyncManifestLike): SyncClass {
  const ordered: Array<[SyncClass, string[] | undefined]> = [
    ['private', manifest.private],
    ['kai-only', manifest.kai_only],
    ['public', manifest.public],
  ];
  for (const [syncClass, patterns] of ordered) {
    if ((patterns ?? []).some((pattern) => matchesSyncPattern(filePath, pattern))) return syncClass;
  }
  return 'unclassified';
}
'agent.idle'); + const checkpoints = checkpointEvents.length; + const checkpointIds = new Set(checkpointEvents.map(e => String(e.agent_call_id ?? '')).filter(Boolean)); const sizes = returns.map(e => Number(e.result_chars)).filter(Number.isFinite); const byProject = new Map(); + const byStatus = new Map(); + const spawnTimes = new Map(); + const latencies: number[] = []; + + for (const e of spawns) { + const id = String(e.agent_call_id ?? ''); + const t = Date.parse(String(e.ts ?? '')); + if (id && Number.isFinite(t) && !spawnTimes.has(id)) spawnTimes.set(id, t); + } for (const e of returns) { const project = String(e.project ?? 'unknown'); const chars = Number(e.result_chars); + const status = String(e.return_status ?? 'ok'); + byStatus.set(status, (byStatus.get(status) ?? 0) + 1); + const id = String(e.agent_call_id ?? ''); + const spawnedAt = id ? spawnTimes.get(id) : undefined; + const returnedAt = Date.parse(String(e.ts ?? '')); + if (spawnedAt !== undefined && Number.isFinite(returnedAt) && returnedAt >= spawnedAt) { + latencies.push(returnedAt - spawnedAt); + } const row = byProject.get(project) ?? { returns: 0, total_chars: 0, max_chars: 0 }; row.returns++; if (Number.isFinite(chars)) { @@ -182,8 +211,18 @@ function agentReturnSummary(events: MemoryTelemetryEvent[]): { const sizeSummary = latencySummary(sizes); return { + spawns: spawns.length, returns: returns.length, checkpoints, + task_completed: completedEvents.length, + idle: idleEvents.length, + returns_without_checkpoint_prompt: returns.filter(e => { + const id = String(e.agent_call_id ?? ''); + return id ? !checkpointIds.has(id) : true; + }).length, + return_status: Object.fromEntries(byStatus), + missing_context_handoff: spawns.filter(e => e.context_handoff_missing === true).length, + spawn_to_return_latency: latencySummary(latencies), result_chars: { ...sizeSummary, total: sizes.reduce((n, x) => n + x, 0), @@ -249,7 +288,10 @@ function main(): void { // Agent capture-loss-guard activity (Phase 0). 
const agentReturns = byType.get('agent.return') ?? 0; + const agentSpawns = byType.get('agent.spawn') ?? 0; const agentCheckpoints = byType.get('agent.checkpoint') ?? 0; + const agentTaskCompleted = byType.get('agent.task_completed') ?? 0; + const agentIdle = byType.get('agent.idle') ?? 0; // Coherence drift (D2 trigger metric). const driftCount = byType.get('coherence.drift') ?? 0; @@ -270,7 +312,8 @@ function main(): void { session_end_composite: sessionEndComposite, agent_return: agentReturn, turn_prompt: turnPrompt, - agent_returns: agentReturns, agent_checkpoints: agentCheckpoints, + agent_spawns: agentSpawns, agent_returns: agentReturns, agent_checkpoints: agentCheckpoints, + agent_task_completed: agentTaskCompleted, agent_idle: agentIdle, coherence_drift: driftCount, }, null, 2)); return; @@ -314,9 +357,18 @@ function main(): void { console.log(` - skipped hooks ${sessionEndComposite.skipped_hooks_total}`); } console.log('\n Phase-0 capture-loss guard:'); + console.log(` • subagent spawns : ${agentSpawns}`); console.log(` • subagent returns : ${agentReturns}`); console.log(` • checkpoint prompts : ${agentCheckpoints}`); + console.log(` • task completed events : ${agentTaskCompleted}`); + console.log(` • idle events : ${agentIdle}`); if (agentReturn.returns > 0) { + console.log(` • no-checkpoint returns : ${agentReturn.returns_without_checkpoint_prompt}`); + console.log(` • missing handoffs : ${agentReturn.missing_context_handoff}`); + console.log(` • spawn→return ms p50/p95: ${fmt(agentReturn.spawn_to_return_latency.p50, 0)} / ${fmt(agentReturn.spawn_to_return_latency.p95, 0)} (n=${agentReturn.spawn_to_return_latency.count})`); + for (const [status, count] of Object.entries(agentReturn.return_status)) { + console.log(` - status ${status.padEnd(9)} ${count}`); + } console.log(` • return chars total : ${agentReturn.result_chars.total}`); console.log(` • return chars p50/p95 : ${fmt(agentReturn.result_chars.p50, 0)} / ${fmt(agentReturn.result_chars.p95, 0)}`); 
console.log(` • largest return chars : ${agentReturn.result_chars.max}`); diff --git a/scripts/release.sh b/scripts/release.sh index 97697f16..ca9053a0 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -54,11 +54,8 @@ fi # kai tree (where PII/brand strings live by design). verify-release.sh now splits checks into # STRUCTURAL (run on this repo), ARTIFACT/leak-brand (run on --target), and SECRETS (run on both). # -# Producing/verifying the scrubbed artifact is the job of the sync pipeline (sync-to-kai.sh → -# sync-ci-gate.ts on the real ~/Projects/kai tree, plus kai's own pre-push gate). Here we run the -# STRUCTURAL + SECRETS checks against kai; the artifact leak/brand pass happens at sync time -# against the real kai tree. (Run `bash scripts/verify-release.sh --target ` to scan a -# specific scrubbed tree directly.) +# Gate 4 builds a scrubbed temp KAI artifact and runs the public-boundary checks before this script can +# tag. Gate 3 still runs the source-tree structural/secrets checks directly against kai. echo "" echo "── Gate 3: Release verification (structural + secrets) ──" if bash "$REPO_ROOT/scripts/verify-release.sh" 2>&1; then @@ -67,17 +64,28 @@ else gate_fail "verify-release.sh failed — see output above" fi -# ── Gate 4: Test suite ─────────────────────────────────────── -# bun 1.3.x crashes-on-exit (SIGABRT/C++ teardown panic) AFTER tests complete, which corrupts the -# pipe and made `grep -q '0 fail'` flaky. Mirror the pre-push hook: detect an ACTUAL non-zero failure -# count ([1-9][0-9]* fail) and retry once for subprocess-spawn flakiness before failing the gate. 
# ── Gate 4: Temp KAI artifact boundary ───────────────────────
echo ""
echo "── Gate 4: Temp KAI artifact boundary ──"
if bun "$REPO_ROOT/scripts/kai-temp-release-gate.ts" 2>&1; then
  gate_pass "Temp KAI artifact gate passed"
else
  gate_fail "Temp KAI artifact gate failed — release artifact is not safe to tag"
fi

# ── Gate 5: Test suite ───────────────────────────────────────
# Parse Bun's summary instead of grepping one substring. Retry once for the same subprocess-spawn
# flakiness this gate has historically tolerated, then fail closed on missing summaries, failures,
# crashes, or non-zero exits.
#
# This script runs under `set -euo pipefail`. A bare `VAR=$(cmd)` whose command fails aborts the
# script at the assignment, before `$?` can be read. The status is therefore captured on the
# right-hand side of `||`, which keeps errexit from firing and lets bun-test-gate.ts decide.
echo ""
echo "── Gate 5: Test suite ──"
TEST_STATUS=0
TEST_OUTPUT=$(bun test 2>&1) || TEST_STATUS=$?
if ! printf '%s\n' "$TEST_OUTPUT" | bun "$REPO_ROOT/scripts/bun-test-gate.ts" --exit-code "$TEST_STATUS" --label "release bun test"; then
  echo " Test run had failures — retrying once (subprocess spawn flakiness)..."
  TEST_STATUS=0
  TEST_OUTPUT=$(bun test 2>&1) || TEST_STATUS=$?
  if ! printf '%s\n' "$TEST_OUTPUT" | bun "$REPO_ROOT/scripts/bun-test-gate.ts" --exit-code "$TEST_STATUS" --label "release bun test retry"; then
    gate_fail "Test suite has failures (failed on retry too) — run 'bun test' for details"
  else
    gate_pass "All tests pass (retry)"
  fi
else
  gate_pass "All tests pass"
fi
Verify manifest counts align with filesystem @@ -26,6 +26,7 @@ import { existsSync, readFileSync, readdirSync, statSync } from 'fs'; import { join } from 'path'; import { execSync } from 'child_process'; import { reconcile } from './reconcile-wiring'; +import { classifyBySyncManifest, matchesSyncPattern } from './lib/sync-classification'; const RED = '\x1b[0;31m'; const GREEN = '\x1b[0;32m'; @@ -42,9 +43,10 @@ const STRICT = process.argv.includes('--strict'); const WARN_PII = process.argv.includes('--warn-pii'); type SyncManifest = { - private?: string[]; - kai_only?: string[]; - public?: string[]; + private: string[]; + kai_only: string[]; + public: string[]; + stale_kai_paths?: string[]; }; // Get PAI_DIR with fallback chain (evaluated lazily to avoid test environment issues) @@ -72,19 +74,6 @@ function getKaiDir(): string { return process.env.KAI_DIR || join(process.env.HOME!, 'Projects', 'kai'); } -function isPublicKaiRepo(root: string): boolean { - try { - const remote = execSync('git remote get-url origin', { - cwd: root, - encoding: 'utf-8', - stdio: ['ignore', 'pipe', 'ignore'], - }); - return remote.includes('kai-cli/kai'); - } catch { - return false; - } -} - // PII patterns loaded from external file (excluded from kai sync to avoid leaking identifiers) function loadPIIPatterns(paiDir: string): string[] { const patternsPath = join(paiDir, 'scripts', 'pii-patterns.json'); @@ -107,66 +96,41 @@ function getPIIPatterns(paiDir: string): string[] { return _piiPatterns; } -// Parse EXCLUDE_PATHS from sync-to-kai.sh -function parseExcludePaths(paiDir: string = getPaiDir()): string[] { - const syncScript = join(paiDir, 'scripts', 'sync-to-kai.sh'); - if (!existsSync(syncScript)) { - fail(`sync-to-kai.sh not found at ${syncScript} (PAI_DIR=${paiDir})`); +function loadSyncManifest(paiDir: string = getPaiDir()): SyncManifest { + const manifestPath = join(paiDir, 'scripts', 'sync-manifest.json'); + if (!existsSync(manifestPath)) { + fail(`scripts/sync-manifest.json 
not found at ${manifestPath}`); process.exit(1); } - - const content = readFileSync(syncScript, 'utf-8'); - // Match to the array-closing ')' at START of a line — a ')' inside a comment/value (e.g. a - // parenthetical in an inline # comment) must NOT prematurely terminate the capture. - const match = content.match(/EXCLUDE_PATHS=\(([\s\S]*?)\n\)/); - if (!match) { - fail('Could not parse EXCLUDE_PATHS from sync-to-kai.sh'); + try { + const manifest = JSON.parse(readFileSync(manifestPath, 'utf-8')) as SyncManifest; + const errors: string[] = []; + for (const key of ['private', 'kai_only', 'public'] as const) { + if (!Array.isArray(manifest[key]) || manifest[key].length === 0) { + errors.push(`sync-manifest.json field "${key}" must be a non-empty array`); + } + } + if (manifest.stale_kai_paths !== undefined && !Array.isArray(manifest.stale_kai_paths)) { + errors.push('sync-manifest.json field "stale_kai_paths" must be an array when present'); + } + if (errors.length > 0) { + for (const err of errors) fail(err); + process.exit(1); + } + return manifest; + } catch (err) { + fail(`Could not parse scripts/sync-manifest.json: ${err}`); process.exit(1); } - - // Parse bash array — each line contains a path (possibly quoted) - const paths: string[] = []; - const lines = match[1].split('\n'); - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith('#')) continue; - // Remove quotes and trailing comments - const cleaned = trimmed.replace(/^["']|["']$/g, '').split('#')[0].trim(); - if (cleaned) paths.push(cleaned); - } - return paths; } -// Parse KAI_ONLY_FILES from sync-to-kai.sh -function parseKaiOnlyFiles(paiDir: string = getPaiDir()): string[] { - const syncScript = join(paiDir, 'scripts', 'sync-to-kai.sh'); - const content = readFileSync(syncScript, 'utf-8'); - const match = content.match(/KAI_ONLY_FILES=\(([\s\S]*?)\n\)/); - if (!match) { - fail('Could not parse KAI_ONLY_FILES from sync-to-kai.sh'); - process.exit(1); - } - - 
const paths: string[] = []; - const lines = match[1].split('\n'); - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith('#')) continue; - const cleaned = trimmed.replace(/^["']|["']$/g, '').split('#')[0].trim(); - if (cleaned) paths.push(cleaned); - } - return paths; +// Compatibility wrappers retained for tests/callers; source of truth is sync-manifest.json. +function parseExcludePaths(paiDir: string = getPaiDir()): string[] { + return loadSyncManifest(paiDir).private; } -function loadSyncManifest(paiDir: string = getPaiDir()): SyncManifest | null { - const manifestPath = join(paiDir, 'scripts', 'sync-manifest.json'); - if (!existsSync(manifestPath)) return null; - try { - return JSON.parse(readFileSync(manifestPath, 'utf-8')) as SyncManifest; - } catch (err) { - warn(`Could not parse scripts/sync-manifest.json: ${err}`); - return null; - } +function parseKaiOnlyFiles(paiDir: string = getPaiDir()): string[] { + return loadSyncManifest(paiDir).kai_only; } // Get all tracked files from git @@ -183,37 +147,6 @@ function getTrackedFiles(paiDir: string = getPaiDir()): string[] { } } -// Check if file path matches pattern (with glob support for directories) -function matchesPattern(filePath: string, pattern: string): boolean { - // A leading '/' in an rsync filter anchors the pattern to the transfer root. - // classifyFile works with root-relative paths, so a root-anchored pattern is - // just an exact-from-root match — strip the anchor and fall through to the - // exact/prefix logic below (which is already root-relative, not depth-matching). 
- if (pattern.startsWith('/')) pattern = pattern.slice(1); - if (pattern.endsWith('/')) { - return filePath.startsWith(pattern); - } - if (pattern.includes('*')) { - // Simple glob: "foo/*.md" or "*.ext" - const regex = new RegExp('^' + pattern.replace(/\*/g, '.*') + '$'); - return regex.test(filePath); - } - return filePath === pattern || filePath.startsWith(pattern + '/'); -} - -function classifyBySyncManifest(filePath: string, manifest: SyncManifest): 'private' | 'kai-only' | 'public' | 'unclassified' { - for (const pattern of manifest.private ?? []) { - if (matchesPattern(filePath, pattern)) return 'private'; - } - for (const pattern of manifest.kai_only ?? []) { - if (matchesPattern(filePath, pattern)) return 'kai-only'; - } - for (const pattern of manifest.public ?? []) { - if (matchesPattern(filePath, pattern)) return 'public'; - } - return 'unclassified'; -} - // Classify file into private/kai-only/public function classifyFile( filePath: string, @@ -222,14 +155,14 @@ function classifyFile( ): 'private' | 'kai-only' | 'public' { // Check if excluded from sync (kai only) for (const pattern of excludePaths) { - if (matchesPattern(filePath, pattern)) { + if (matchesSyncPattern(filePath, pattern)) { return 'private'; } } // Check if kai-only (won't be synced, but exists in kai) for (const pattern of kaiOnlyFiles) { - if (matchesPattern(filePath, pattern)) { + if (matchesSyncPattern(filePath, pattern)) { return 'kai-only'; } } @@ -318,10 +251,6 @@ function resolveLocalImport(fromFile: string, specifier: string, paiDir: string) function runDependencyClosureReport(paiDir: string): { errors: string[]; warnings: string[] } { const manifest = loadSyncManifest(paiDir); - if (!manifest) { - return { errors: [], warnings: ['scripts/sync-manifest.json missing; dependency closure report skipped'] }; - } - const roots = ['hooks', 'PAI', 'scripts', 'agents', 'tests'] .map((root) => join(paiDir, root)) .filter((root) => existsSync(root)); @@ -397,7 +326,6 @@ function 
runDependencyClosureReport(paiDir: string): { errors: string[]; warning function main() { const PAI_DIR = getPaiDir(); const KAI_DIR = getKaiDir(); - const syncScript = join(PAI_DIR, 'scripts', 'sync-to-kai.sh'); console.log('\n=== Sync CI Gate ==='); console.log(`PAI: ${PAI_DIR}`); @@ -408,40 +336,12 @@ function main() { process.exit(1); } - if (!existsSync(syncScript) && isPublicKaiRepo(PAI_DIR)) { - info('Public KAI checkout detected; sync-to-kai.sh is intentionally not shipped'); - const manifestPath = join(PAI_DIR, 'manifest.json'); - const manifest = existsSync(manifestPath) ? JSON.parse(readFileSync(manifestPath, 'utf-8')) : null; - if (!manifest?.counts) { - fail('manifest.json missing product counts'); - process.exit(1); - } - const actualSkills = execSync( - "find skills -name SKILL.md -not -path '*/.archive/*' | wc -l | tr -d ' '", - { cwd: PAI_DIR, encoding: 'utf-8' } - ).trim(); - const actualHooks = execSync( - "find hooks -maxdepth 1 -name '*.hook.ts' | wc -l | tr -d ' '", - { cwd: PAI_DIR, encoding: 'utf-8' } - ).trim(); - const actualAgents = execSync( - "find agents -maxdepth 1 -name '*.md' ! 
-name README.md | wc -l | tr -d ' '", - { cwd: PAI_DIR, encoding: 'utf-8' } - ).trim(); - if (String(manifest.counts.skills) !== actualSkills || String(manifest.counts.hooks) !== actualHooks || String(manifest.counts.agents) !== actualAgents) { - fail(`Manifest counts do not match filesystem (${actualSkills} skills, ${actualHooks} hooks, ${actualAgents} agents)`); - process.exit(1); - } - pass(`Public KAI manifest counts match filesystem (${actualSkills} skills, ${actualHooks} hooks, ${actualAgents} agents)`); - console.log('\n✅ Public KAI sync readiness gate skipped private sync checks\n'); - process.exit(0); - } - - // Step 1: Parse sync rules - info('Parsing sync rules from sync-to-kai.sh'); + // Step 1: Load sync rules + info('Loading sync rules from scripts/sync-manifest.json'); const excludePaths = parseExcludePaths(PAI_DIR); const kaiOnlyFiles = parseKaiOnlyFiles(PAI_DIR); - pass(`Loaded ${excludePaths.length} exclude patterns, ${kaiOnlyFiles.length} kai-only patterns`); + const syncManifest = loadSyncManifest(PAI_DIR); + pass(`Loaded ${excludePaths.length} private patterns, ${kaiOnlyFiles.length} kai-only patterns, ${syncManifest.public.length} public patterns`); // Step 2: Get tracked files info('Scanning tracked files'); diff --git a/scripts/sync-drift.ts b/scripts/sync-drift.ts index 2d10f5aa..f6f46126 100644 --- a/scripts/sync-drift.ts +++ b/scripts/sync-drift.ts @@ -11,7 +11,7 @@ * * DESIGN: * - Uses git ls-files to get tracked files (ignores untracked/gitignored) - * - Respects KAI_ONLY_FILES exclusions from sync-to-kai.sh + * - Respects private/kai-only paths from scripts/sync-manifest.json * - Compares file hashes for content drift detection * - Output: summary to stdout (parseable for CI) * @@ -24,59 +24,39 @@ import { existsSync, readFileSync } from 'fs'; import { join } from 'path'; import { createHash } from 'crypto'; import { execSync } from 'child_process'; +import { matchesSyncPattern } from './lib/sync-classification'; -const PAI_DIR = 
join(process.env.HOME!, 'Projects', 'kai'); +const PAI_DIR = process.env.PAI_DIR || ( + existsSync(join(process.cwd(), 'scripts', 'sync-manifest.json')) + ? process.cwd() + : join(process.env.HOME!, 'Projects', 'kai') +); const KAI_DIR = process.env.KAI_DIR || join(process.env.HOME!, 'Projects', 'kai'); -// Files that exist ONLY in kai (should not be synced from kai) -const KAI_ONLY_FILES = [ - '.github/workflows/test.yml', - 'CHANGELOG.md', - 'CONTRIBUTING.md', - 'LICENSE', - 'LICENSE-UPSTREAM', - 'config/domains.jsonc', - 'config/identity.jsonc.template', - 'config/user-hooks.jsonc.example', - 'docs/CUSTOMIZATION.md', - 'docs/architecture/archive/', - 'docs/planning/deliberate-research-mode.md', - 'get-kai.sh', - 'PAI/CONTEXT_ROUTING.md', - 'README.md', - 'docs/WHATS-DIFFERENT.md', - 'hooks/user/', - 'MEMORY/KNOWLEDGE/.gitkeep', - 'MEMORY/LEARNING/.gitkeep', - 'MEMORY/RELATIONSHIP/.gitkeep', - 'MEMORY/SECURITY/.gitkeep', - 'MEMORY/STAGING/.gitkeep', - 'MEMORY/STATE/.gitkeep', - 'MEMORY/WORK/.gitkeep', - 'tests/ConfigLoader.test.ts', - 'tests/Installer.test.ts', - 'tests/OncePerSession.test.ts', -]; - -// Files excluded from sync (kai only, should NOT appear in kai) -const PAI_ONLY_FILES = [ - 'CLAUDE.md', - 'VERSION', - 'config/identity.jsonc', - 'docs/planning/GIT-HISTORY-REWRITE.md', - 'docs/planning/PAI-5.0.0-PLAN.md', - 'docs/planning/RELEASE-BLOCKERS.md', - '.github/workflows/test.yml', - 'hooks/archive/', - 'Plans/', - 'skills/KAIUpgrade/', - 'scripts/sync-to-kai.sh', - 'scripts/verify-release.sh', - 'scripts/kai-release-audit.ts', - 'scripts/board-config.json', - 'USER/', - '.claude/', -]; +type SyncManifest = { + private: string[]; + kai_only: string[]; + public: string[]; + stale_kai_paths?: string[]; +}; + +function loadSyncManifest(paiDir = PAI_DIR): SyncManifest { + const manifestPath = join(paiDir, 'scripts', 'sync-manifest.json'); + if (!existsSync(manifestPath)) { + console.error(`Error: sync manifest not found: ${manifestPath}`); + 
process.exit(1); + } + try { + const manifest = JSON.parse(readFileSync(manifestPath, 'utf-8')) as SyncManifest; + for (const key of ['private', 'kai_only', 'public'] as const) { + if (!Array.isArray(manifest[key])) throw new Error(`field "${key}" must be an array`); + } + return manifest; + } catch (err) { + console.error(`Error: could not parse sync manifest: ${err}`); + process.exit(1); + } +} function getTrackedFiles(repoDir: string): string[] { try { @@ -98,12 +78,7 @@ function hashFile(filePath: string): string { } function isExcluded(file: string, patterns: string[]): boolean { - return patterns.some(pattern => { - if (pattern.endsWith('/')) { - return file.startsWith(pattern); - } - return file === pattern || file.startsWith(pattern + '/'); - }); + return patterns.some(pattern => matchesSyncPattern(file, pattern)); } function main() { @@ -123,6 +98,8 @@ function main() { console.log(`PAI: ${PAI_DIR}`); console.log(`KAI: ${KAI_DIR}\n`); + const manifest = loadSyncManifest(PAI_DIR); + // Get tracked files const paiFiles = getTrackedFiles(PAI_DIR); const kaiFiles = getTrackedFiles(KAI_DIR); @@ -130,8 +107,8 @@ function main() { console.log(`Files tracked: PAI=${paiFiles.length}, KAI=${kaiFiles.length}\n`); // Filter out excluded files - const sharedPaiFiles = paiFiles.filter(f => !isExcluded(f, PAI_ONLY_FILES)); - const sharedKaiFiles = kaiFiles.filter(f => !isExcluded(f, KAI_ONLY_FILES)); + const sharedPaiFiles = paiFiles.filter(f => !isExcluded(f, manifest.private)); + const sharedKaiFiles = kaiFiles.filter(f => !isExcluded(f, manifest.kai_only)); const paiSet = new Set(sharedPaiFiles); const kaiSet = new Set(sharedKaiFiles); @@ -147,7 +124,7 @@ function main() { // Files in KAI but not in PAI (unexpected) const extraInKai: string[] = []; for (const file of kaiSet) { - if (!paiSet.has(file) && !isExcluded(file, KAI_ONLY_FILES)) { + if (!paiSet.has(file) && !isExcluded(file, manifest.kai_only)) { extraInKai.push(file); } } @@ -228,3 +205,5 @@ function main() 
{ if (import.meta.main) { main(); } + +export { isExcluded, loadSyncManifest }; diff --git a/scripts/sync-manifest-schema.json b/scripts/sync-manifest-schema.json new file mode 100644 index 00000000..f1f117b3 --- /dev/null +++ b/scripts/sync-manifest-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "PAI to KAI sync manifest", + "type": "object", + "required": ["version", "private", "kai_only", "public"], + "additionalProperties": false, + "properties": { + "$schema": { "type": "string" }, + "description": { "type": "string" }, + "version": { "type": "string" }, + "private": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true + }, + "kai_only": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true + }, + "stale_kai_paths": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true, + "default": [] + }, + "public": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true + }, + "notes": { + "type": "object", + "additionalProperties": { "type": "string" } + } + } +} diff --git a/scripts/sync-manifest.json b/scripts/sync-manifest.json index f8b935ed..3a0309da 100644 --- a/scripts/sync-manifest.json +++ b/scripts/sync-manifest.json @@ -3,14 +3,24 @@ "description": "File categorization for kai → kai sync. 
This manifest defines which files are private (kai only), kai-only (protected), or public (synced).", "version": "1.0.0", "private": [ - "CLAUDE.md", + "/CLAUDE.md", "VERSION", "config/identity.jsonc", ".github/workflows/test.yml", "docs/planning/", "auto-memory/", "hooks/archive/", + "hooks/lib/.archive/", + "skills/.archive/", + "skills/DevTeam/Context/LinksysResources.md", + "skills/LinksysDev/", "MEMORY/KNOWLEDGE/", + "MEMORY/memcarry/", + "memcarry/store/", + "memcarry/**/*.test.ts", + "tests/MemcarryRecallShared.test.ts", + "tests/MemRecallDegraded.test.ts", + "tests/MemcarrySemantic.test.ts", "MEMORY/STAGING/.staging-state.json", "MEMORY/STAGING/2026-*", "MEMORY/WISDOM/FRAMES/", @@ -18,15 +28,20 @@ "PAI/Tools/ada-generate.ts", "PAI/Tools/bump-version.ts", "Plans/", + "skills/Utilities/KAIUpgrade/", "skills/KAIUpgrade/", "policy-limits.json", "teams/", "scripts/auto-memory-inbox-report.ts", "scripts/board-config.json", "scripts/codex-trust-bootstrap.ts", + "devices.json", "scripts/kai-release-audit.ts", "scripts/literal-replace.ts", "scripts/MAINTENANCE.md", + "scripts/pii-patterns.json", + "scripts/pii-replacements.json", + "scripts/migrate-knowledge-frontmatter.ts", "scripts/prompt-latency-report.ts", "scripts/README.md", "scripts/sync-to-kai.sh", @@ -37,9 +52,13 @@ "tests/CodexTrustBootstrap.test.ts", "tests/LiteralReplace.test.ts", "tests/PromptLatencyReport.test.ts", + "tests/SyncKaiManifestRegen.test.ts", + "tests/SyncToKaiStagingArtifact.test.ts", + "tests/SyncToKaiUpgradePath.test.ts", + "tests/SyncToKaiVerifyTarget.test.ts", "tests/fixtures/ada-registry.md", "ada/", - "TOOLS.md", + "/TOOLS.md", "TOOLS-FULL.md", "CAPABILITIES.md", "MEMORY/CAPABILITIES/", @@ -65,7 +84,7 @@ "docs/planning/deliberate-research-mode.md", "get-kai.sh", "PAI/CONTEXT_ROUTING.md", - "README.md", + "/README.md", "docs/WHATS-DIFFERENT.md", "hooks/user/", "MEMORY/KNOWLEDGE/.gitkeep", @@ -84,6 +103,16 @@ "tests/Installer.test.ts", "tests/OncePerSession.test.ts" ], + 
"stale_kai_paths": [ + "skills/Utilities/KAIUpgrade/", + "skills/Utilities/KAIUpgrade/", + "skills/Utilities/", + "tests/LiteralReplace.test.ts", + "tests/SyncKaiManifestRegen.test.ts", + "tests/SyncToKaiStagingArtifact.test.ts", + "tests/SyncToKaiUpgradePath.test.ts", + "tests/SyncToKaiVerifyTarget.test.ts" + ], "public": [ "hooks/*.hook.ts", "hooks/lib/", @@ -98,6 +127,8 @@ "scripts/ci-plan.ts", "scripts/ci-observability.ts", "scripts/docs-spec-consistency.ts", + "scripts/bun-test-gate.ts", + "scripts/kai-artifact-fidelity.ts", "scripts/kai-temp-release-gate.ts", "scripts/rebuild-learning-view.ts", "scripts/reconcile-wiring.ts", @@ -107,6 +138,7 @@ "scripts/settings-validate.ts", "scripts/skills-lock.ts", "scripts/sync-ci-gate.ts", + "scripts/sync-drift.ts", "scripts/telemetry-doctor.ts", "scripts/telemetry-fast-gate.ts", "scripts/workflow-run.ts", @@ -126,6 +158,7 @@ "notes": { "private": "Files excluded from sync — personal data, maintainer tooling, PII-containing patterns", "kai_only": "Files that exist only in kai — protected from rsync --delete during sync", - "public": "Files that will be synced to kai — must be PII-free and ready for public consumption" + "public": "Files that will be synced to kai — must be PII-free and ready for public consumption", + "stale_kai_paths": "Legacy public KAI paths protected from rsync delete during transfer, then explicitly removed after transforms" } } diff --git a/scripts/version-targets.json b/scripts/version-targets.json index 71066585..c48573c2 100644 --- a/scripts/version-targets.json +++ b/scripts/version-targets.json @@ -8,7 +8,7 @@ { "file": "PAI/Tools/Banner.ts", "kind": "banner-pai-algo-fallback", "category": "fallback" }, { "file": "PAI/Tools/BuildCLAUDE.ts", "kind": "buildclaude-default-algo", "category": "fallback" }, { "file": "PAI/Tools/BuildCLAUDE.ts", "kind": "buildclaude-default-pai", "category": "fallback" }, - { "file": "CLAUDE.md", "kind": "h1-kai-version", "category": "docs", "optional": true }, + { 
"file": "CLAUDE.md", "kind": "h1-kai-version", "category": "docs" }, { "file": "README.md", "kind": "h1-kai-version", "category": "docs" }, { "file": "README.md", "kind": "readme-algo-inline", "category": "docs" }, { "file": "docs/WHATS-DIFFERENT.md", "kind": "whats-different-title", "category": "docs" }, diff --git a/scripts/weekly-maintenance.ts b/scripts/weekly-maintenance.ts index 2e98b42f..4f139ccc 100644 --- a/scripts/weekly-maintenance.ts +++ b/scripts/weekly-maintenance.ts @@ -16,6 +16,7 @@ import { writeFileSync, readFileSync, mkdirSync, existsSync, readdirSync } from import { join } from 'path'; const PAI_DIR = process.env.PAI_DIR ?? join(process.env.HOME!, '.claude'); +const REPO_DIR = process.env.PAI_REPO_DIR ?? PAI_DIR; const STATE_FILE = join(PAI_DIR, 'MEMORY', 'STATE', '.weekly-maintenance.json'); const WORK_JSON = join(PAI_DIR, 'MEMORY', 'STATE', 'work.json'); const DRY_RUN = process.argv.includes('--dry-run'); @@ -200,8 +201,7 @@ async function main() { // Backlog surfacing trigger — keep targeted ideas in view so nothing rots in limbo (the SpecKit // lesson; see roadmap "📋 Backlog — the no-lose system"). Echo the next TARGETED release block. try { - const roadmap = join(PAI_DIR, '..', 'Projects', 'kai', 'docs', 'planning', 'ROADMAP-7.x.md'); - const rmPath = existsSync(roadmap) ? 
roadmap : join(process.env.HOME!, 'Projects/kai/docs/planning/ROADMAP-7.x.md'); + const rmPath = join(REPO_DIR, 'docs', 'planning', 'ROADMAP-7.x.md'); if (existsSync(rmPath)) { const lines = readFileSync(rmPath, 'utf-8').split('\n'); const start = lines.findIndex(l => /^## .*\(TARGETED\)/.test(l)); diff --git a/skills-lock.json b/skills-lock.json index bca6b057..d8510333 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -1,6 +1,6 @@ { "version": 1, - "generated": "2026-06-26T15:21:17.086Z", + "generated": "2026-06-28T00:11:57.340Z", "skills": { "Agents": { "source": "kai", @@ -600,6 +600,28 @@ "workflowHashes": {}, "specializes": null }, + "SpecKit": { + "source": "kai", + "path": "skills/SpecKit/SKILL.md", + "hash": "sha256:35703f5aca5dce63d88059f83e8817647d014847773165192420df1cac076ba1", + "workflows": [ + "Clarify", + "Implement", + "Plan", + "Spec", + "Tasks", + "Validate" + ], + "workflowHashes": { + "Clarify": "sha256:b8b54729e582159adc9a56b5a04aeffb958e014f889afa7923a929b291426896", + "Implement": "sha256:b840e14d408ded054ec95ea3c3cd254a8edeb94f45cfa2049ce00fa13db67f27", + "Plan": "sha256:cc328a713a9e68f75683b78e3513c783db368f32ec2886a07249d4085e08d6ff", + "Spec": "sha256:aeeaa507c7641b52aebe3a96e71e61b3f3ccce9cd78ac05dcd005751a9451838", + "Tasks": "sha256:5db256f8d743fc4b060366046bdcd36f616a1e484ea86570a49ceba3e03aea3b", + "Validate": "sha256:934220799d4ce75823313b3add1858652bbc7b62a0bdde5e6be790fb3190b3fe" + }, + "specializes": null + }, "Telos": { "source": "kai", "path": "skills/Telos/SKILL.md", diff --git a/skills/PAI/ACTIONS.md b/skills/PAI/ACTIONS.md index 4b51f192..3e72eb56 100755 --- a/skills/PAI/ACTIONS.md +++ b/skills/PAI/ACTIONS.md @@ -1,6 +1,6 @@ # Actions -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. 
**Atomic, Composable Units of Work** diff --git a/skills/PAI/BROWSERAUTOMATION.md b/skills/PAI/BROWSERAUTOMATION.md index 542c7303..5b6c646a 100755 --- a/skills/PAI/BROWSERAUTOMATION.md +++ b/skills/PAI/BROWSERAUTOMATION.md @@ -1,6 +1,6 @@ # Browser Automation -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. **Debug-first browser automation with always-on visibility.** diff --git a/skills/PAI/CLI.md b/skills/PAI/CLI.md index 92a1f111..0c00ade2 100755 --- a/skills/PAI/CLI.md +++ b/skills/PAI/CLI.md @@ -1,4 +1,4 @@ -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. # PAI Command-Line Tools diff --git a/skills/PAI/DEPLOYMENT.md b/skills/PAI/DEPLOYMENT.md index 67c87a29..bafabbbc 100755 --- a/skills/PAI/DEPLOYMENT.md +++ b/skills/PAI/DEPLOYMENT.md @@ -1,4 +1,4 @@ -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. # Arbol Deployment Guide diff --git a/skills/PAI/FLOWS.md b/skills/PAI/FLOWS.md index 56268937..649d8a1e 100755 --- a/skills/PAI/FLOWS.md +++ b/skills/PAI/FLOWS.md @@ -1,6 +1,6 @@ # Flows -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. **Connecting Sources to Pipelines on a Schedule** diff --git a/skills/PAI/FLOWS/README.md b/skills/PAI/FLOWS/README.md index e05e3239..8748b80a 100755 --- a/skills/PAI/FLOWS/README.md +++ b/skills/PAI/FLOWS/README.md @@ -1,6 +1,6 @@ # PAI Flows -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. Flows connect **sources** to **pipelines** on a **schedule**. A flow fetches content from an external source (RSS feed, API, etc.), pipes it through a pipeline of actions, and delivers results to a destination (email, webhook, etc.). 
diff --git a/skills/PAI/MEMORYSYSTEM.md b/skills/PAI/MEMORYSYSTEM.md index b9f79c2e..f8af0e00 100755 --- a/skills/PAI/MEMORYSYSTEM.md +++ b/skills/PAI/MEMORYSYSTEM.md @@ -2,7 +2,7 @@ **The unified system memory - what happened, what we learned, what we're working on.** -**Version:** 7.4.2 (Projects-native architecture, 2026-01-12) +**Version:** 7.7.0 (Projects-native architecture, 2026-01-12) **Location:** `~/.claude/MEMORY/` --- diff --git a/skills/PAI/PIPELINES.md b/skills/PAI/PIPELINES.md index eba049bd..86012285 100755 --- a/skills/PAI/PIPELINES.md +++ b/skills/PAI/PIPELINES.md @@ -1,6 +1,6 @@ # Pipelines -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. **Orchestrating Sequences of Actions with Verification Gates** diff --git a/skills/PAI/PIPELINES/README.md b/skills/PAI/PIPELINES/README.md index 40d5562a..378530dc 100755 --- a/skills/PAI/PIPELINES/README.md +++ b/skills/PAI/PIPELINES/README.md @@ -1,6 +1,6 @@ # PAI Pipelines -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. Pipelines chain actions together. A pipeline is just a list of actions executed in order using the **pipe model** — the output of each action becomes the input of the next. diff --git a/skills/PAI/THEHOOKSYSTEM.md b/skills/PAI/THEHOOKSYSTEM.md index a84bd921..7d05ba24 100755 --- a/skills/PAI/THEHOOKSYSTEM.md +++ b/skills/PAI/THEHOOKSYSTEM.md @@ -1,6 +1,6 @@ # Hook System -> **KAI 7.4.2** — Stable release. +> **KAI 7.7.0** — Stable release. **Event-Driven Automation Infrastructure** diff --git a/skills/SpecKit/SKILL.md b/skills/SpecKit/SKILL.md new file mode 100644 index 00000000..e84e3560 --- /dev/null +++ b/skills/SpecKit/SKILL.md @@ -0,0 +1,38 @@ +--- +name: SpecKit +description: Spec-driven development accelerator for turning ideas into specs, plans, task lists, implementations, and validation loops. 
USE WHEN speckit, spec kit, specification kit, write a spec, create a plan from a spec, derive tasks, implement from tasks, spec-driven development, clarify requirements, validate a plan against a spec. +--- + +# SpecKit + +SpecKit turns an idea into an executable development path: + +1. Clarify requirements and constraints. +2. Write a compact feature spec. +3. Produce an implementation plan. +4. Break the plan into ordered tasks. +5. Implement against the task list. +6. Validate the result against the original spec. + +This skill complements the KAI Algorithm and the `Development` skill. Use SpecKit when the user wants a +structured design-to-implementation flow, especially for work with unclear requirements or several dependent +steps. + +## Workflow Routing + +| Request Pattern | Route To | +|---|---| +| Clarify an idea before writing requirements | `Workflows/Clarify.md` | +| Write a spec, create requirements, define acceptance criteria | `Workflows/Spec.md` | +| Turn a spec into an implementation plan | `Workflows/Plan.md` | +| Break a plan into ordered implementation tasks | `Workflows/Tasks.md` | +| Implement from tasks/spec | `Workflows/Implement.md` | +| Validate implementation against spec/tasks | `Workflows/Validate.md` | + +## Operating Rules + +- Keep specs short enough to execute from. +- Capture open questions explicitly instead of inventing answers. +- Use existing project conventions before introducing new structure. +- Treat tests, release gates, and public/private sync boundaries as first-class criteria. +- Preserve a trace from idea -> spec -> plan -> tasks -> implementation -> validation. diff --git a/skills/SpecKit/Workflows/Clarify.md b/skills/SpecKit/Workflows/Clarify.md new file mode 100644 index 00000000..621185d3 --- /dev/null +++ b/skills/SpecKit/Workflows/Clarify.md @@ -0,0 +1,12 @@ +# Clarify + +Use this when the request is promising but underspecified. + +## Steps + +1. Restate the objective in one paragraph. +2. 
List explicit requirements. +3. List implied requirements. +4. List constraints, risks, and unknowns. +5. Ask only blocking questions. If reasonable defaults exist, state them and proceed. +6. Produce a readiness decision: `ready-for-spec`, `needs-input`, or `park`. diff --git a/skills/SpecKit/Workflows/Implement.md b/skills/SpecKit/Workflows/Implement.md new file mode 100644 index 00000000..495f048b --- /dev/null +++ b/skills/SpecKit/Workflows/Implement.md @@ -0,0 +1,12 @@ +# Implement + +Use this when executing a SpecKit task list. + +## Steps + +1. Read the spec and task list. +2. Inspect the existing implementation before editing. +3. Make the smallest coherent code chunk that satisfies the next task group. +4. Add or update focused tests. +5. Run targeted verification before continuing to the next group. +6. Keep the spec/task trace updated if the implementation reveals a better path. diff --git a/skills/SpecKit/Workflows/Plan.md b/skills/SpecKit/Workflows/Plan.md new file mode 100644 index 00000000..10fa3f10 --- /dev/null +++ b/skills/SpecKit/Workflows/Plan.md @@ -0,0 +1,11 @@ +# Plan + +Use this to turn a spec into an implementation plan. + +## Steps + +1. Identify existing code paths, ownership boundaries, and tests. +2. Split the work into coherent chunks that can be committed or reviewed independently. +3. Name migration, compatibility, and rollback concerns. +4. Map each acceptance criterion to one or more implementation steps. +5. Mark anything that should move to a later release instead of expanding scope. diff --git a/skills/SpecKit/Workflows/Spec.md b/skills/SpecKit/Workflows/Spec.md new file mode 100644 index 00000000..c3f018b6 --- /dev/null +++ b/skills/SpecKit/Workflows/Spec.md @@ -0,0 +1,26 @@ +# Spec + +Use this to create a compact implementation spec. + +## Output + +```md +# + +## Objective + +## Context + +## Requirements + +## Non-Goals + +## Acceptance Criteria + +## Risks + +## Verification +``` + +Acceptance criteria must be observable. 
Verification must name the commands, tests, manual checks, or review +gates that prove completion. diff --git a/skills/SpecKit/Workflows/Tasks.md b/skills/SpecKit/Workflows/Tasks.md new file mode 100644 index 00000000..b1a11067 --- /dev/null +++ b/skills/SpecKit/Workflows/Tasks.md @@ -0,0 +1,10 @@ +# Tasks + +Use this to convert a plan into ordered tasks. + +## Task Rules + +- Each task must produce a concrete artifact: code, test, doc, config, or verification output. +- Order tasks by dependency, not by file type. +- Include a verification step for every risky behavior change. +- Keep follow-up ideas separate from required completion work. diff --git a/skills/SpecKit/Workflows/Validate.md b/skills/SpecKit/Workflows/Validate.md new file mode 100644 index 00000000..f870f0f3 --- /dev/null +++ b/skills/SpecKit/Workflows/Validate.md @@ -0,0 +1,11 @@ +# Validate + +Use this before declaring a spec-driven effort complete. + +## Checklist + +- Every acceptance criterion is satisfied or explicitly deferred. +- Tests cover the risky behavior, not just the easy path. +- Release gates relevant to the changed surface pass. +- Documentation matches executable behavior. +- Any remaining risk is captured as a follow-up, not hidden in the summary. 
diff --git a/tests/BumpVersionManifest.test.ts b/tests/BumpVersionManifest.test.ts index 35a69b61..36f49908 100644 --- a/tests/BumpVersionManifest.test.ts +++ b/tests/BumpVersionManifest.test.ts @@ -95,8 +95,6 @@ describe('bump-version target manifest', () => { }); test('implementation loads the manifest instead of owning a static target list', () => { - if (!existsSync(SCRIPT_PATH)) return; - const script = readFileSync(SCRIPT_PATH, 'utf-8'); expect(script).toContain('scripts/version-targets.json'); expect(script).toContain('discoverTargets'); diff --git a/tests/BunTestGate.test.ts b/tests/BunTestGate.test.ts new file mode 100644 index 00000000..9eaeb2cc --- /dev/null +++ b/tests/BunTestGate.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, test } from 'bun:test'; +import { parseBunTestOutput, validateBunTestOutput } from '../scripts/bun-test-gate'; + +const passingOutput = `bun test v1.3.14 + +tests/example.test.ts: +(pass) example > passes + + 1 pass + 0 fail + 1 expect() calls +Ran 1 tests across 1 file. 
[12.00ms] +`; + +describe('bun-test-gate', () => { + test('parses a complete passing Bun summary', () => { + expect(parseBunTestOutput(passingOutput)).toEqual({ + pass: 1, + skip: 0, + fail: 0, + expectCalls: 1, + testsRun: 1, + filesRun: 1, + }); + }); + + test('accepts passing output with zero exit status', () => { + expect(validateBunTestOutput(passingOutput, 0).ok).toBe(true); + }); + + test('fails closed on a missing run summary', () => { + const result = validateBunTestOutput('1 pass\n0 fail\n', 0); + expect(result.ok).toBe(false); + expect(result.errors.join('\n')).toContain('complete "Ran N tests across M files" summary'); + }); + + test('fails on non-zero fail counts', () => { + const result = validateBunTestOutput(passingOutput.replace('0 fail', '2 fail'), 0); + expect(result.ok).toBe(false); + expect(result.errors).toContain('Bun reported 2 failing test(s).'); + }); + + test('fails on zero tests', () => { + const result = validateBunTestOutput(passingOutput.replace('Ran 1 tests across 1 file', 'Ran 0 tests across 0 files'), 0); + expect(result.ok).toBe(false); + expect(result.errors).toContain('Bun reported zero tests run.'); + expect(result.errors).toContain('Bun reported zero test files run.'); + }); + + test('fails on non-zero process exit even with a good summary', () => { + const result = validateBunTestOutput(passingOutput, 134); + expect(result.ok).toBe(false); + expect(result.errors).toContain('Bun exited non-zero (134).'); + }); + + test('fails on runtime crash signatures', () => { + const result = validateBunTestOutput(`${passingOutput}\nSIGABRT`, 0); + expect(result.ok).toBe(false); + expect(result.errors.some((error) => error.includes('crash signature'))).toBe(true); + }); + + test('does not fail on benign crash wording in passing test output', () => { + const result = validateBunTestOutput(`${passingOutput}\n(pass) gate > mentions crash signature text in a test name`, 0); + expect(result.ok).toBe(true); + }); +}); diff --git 
a/tests/DocsSpecConsistency.test.ts b/tests/DocsSpecConsistency.test.ts index e1ec384e..49f8c8eb 100644 --- a/tests/DocsSpecConsistency.test.ts +++ b/tests/DocsSpecConsistency.test.ts @@ -46,10 +46,11 @@ describe('docs-spec-consistency', () => { expect(findings[0].line).toBe(1); }); - test('validates 7.4.1 → 7.4.2 → 7.5.x roadmap order', () => { + test('validates 7.4.1 → 7.4.2 → 7.4.4 → 7.5.x roadmap order', () => { const ok = checkRoadmapVersionSequence([ '## 7.4.1 (TARGETED)', '## 7.4.2 (TARGETED)', + '## 7.4.4 (TARGETED)', '## 7.5.0 (TARGETED)', '## 7.5.1 (TARGETED)', '## 7.5.2 (TARGETED)', @@ -60,6 +61,7 @@ describe('docs-spec-consistency', () => { const bad = checkRoadmapVersionSequence([ '## 7.4.2 (TARGETED)', '## 7.4.1 (TARGETED)', + '## 7.4.4 (TARGETED)', '## 7.5.0 (TARGETED)', '## 7.5.1 (TARGETED)', '## 7.5.2 (TARGETED)', diff --git a/tests/GitHubWriteGuard.integration.test.ts b/tests/GitHubWriteGuard.integration.test.ts index 768a2f52..ae59b8af 100644 --- a/tests/GitHubWriteGuard.integration.test.ts +++ b/tests/GitHubWriteGuard.integration.test.ts @@ -246,6 +246,23 @@ describe("GitHubWriteGuard — edge cases", () => { const result = JSON.parse(out.trim()); expect(result.continue).toBe(true); }); + + test("fails closed for write commands when a guard error occurs after detection", async () => { + const result = await runGuard("gh pr create --title test --body test", { + env: { PAI_GITHUB_WRITE_GUARD_TEST_THROW_AFTER_DETECT: "1" }, + }); + + expect(result.decision).toBe("block"); + expect(result.reason).toContain("guard error"); + }); + + test("still fails open for read-only commands when the forced write-path error is enabled", async () => { + const result = await runGuard("gh pr view 67", { + env: { PAI_GITHUB_WRITE_GUARD_TEST_THROW_AFTER_DETECT: "1" }, + }); + + expect(result.continue).toBe(true); + }); }); describe("GitHubWriteGuard — approval ergonomics", () => { diff --git a/tests/HookEventCoverage.test.ts b/tests/HookEventCoverage.test.ts index 
03454922..4c06168f 100644 --- a/tests/HookEventCoverage.test.ts +++ b/tests/HookEventCoverage.test.ts @@ -34,11 +34,11 @@ describe('hook-event schema coverage (PAI-SR-013 release gate)', () => { expect(uncovered).toEqual([]); }); - test('all 12 currently-registered events are present', () => { + test('all 13 currently-registered events are present', () => { // Snapshot of the known-registered set; tightens the gate so silent registration is caught. const events = registeredEvents().sort(); expect(events).toEqual([ - 'ConfigChange', 'PostToolUse', 'PreCompact', 'PreToolUse', 'SessionEnd', + 'ConfigChange', 'InstructionsLoaded', 'PostToolUse', 'PreCompact', 'PreToolUse', 'SessionEnd', 'SessionStart', 'Stop', 'TaskCompleted', 'TeammateIdle', 'UserPromptExpansion', 'UserPromptSubmit', 'WorktreeRemove', ].sort()); diff --git a/tests/InstallRelocation.test.ts b/tests/InstallRelocation.test.ts new file mode 100644 index 00000000..5fcd59bf --- /dev/null +++ b/tests/InstallRelocation.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, test } from 'bun:test'; +import { readFileSync } from 'fs'; +import { join } from 'path'; + +const repo = join(import.meta.dir, '..'); + +describe('install relocation resilience', () => { + test('runtime scripts do not pin the old ~/Projects/kai location', () => { + const files = [ + 'scripts/hooks/pre-push', + 'scripts/weekly-maintenance.ts', + 'hooks/handlers/WikiCurrency.ts', + ]; + + for (const file of files) { + const text = readFileSync(join(repo, file), 'utf8'); + expect(text).not.toContain('Projects/kai'); + } + }); + + test('relocation-sensitive paths derive from repo or PAI environment', () => { + const prePush = readFileSync(join(repo, 'scripts/hooks/pre-push'), 'utf8'); + const weekly = readFileSync(join(repo, 'scripts/weekly-maintenance.ts'), 'utf8'); + const wikiCurrency = readFileSync(join(repo, 'hooks/handlers/WikiCurrency.ts'), 'utf8'); + + 
expect(prePush).toContain('${KAI_PII_PATTERNS:-$REPO_ROOT/scripts/pii-patterns.json}'); + expect(weekly).toContain("process.env.PAI_REPO_DIR ?? PAI_DIR"); + expect(wikiCurrency).toContain('process.env.PAI_REPO_DIR || getPaiDir()'); + }); +}); diff --git a/tests/InstructionsLoaded.test.ts b/tests/InstructionsLoaded.test.ts new file mode 100644 index 00000000..9ca47008 --- /dev/null +++ b/tests/InstructionsLoaded.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, test } from 'bun:test'; +import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const REPO = new URL('..', import.meta.url).pathname.replace(/\/$/, ''); +const HOOK = join(REPO, 'hooks', 'InstructionsLoaded.hook.ts'); + +describe('InstructionsLoaded hook', () => { + test('emits instruction-load telemetry and never includes file contents', () => { + const pai = mkdtempSync(join(tmpdir(), 'instructions-loaded-')); + try { + const result = spawnSync('bun', [HOOK], { + input: JSON.stringify({ + hook_event_name: 'InstructionsLoaded', + session_id: 's1', + cwd: '/tmp/project-a', + file_path: '/tmp/project-a/CLAUDE.md', + reason: 'changed', + memory_type: 'project', + source: 'native', + }), + env: { ...process.env, PAI_DIR: pai }, + encoding: 'utf8', + }); + expect(result.status).toBe(0); + const telemetry = join(pai, 'MEMORY', 'STATE', 'memory-telemetry.jsonl'); + expect(existsSync(telemetry)).toBe(true); + const event = JSON.parse(readFileSync(telemetry, 'utf8').trim()); + expect(event.type).toBe('instructions.loaded'); + expect(event.session_id).toBe('s1'); + expect(event.project).toBe('project-a'); + expect(event.file).toBe('CLAUDE.md'); + expect(event.reason).toBe('changed'); + expect(JSON.stringify(event)).not.toContain('secret'); + } finally { + rmSync(pai, { recursive: true, force: true }); + } + }); +}); diff --git a/tests/KaiArtifactFidelity.test.ts 
b/tests/KaiArtifactFidelity.test.ts new file mode 100644 index 00000000..f2339c32 --- /dev/null +++ b/tests/KaiArtifactFidelity.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, test } from 'bun:test'; +import { mkdirSync, mkdtempSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { compareKaiArtifacts } from '../scripts/kai-artifact-fidelity'; + +function makePair() { + const root = mkdtempSync(join(tmpdir(), 'kai-fidelity-')); + const expected = join(root, 'expected'); + const actual = join(root, 'actual'); + mkdirSync(expected, { recursive: true }); + mkdirSync(actual, { recursive: true }); + return { expected, actual }; +} + +function write(root: string, rel: string, content: string) { + const path = join(root, rel); + mkdirSync(join(path, '..'), { recursive: true }); + writeFileSync(path, content); +} + +describe('kai-artifact-fidelity', () => { + test('passes for identical artifact trees', () => { + const { expected, actual } = makePair(); + write(expected, 'README.md', '# KAI\n'); + write(actual, 'README.md', '# KAI\n'); + + expect(compareKaiArtifacts(expected, actual).ok).toBe(true); + }); + + test('reports files missing from live KAI', () => { + const { expected, actual } = makePair(); + write(expected, 'PAI/Tools/BuildDocs.ts', 'export {};\n'); + + const result = compareKaiArtifacts(expected, actual); + expect(result.ok).toBe(false); + expect(result.missing).toEqual(['PAI/Tools/BuildDocs.ts']); + }); + + test('reports live-only extra files', () => { + const { expected, actual } = makePair(); + write(actual, 'unexpected.md', 'live drift\n'); + + const result = compareKaiArtifacts(expected, actual); + expect(result.ok).toBe(false); + expect(result.extra).toEqual(['unexpected.md']); + }); + + test('reports changed content', () => { + const { expected, actual } = makePair(); + write(expected, 'manifest.json', '{"version":"1"}\n'); + write(actual, 'manifest.json', '{"version":"2"}\n'); + + const result = 
compareKaiArtifacts(expected, actual); + expect(result.ok).toBe(false); + expect(result.different).toEqual(['manifest.json']); + }); + + test('ignores git and runtime files excluded from sync', () => { + const { expected, actual } = makePair(); + write(expected, 'README.md', '# KAI\n'); + write(actual, 'README.md', '# KAI\n'); + write(actual, '.git/config', '[core]\n'); + write(actual, 'settings.json.backup-2026', '{}\n'); + write(actual, '.env.test.local', 'TOKEN=x\n'); + + expect(compareKaiArtifacts(expected, actual).ok).toBe(true); + }); + + test('ignores nested generated dependency directories', () => { + const { expected, actual } = makePair(); + write(expected, 'memcarry/package.json', '{"name":"memcarry"}\n'); + write(actual, 'memcarry/package.json', '{"name":"memcarry"}\n'); + write(actual, 'memcarry/node_modules/.bin/tsc', 'generated\n'); + write(actual, 'skills/Browser/node_modules/bun-types/index.d.ts', 'generated\n'); + + expect(compareKaiArtifacts(expected, actual).ok).toBe(true); + }); + + test('still reports real similarly named source files', () => { + const { actual } = makePair(); + write(actual, 'docs/node_modules-note.md', 'not generated\n'); + + const result = compareKaiArtifacts('', actual); + expect(result.ok).toBe(false); + expect(result.extra).toEqual(['docs/node_modules-note.md']); + }); +}); diff --git a/tests/MemoryCuratePromotion.test.ts b/tests/MemoryCuratePromotion.test.ts new file mode 100644 index 00000000..91f7b17a --- /dev/null +++ b/tests/MemoryCuratePromotion.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, test } from 'bun:test'; +import { execFileSync } from 'child_process'; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; + +const repoRoot = join(import.meta.dir, '..'); + +function makePaiFixture() { + const root = mkdtempSync(join(tmpdir(), 'pai-curate-promotion-')); + const paiDir = join(root, 'pai'); + const home = 
join(root, 'home'); + const insightsDir = join(paiDir, 'MEMORY', 'LEARNING', 'INSIGHTS'); + const stagingDir = join(paiDir, 'MEMORY', 'STAGING'); + const kaiMemory = join(paiDir, 'projects', 'kai', 'memory'); + mkdirSync(insightsDir, { recursive: true }); + mkdirSync(stagingDir, { recursive: true }); + mkdirSync(kaiMemory, { recursive: true }); + mkdirSync(home, { recursive: true }); + writeFileSync(join(kaiMemory, 'MEMORY.md'), '# KAI Memory\n'); + return { root, paiDir, home, insightsDir, stagingDir, kaiMemory }; +} + +function writeInsight(insightsDir: string, filename: string, title: string) { + writeFileSync(join(insightsDir, filename), `--- +title: "${title}" +category: testing +confidence: high +captured: 2026-06-26T12:00:00.000Z +session_id: test-session +status: candidate +--- + +${title} should become durable project memory. +`); +} + +function runCurate(args: string[], paiDir: string, home: string): string { + return execFileSync('bun', ['PAI/Tools/MemoryCurate.ts', ...args], { + cwd: repoRoot, + env: { ...process.env, PAI_DIR: paiDir, HOME: home }, + encoding: 'utf8', + }); +} + +function writeDraft(stagingDir: string, filename: string, title: string, body: string) { + const now = new Date().toISOString(); + const expires = new Date(Date.now() + 7 * 86_400_000).toISOString(); + writeFileSync(join(stagingDir, filename), `--- +type: draft-memory +source_type: success-pattern +source_session: test-session +source_rating: 9 +confidence: 0.9 +generated: ${now} +expires: ${expires} +target_project: kai +target_filename: feedback_success_pattern.md +title: "${title}" +--- + +${body} +`); + writeFileSync(join(stagingDir, '.staging-state.json'), JSON.stringify({ + created: now.slice(0, 10), + expiryDays: 14, + drafts: [{ filename, generated: now, expires, type: 'success-pattern' }], + stats: { totalGenerated: 1, totalApproved: 0, totalRejected: 0, totalExpired: 0 }, + }, null, 2)); +} + +describe('MemoryCurate insight promotion', () => { + test('promote 
--from-manifest drains multiple candidate insights in one process', () => { + const fixture = makePaiFixture(); + writeInsight(fixture.insightsDir, 'one.md', 'First lesson'); + writeInsight(fixture.insightsDir, 'two.md', 'Second lesson'); + const manifest = join(fixture.root, 'promote.tsv'); + writeFileSync(manifest, 'one.md\tkai\ntwo.md\tkai\n'); + + const output = runCurate(['promote', '--from-manifest', manifest], fixture.paiDir, fixture.home); + + expect(output).toContain('Promoted 2 insight(s)'); + const promoted = readFileSync(join(fixture.kaiMemory, 'insights_promoted.md'), 'utf-8'); + expect(promoted).toContain('## First lesson'); + expect(promoted).toContain('## Second lesson'); + expect(readFileSync(join(fixture.insightsDir, 'one.md'), 'utf-8')).toContain('status: promoted'); + expect(readFileSync(join(fixture.kaiMemory, 'MEMORY.md'), 'utf-8')).toContain('[Promoted Insights](insights_promoted.md)'); + }); + + test('promote --all promotes every pending candidate to the requested project', () => { + const fixture = makePaiFixture(); + writeInsight(fixture.insightsDir, 'all-one.md', 'All first lesson'); + writeInsight(fixture.insightsDir, 'all-two.md', 'All second lesson'); + writeFileSync(join(fixture.insightsDir, 'already.md'), `--- +title: "Already promoted" +status: promoted +--- + +skip me +`); + + const output = runCurate(['promote', '--all', 'kai'], fixture.paiDir, fixture.home); + + expect(output).toContain('Promoted 2 insight(s)'); + const promoted = readFileSync(join(fixture.kaiMemory, 'insights_promoted.md'), 'utf-8'); + expect(promoted).toContain('## All first lesson'); + expect(promoted).toContain('## All second lesson'); + expect(promoted).not.toContain('Already promoted'); + expect(existsSync(join(fixture.kaiMemory, 'MEMORY.md'))).toBe(true); + }); +}); + +describe('MemoryCurate draft approval', () => { + test('approve appends to an existing fixed-name memory file instead of overwriting', () => { + const fixture = makePaiFixture(); + const 
target = join(fixture.kaiMemory, 'feedback_success_pattern.md'); + writeFileSync(target, `--- +type: feedback +description: Existing hand-edited feedback +source: manual +--- + +Existing durable lesson. +`); + + writeDraft(fixture.stagingDir, 'first.md', 'First approved lesson', 'First generated lesson.'); + runCurate(['approve', '1'], fixture.paiDir, fixture.home); + writeDraft(fixture.stagingDir, 'second.md', 'Second approved lesson', 'Second generated lesson.'); + runCurate(['approve', '1'], fixture.paiDir, fixture.home); + + const approved = readFileSync(target, 'utf-8'); + expect(approved).toContain('Existing durable lesson.'); + expect(approved).toContain('## First approved lesson'); + expect(approved).toContain('First generated lesson.'); + expect(approved).toContain('## Second approved lesson'); + expect(approved).toContain('Second generated lesson.'); + }); +}); diff --git a/tests/MemoryDisclosure.test.ts b/tests/MemoryDisclosure.test.ts index f842618a..7ccca895 100644 --- a/tests/MemoryDisclosure.test.ts +++ b/tests/MemoryDisclosure.test.ts @@ -9,6 +9,7 @@ import { pruneIndex, appendTimeline, initializeMeta, + loadPromotedInsights, recordDetailRead, MAX_TIMELINE_ENTRIES, type MemoryMetaEntry, @@ -180,3 +181,41 @@ describe('recordDetailRead', () => { expect(entries[0].reference_count).toBe(1); }); }); + +describe('loadPromotedInsights', () => { + it('surfaces latest promoted insight sections for current project', () => { + const paiDir = mkPaiDir(); + const projectDir = '/Users/test/Projects/kai'; + const encoded = projectDir.replace(/[^a-zA-Z0-9]/g, '-'); + const memoryDir = join(paiDir, 'projects', encoded, 'memory'); + mkdirSync(memoryDir, { recursive: true }); + writeFileSync(join(memoryDir, 'insights_promoted.md'), `--- +type: project +--- + +# Promoted Insights + +## Old +old body + +## Middle +middle body + +## New +new body +`); + + const prev = process.env.CLAUDE_PROJECT_DIR; + process.env.CLAUDE_PROJECT_DIR = projectDir; + try { + const result = 
loadPromotedInsights(paiDir, 2); + expect(result).toContain('## Promoted Insights (recent)'); + expect(result).not.toContain('## Old'); + expect(result).toContain('## Middle'); + expect(result).toContain('## New'); + } finally { + if (prev === undefined) delete process.env.CLAUDE_PROJECT_DIR; + else process.env.CLAUDE_PROJECT_DIR = prev; + } + }); +}); diff --git a/tests/MemoryFunctions.test.ts b/tests/MemoryFunctions.test.ts index 593cf595..b1475519 100644 --- a/tests/MemoryFunctions.test.ts +++ b/tests/MemoryFunctions.test.ts @@ -181,7 +181,7 @@ describe('synthesisToStagingContent', () => { // --- maybeRunSynthesisBackstop tests --- // Note: This function spawns a subprocess and reads state files. // We test it indirectly by checking it doesn't throw. -import { maybeRunSynthesisBackstop, maybeAutoConsolidate } from '../hooks/SessionCleanup.hook'; +import { autoConsolidateEnabled, maybeRunSynthesisBackstop, maybeAutoConsolidate } from '../hooks/SessionCleanup.hook'; describe('maybeRunSynthesisBackstop', () => { test('does not throw when called', () => { @@ -194,6 +194,13 @@ describe('maybeRunSynthesisBackstop', () => { }); describe('maybeAutoConsolidate', () => { + test('is disabled by default and enabled only by explicit opt-in env', () => { + expect(autoConsolidateEnabled({})).toBe(false); + expect(autoConsolidateEnabled({ PAI_AUTO_CONSOLIDATE: '0' })).toBe(false); + expect(autoConsolidateEnabled({ PAI_AUTO_CONSOLIDATE: '1' })).toBe(true); + expect(autoConsolidateEnabled({ PAI_AUTO_CONSOLIDATE: 'true' })).toBe(true); + }); + test('does not throw when called', () => { expect(() => maybeAutoConsolidate()).not.toThrow(); }); diff --git a/tests/MemoryTelemetryHooks.test.ts b/tests/MemoryTelemetryHooks.test.ts index a8dc1f90..ca84d6a2 100644 --- a/tests/MemoryTelemetryHooks.test.ts +++ b/tests/MemoryTelemetryHooks.test.ts @@ -169,9 +169,13 @@ describe('memory telemetry hook emitters', () => { const stateDir = join(paiDir, 'MEMORY', 'STATE'); mkdirSync(stateDir, { 
recursive: true }); writeFileSync(join(stateDir, 'memory-telemetry.jsonl'), [ - JSON.stringify({ ts: '2026-06-25T00:00:00.000Z', type: 'agent.return', project: 'Instant-Help', agent_type: 'general-purpose', description: 'Validate nav bugs', result_chars: 11070 }), - JSON.stringify({ ts: '2026-06-25T00:00:01.000Z', type: 'agent.return', project: 'Instant-Help', agent_type: 'general-purpose', description: 'Validate naming bugs', result_chars: 10480 }), - JSON.stringify({ ts: '2026-06-25T00:00:02.000Z', type: 'agent.checkpoint', project: 'Instant-Help', agent_type: 'general-purpose' }), + JSON.stringify({ ts: '2026-06-25T00:00:00.000Z', type: 'agent.spawn', project: 'Instant-Help', agent_call_id: 'a1', agent_type: 'general-purpose', description: 'Validate nav bugs', context_handoff_missing: true }), + JSON.stringify({ ts: '2026-06-25T00:00:01.000Z', type: 'agent.spawn', project: 'Instant-Help', agent_call_id: 'a2', agent_type: 'general-purpose', description: 'Validate naming bugs' }), + JSON.stringify({ ts: '2026-06-25T00:00:03.000Z', type: 'agent.return', project: 'Instant-Help', agent_call_id: 'a1', agent_type: 'general-purpose', description: 'Validate nav bugs', result_chars: 11070, return_status: 'ok' }), + JSON.stringify({ ts: '2026-06-25T00:00:05.000Z', type: 'agent.return', project: 'Instant-Help', agent_call_id: 'a2', agent_type: 'general-purpose', description: 'Validate naming bugs', result_chars: 10480, return_status: 'failed' }), + JSON.stringify({ ts: '2026-06-25T00:00:06.000Z', type: 'agent.checkpoint', project: 'Instant-Help', agent_call_id: 'a1', agent_type: 'general-purpose' }), + JSON.stringify({ ts: '2026-06-25T00:00:07.000Z', type: 'agent.task_completed', project: 'Instant-Help', teammate_name: 'qa' }), + JSON.stringify({ ts: '2026-06-25T00:00:08.000Z', type: 'agent.idle', project: 'Instant-Help', teammate_name: 'qa' }), ].join('\n') + '\n'); const res = spawnSync('bun', [TELEMETRY_REPORT, '--json'], { @@ -181,8 +185,16 @@ describe('memory telemetry 
hook emitters', () => { expect(res.status).toBe(0); const report = JSON.parse(res.stdout); + expect(report.agent_return.spawns).toBe(2); expect(report.agent_return.returns).toBe(2); expect(report.agent_return.checkpoints).toBe(1); + expect(report.agent_return.task_completed).toBe(1); + expect(report.agent_return.idle).toBe(1); + expect(report.agent_return.returns_without_checkpoint_prompt).toBe(1); + expect(report.agent_return.return_status.failed).toBe(1); + expect(report.agent_return.missing_context_handoff).toBe(1); + expect(report.agent_return.spawn_to_return_latency.count).toBe(2); + expect(report.agent_return.spawn_to_return_latency.p50).toBe(4000); expect(report.agent_return.result_chars.total).toBe(21550); expect(report.agent_return.by_project['Instant-Help'].total_chars).toBe(21550); expect(report.agent_return.largest_returns[0].description).toBe('Validate nav bugs'); diff --git a/tests/NativeEventMigration.test.ts b/tests/NativeEventMigration.test.ts index c7c2ba85..a6338c91 100644 --- a/tests/NativeEventMigration.test.ts +++ b/tests/NativeEventMigration.test.ts @@ -12,6 +12,12 @@ import { join } from 'path'; import { tmpdir } from 'os'; import { spawn } from 'bun'; import { parseJSONC } from '../hooks/handlers/BuildSettings.ts'; +import { readState, writeState } from '../hooks/lib/algorithm-state'; +import { + buildBackgroundDelegationEnvelope, + buildAgentContextHandoff, + decideAgentContextHandoff, +} from '../hooks/lib/agent-context-handoff'; const REPO = new URL('..', import.meta.url).pathname.replace(/\/$/, ''); const HOOKS = join(REPO, 'hooks'); @@ -73,6 +79,83 @@ describe('Agent tool migration', () => { expect(result.stdout).toContain('Agent call'); }); + test('Agent context handoff policy keeps fast Explore prompts lean', () => { + const decision = decideAgentContextHandoff({ + subagent_type: 'Explore', + model: 'haiku', + prompt: 'Find where AgentExecutionGuard is wired.\n## Scope\nTiming: FAST', + }); + + expect(decision.tier).toBe('none'); + 
expect(decision.hasHandoff).toBe(false); + }); + + test('Agent context handoff policy gives Explore/Plan narrow context for reach-sensitive work', () => { + expect(decideAgentContextHandoff({ + subagent_type: 'Explore', + prompt: 'Find the ADA REGISTRY and roadmap files that define delegation reach.', + }).tier).toBe('rules'); + + expect(decideAgentContextHandoff({ + subagent_type: 'Explore', + prompt: 'Find the ADA REGISTRY file.\n## Scope\nTiming: FAST', + }).tier).toBe('rules'); + + expect(decideAgentContextHandoff({ + subagent_type: 'Plan', + prompt: 'Plan the KAI release checklist and branch guard changes.', + }).tier).toBe('rules'); + + expect(decideAgentContextHandoff({ + subagent_type: 'Plan', + prompt: 'Sketch a simple refactor plan for parser.ts.', + }).tier).toBe('none'); + }); + + test('Agent context handoff policy selects rules/full tiers for delegated work', () => { + expect(decideAgentContextHandoff({ + subagent_type: 'Engineer', + prompt: 'Implement the context handoff MVP.', + }).tier).toBe('rules'); + + expect(decideAgentContextHandoff({ + subagent_type: 'Pentester', + prompt: 'Review delegation safety.\n## Scope\nTiming: DEEP', + }).tier).toBe('full'); + + const block = buildAgentContextHandoff('rules'); + expect(block).toContain(''); + expect(block).toContain('branch-only workflow'); + expect(block).toContain('Durable findings for parent checkpoint:'); + }); + + test('background delegation envelope carries privacy and coordination boundaries', () => { + const envelope = buildBackgroundDelegationEnvelope(); + expect(envelope).toContain(''); + expect(envelope).toContain(''); + expect(envelope).toContain('Do not include private conversation history'); + expect(envelope).toContain('Coordinate through commits, PR comments, or explicit task/status files'); + }); + + test('AgentExecutionGuard surfaces missing context handoff for implementation agents', async () => { + const result = await runHook('AgentExecutionGuard.hook.ts', { + session_id: 's', + 
hook_event_name: 'PreToolUse', + tool_name: 'Agent', + tool_input: { + subagent_type: 'Engineer', + description: 'implement handoff', + run_in_background: true, + prompt: 'Implement the context handoff MVP.', + }, + }); + + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain('Agent context handoff missing'); + expect(result.stdout).toContain(''); + expect(result.stdout).not.toContain('FOREGROUND AGENT DETECTED'); + }); + test('AgentMemoryCapture emits reminder on substantive current Agent return', async () => { const result = await runHook('AgentMemoryCapture.hook.ts', { session_id: 's', @@ -88,6 +171,52 @@ describe('Agent tool migration', () => { expect(out.additionalContext).toContain('Researcher subagent'); expect(out.additionalContext).toContain('checkpoint it now'); }); + + test('AgentMemoryCapture emits reminder on explicit durable checkpoint marker', async () => { + const result = await runHook('AgentMemoryCapture.hook.ts', { + session_id: 's', + hook_event_name: 'PostToolUse', + tool_name: 'Agent', + cwd: REPO, + tool_input: { subagent_type: 'Engineer', description: 'small durable result' }, + tool_response: { result: 'Durable findings for parent checkpoint: repo fact X was verified.' }, + }); + + expect(result.exitCode).toBe(0); + const out = JSON.parse(result.stdout); + expect(out.additionalContext).toContain('Engineer subagent'); + expect(out.additionalContext).toContain('Durable findings for parent checkpoint'); + }); + + test('AgentMemoryCapture prompts on failed concise Agent return', async () => { + const result = await runHook('AgentMemoryCapture.hook.ts', { + session_id: 's', + hook_event_name: 'PostToolUse', + tool_name: 'Agent', + cwd: REPO, + tool_input: { subagent_type: 'Engineer', description: 'timed out task' }, + tool_response: { status: 'timeout', error: 'Agent timed out before finishing.' 
}, + }); + + expect(result.exitCode).toBe(0); + const out = JSON.parse(result.stdout); + expect(out.additionalContext).toContain('returned failed output'); + expect(out.additionalContext).toContain('inspect the result'); + }); + + test('AgentMemoryCapture stays quiet on concise non-durable successful return', async () => { + const result = await runHook('AgentMemoryCapture.hook.ts', { + session_id: 's', + hook_event_name: 'PostToolUse', + tool_name: 'Agent', + cwd: REPO, + tool_input: { subagent_type: 'Explore', description: 'quick lookup' }, + tool_response: { result: 'No matching file found.' }, + }); + + expect(result.exitCode).toBe(0); + expect(result.stdout).toBe(''); + }); }); describe('current native event fields', () => { @@ -106,6 +235,42 @@ describe('current native event fields', () => { expect(result.stdout).toBe(''); }); + test('TaskCompleted marks matching tracked agent completed using current teammate_name', async () => { + const paiDir = mkdtempSync(join(tmpdir(), 'pai-task-completed-agent-')); + const savedPaiDir = process.env.PAI_DIR; + try { + process.env.PAI_DIR = paiDir; + writeState({ + active: true, + sessionId: 's-agent-complete', + taskDescription: 'agent lifecycle test', + currentPhase: 'BUILD', + phaseStartedAt: Date.now(), + algorithmStartedAt: Date.now(), + sla: 'Standard', + criteria: [], + agents: [{ name: 'qa', agentType: 'QATester', status: 'active' }], + capabilities: ['Agent'], + phaseHistory: [], + } as any); + + const result = await runHook('TaskCompleted.hook.ts', { + session_id: 's-agent-complete', + hook_event_name: 'TaskCompleted', + task_id: 't1', + task_subject: 'non-ISC QA task', + task_description: 'done', + teammate_name: 'qa', + }, { PAI_DIR: paiDir }); + + expect(result.exitCode).toBe(0); + expect(readState('s-agent-complete')?.agents[0].status).toBe('completed'); + } finally { + if (savedPaiDir === undefined) delete process.env.PAI_DIR; else process.env.PAI_DIR = savedPaiDir; + rmSync(paiDir, { recursive: true, force: 
true }); + } + }); + test('legacy TaskCompleted subject field no longer triggers ISC logic', async () => { const result = await runHook('TaskCompleted.hook.ts', { session_id: 's', @@ -132,6 +297,40 @@ describe('current native event fields', () => { expect(result.stderr).toBe(''); }); + test('TeammateIdle marks matching tracked agent idle without legacy last_message', async () => { + const paiDir = mkdtempSync(join(tmpdir(), 'pai-teammate-idle-agent-')); + const savedPaiDir = process.env.PAI_DIR; + try { + process.env.PAI_DIR = paiDir; + writeState({ + active: true, + sessionId: 's-agent-idle', + taskDescription: 'agent lifecycle test', + currentPhase: 'BUILD', + phaseStartedAt: Date.now(), + algorithmStartedAt: Date.now(), + sla: 'Standard', + criteria: [], + agents: [{ name: 'qa', agentType: 'QATester', status: 'active' }], + capabilities: ['Agent'], + phaseHistory: [], + } as any); + + const result = await runHook('TeammateIdle.hook.ts', { + session_id: 's-agent-idle', + hook_event_name: 'TeammateIdle', + teammate_name: 'qa', + team_name: 'review', + }, { PAI_DIR: paiDir }); + + expect(result.exitCode).toBe(0); + expect(readState('s-agent-idle')?.agents[0].status).toBe('idle'); + } finally { + if (savedPaiDir === undefined) delete process.env.PAI_DIR; else process.env.PAI_DIR = savedPaiDir; + rmSync(paiDir, { recursive: true, force: true }); + } + }); + test('TeammateIdle blocks unattributable payloads through stderr exit-2 feedback', async () => { const result = await runHook('TeammateIdle.hook.ts', { session_id: 's', diff --git a/tests/OrchestratorAdapters.test.ts b/tests/OrchestratorAdapters.test.ts new file mode 100644 index 00000000..bdea06e8 --- /dev/null +++ b/tests/OrchestratorAdapters.test.ts @@ -0,0 +1,213 @@ +import { describe, expect, test } from 'bun:test'; +import { codexLocalAdapterConfig } from '../PAI/Tools/orchestrator/adapters/codex-local'; +import { + describeCommandRun, + runCommandAdapter, + structuredPrompt, + type CommandAdapterConfig, +} 
from '../PAI/Tools/orchestrator/adapters/command'; +import { claudeLocalAdapterConfig } from '../PAI/Tools/orchestrator/adapters/claude-local'; +import { type WorkPacket } from '../PAI/Tools/orchestrator/schema'; + +function packet(overrides: Partial = {}): WorkPacket { + return { + id: 'packet-1', + workItemId: 'work-1', + type: 'adversarial-review', + objective: 'Review the fixture plan.', + role: { + id: 'reviewer-1', + role: 'reviewer', + engine: 'stub-engine', + capabilities: ['review'], + }, + inputs: [ + { + id: 'input-1', + type: 'markdown', + source: 'fixture.md', + content: '# Fixture', + }, + ], + policy: { + autonomy: 'advise', + maxRounds: 1, + allowedPaths: ['**'], + blockedPaths: ['MEMORY/**'], + requireGreenCI: true, + allowPush: false, + allowMerge: false, + stopOnPrivateBoundaryRisk: true, + }, + artifacts: [], + ...overrides, + }; +} + +function stubConfig(code: string, overrides: Partial = {}): CommandAdapterConfig { + return { + id: 'stub', + engine: 'stub-engine', + command: process.execPath, + args: ['-e', code], + timeoutMs: 2_000, + requireStructuredOutput: true, + envAllowlist: ['PATH'], + supportedRoles: ['reviewer', 'validator'], + supportedCapabilities: ['review', 'validate'], + ...overrides, + }; +} + +describe('orchestrator command adapters', () => { + test('runs a stub command and parses structured AgentResult output', async () => { + const code = ` + let input = ''; + process.stdin.on('data', chunk => input += chunk); + process.stdin.on('end', () => { + const packet = JSON.parse(input); + console.log(JSON.stringify({ + status: 'findings', + summary: 'reviewed ' + packet.id, + artifacts: [], + findings: [{ + id: 'finding-1', + severity: 'medium', + category: 'design', + issue: 'Fixture issue', + recommendation: 'Tighten the plan', + confidence: 'high', + status: 'open' + }] + })); + }); + `; + + const result = await runCommandAdapter(packet(), stubConfig(code)); + expect(result.status).toBe('findings'); + 
expect(result.summary).toBe('reviewed packet-1'); + expect(result.findings?.[0].id).toBe('finding-1'); + expect(result.artifacts.at(-1)?.metadata?.exitCode).toBe(0); + }); + + test('returns error artifacts for malformed structured output instead of throwing', async () => { + const result = await runCommandAdapter(packet(), stubConfig("console.log('not json')")); + expect(result.status).toBe('error'); + expect(result.summary).toContain('not valid JSON'); + expect(result.artifacts[0].content).toContain('not json'); + }); + + test('returns error artifacts for nonzero exit codes', async () => { + const result = await runCommandAdapter(packet(), stubConfig("console.error('boom'); process.exit(7)")); + expect(result.status).toBe('error'); + expect(result.summary).toContain('code 7'); + expect(result.artifacts[0].metadata?.stderr).toContain('boom'); + }); + + test('returns blocked artifacts on timeout', async () => { + const result = await runCommandAdapter(packet(), stubConfig('setTimeout(() => {}, 500)', { timeoutMs: 25 })); + expect(result.status).toBe('blocked'); + expect(result.summary).toContain('timed out'); + expect(result.artifacts[0].metadata?.timedOut).toBe(true); + }); + + test('escalates timed-out commands that ignore SIGTERM', async () => { + const startedAt = Date.now(); + const result = await runCommandAdapter( + packet(), + stubConfig("process.on('SIGTERM', () => {}); setInterval(() => {}, 1000)", { + timeoutMs: 30, + killGraceMs: 30, + }), + ); + const elapsedMs = Date.now() - startedAt; + + expect(result.status).toBe('blocked'); + expect(result.summary).toContain('timed out'); + expect(result.artifacts[0].metadata?.timedOut).toBe(true); + expect(result.artifacts[0].metadata?.killGraceMs).toBe(30); + expect(elapsedMs).toBeLessThan(1_000); + }); + + test('bounds buffered command output and blocks on overflow', async () => { + const result = await runCommandAdapter( + packet(), + stubConfig("process.stdout.write('x'.repeat(2048)); setTimeout(() => {}, 
500)", { + maxOutputBytes: 128, + timeoutMs: 1_000, + killGraceMs: 30, + }), + ); + + expect(result.status).toBe('blocked'); + expect(result.summary).toContain('exceeded max output'); + expect(result.artifacts[0].content.length).toBeLessThanOrEqual(128); + expect(result.artifacts[0].metadata?.outputExceeded).toBe(true); + expect(result.artifacts[0].metadata?.maxOutputBytes).toBe(128); + }); + + test('does not expose unallowlisted environment variables to commands', async () => { + const previous = process.env.PAI_ORCHESTRATOR_SECRET_TEST; + process.env.PAI_ORCHESTRATOR_SECRET_TEST = 'must-not-leak'; + try { + const code = ` + console.log(JSON.stringify({ + status: process.env.PAI_ORCHESTRATOR_SECRET_TEST ? 'error' : 'pass', + summary: process.env.PAI_ORCHESTRATOR_SECRET_TEST ? 'secret visible' : 'secret hidden', + artifacts: [] + })); + `; + const result = await runCommandAdapter(packet(), stubConfig(code, { envAllowlist: ['PATH'] })); + expect(result.status).toBe('pass'); + expect(result.summary).toBe('secret hidden'); + } finally { + if (previous === undefined) { + delete process.env.PAI_ORCHESTRATOR_SECRET_TEST; + } else { + process.env.PAI_ORCHESTRATOR_SECRET_TEST = previous; + } + } + }); + + test('blocks unsupported roles before spawning a command', async () => { + const result = await runCommandAdapter( + packet({ + role: { + role: 'fixer', + engine: 'stub-engine', + capabilities: ['implement'], + }, + }), + stubConfig('throw new Error("should not spawn")'), + ); + expect(result.status).toBe('blocked'); + expect(result.summary).toContain('Role fixer is not supported'); + }); + + test('renders structured prompts and command descriptions deterministically', () => { + const workPacket = packet(); + const config = stubConfig('console.log(process.argv[1])', { args: ['{{packet.id}}', '{{packet.role.role}}', '{{prompt}}'] }); + const run = describeCommandRun(workPacket, config); + expect(run.args[0]).toBe('packet-1'); + expect(run.args[1]).toBe('reviewer'); + 
expect(run.args[2]).toContain('Return ONLY JSON'); + expect(run.envKeys).toEqual(['PATH']); + expect(structuredPrompt(workPacket)).toContain('"workItemId": "work-1"'); + }); + + test('claude-local config supports review, red-team, judge, and validator roles with structured output', () => { + const config = claudeLocalAdapterConfig({ command: process.execPath }); + expect(config.engine).toBe('claude-local'); + expect(config.requireStructuredOutput).toBe(true); + expect(config.supportedRoles).toEqual(['reviewer', 'red-team', 'judge', 'validator']); + expect(config.supportedCapabilities).toEqual(['review', 'red-team', 'judge', 'validate']); + }); + + test('codex-local config supports implementation, fix, judge, and validation roles with structured output', () => { + const config = codexLocalAdapterConfig({ command: process.execPath }); + expect(config.engine).toBe('codex-local'); + expect(config.requireStructuredOutput).toBe(true); + expect(config.supportedRoles).toEqual(['implementer', 'fixer', 'judge', 'validator']); + expect(config.supportedCapabilities).toEqual(['implement', 'judge', 'validate']); + }); +}); diff --git a/tests/OrchestratorAdversarialReview.test.ts b/tests/OrchestratorAdversarialReview.test.ts new file mode 100644 index 00000000..4a9c319b --- /dev/null +++ b/tests/OrchestratorAdversarialReview.test.ts @@ -0,0 +1,142 @@ +import { describe, expect, test } from 'bun:test'; +import { existsSync, mkdtempSync, readFileSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { runAdversarialReview, type AgentRunner } from '../PAI/Tools/orchestrator/workflows/adversarial-review'; +import { type WorkItem, type WorkPacket } from '../PAI/Tools/orchestrator/schema'; + +function fixture(): WorkItem { + return JSON.parse(readFileSync(join(process.cwd(), 'fixtures', 'orchestrator', 'adversarial-review.json'), 'utf-8')); +} + +describe('orchestrator adversarial-review workflow', () => { + test('runs reviewer/red-team and judge packets 
into a synthesis decision', async () => { + const packets: WorkPacket[] = []; + const runner: AgentRunner = async (packet) => { + packets.push(packet); + if (packet.role.role === 'red-team') { + return { + status: 'findings', + summary: 'red-team reviewed plan', + artifacts: [{ + id: 'red-team-result', + type: 'agent-result', + source: packet.role.engine, + content: 'red-team result', + }], + findings: [{ + id: 'finding-medium-1', + severity: 'medium', + category: 'design', + issue: 'Rollback checkpoint is implicit.', + recommendation: 'Make rollback checkpoint explicit before unattended execution.', + confidence: 'high', + status: 'open', + }], + }; + } + return { + status: 'pass', + summary: `judge synthesized ${packet.findings?.length ?? 0} finding(s)`, + artifacts: [{ + id: 'judge-result', + type: 'agent-result', + source: packet.role.engine, + content: 'judge result', + }], + }; + }; + + const result = await runAdversarialReview(fixture(), { runAgent: runner }); + + expect(result.status).toBe('complete'); + expect(result.policyResult.allowed).toBe(true); + expect(result.findings.map((finding) => finding.id)).toEqual(['finding-medium-1']); + expect(packets.map((packet) => packet.role.role)).toEqual(['red-team', 'judge']); + expect(packets[1].findings?.[0].id).toBe('finding-medium-1'); + expect(result.artifacts.some((artifact) => artifact.type === 'packet')).toBe(true); + expect(result.artifacts.some((artifact) => artifact.type === 'finding-report')).toBe(true); + expect(result.artifacts.some((artifact) => artifact.type === 'decision')).toBe(true); + }); + + test('blocks the synthesis decision when high findings remain open', async () => { + const runner: AgentRunner = async (packet) => { + if (packet.role.role === 'red-team') { + return { + status: 'findings', + summary: 'red-team found a high issue', + artifacts: [], + findings: [{ + id: 'finding-high-1', + severity: 'high', + category: 'security', + issue: 'Unattended execution has no explicit permission 
gate.', + recommendation: 'Require an explicit permission gate before execution.', + confidence: 'high', + status: 'open', + }], + }; + } + return { status: 'pass', summary: 'judge agrees this must block', artifacts: [] }; + }; + + const result = await runAdversarialReview(fixture(), { runAgent: runner }); + + expect(result.status).toBe('blocked'); + expect(result.policyResult.allowed).toBe(false); + expect(result.decision.reason).toContain('Open blocker/high findings remain'); + expect(result.decision.actions).toContain('fix-or-triage-blocking-findings'); + }); + + test('blocks when work item source paths hit blocked policy paths', async () => { + const workItem = fixture(); + workItem.inputs[0] = { + ...workItem.inputs[0], + source: 'MEMORY/STATE/private-runtime.json', + }; + + const result = await runAdversarialReview(workItem, { dryRun: true }); + + expect(result.status).toBe('blocked'); + expect(result.decision.reason).toContain('Changed path is blocked by policy: MEMORY/STATE/private-runtime.json'); + expect(result.policyResult.reasons).toContain('Blocked path changed: MEMORY/STATE/private-runtime.json'); + }); + + test('blocks and records an artifact for malformed agent output', async () => { + const runner: AgentRunner = async (packet) => { + if (packet.role.role === 'red-team') return { malformed: true }; + return { status: 'pass', summary: 'judge should still produce output', artifacts: [] }; + }; + + const result = await runAdversarialReview(fixture(), { runAgent: runner }); + + expect(result.status).toBe('blocked'); + expect(result.policyResult.reasons).toContain('Agent output is malformed.'); + expect(result.warnings[0]).toContain('agentResult.status'); + expect(result.artifacts.some((artifact) => + artifact.type === 'agent-result' && artifact.content?.includes('Agent result failed validation') + )).toBe(true); + }); + + test('dry-run fixture runner produces local-only artifacts without live command adapters', async () => { + const result = await 
runAdversarialReview(fixture(), { dryRun: true }); + + expect(result.status).toBe('complete'); + expect(result.packets.map((packet) => packet.role.role)).toEqual(['red-team', 'judge']); + expect(result.agentResults.every((agentResult) => + agentResult.artifacts.every((artifact) => artifact.metadata?.dryRunFixture === true) + )).toBe(true); + expect(result.artifacts.map((artifact) => artifact.source)).not.toContain('github'); + }); + + test('can persist workflow artifacts to the ledger when a root is provided', async () => { + const ledgerRoot = mkdtempSync(join(tmpdir(), 'pai-adversarial-review-ledger-')); + + const result = await runAdversarialReview(fixture(), { dryRun: true, ledgerRoot }); + + expect(result.status).toBe('complete'); + expect(result.artifacts.every((artifact) => artifact.path)).toBe(true); + expect(existsSync(join(ledgerRoot, fixture().id, 'work-item.json'))).toBe(true); + expect(existsSync(join(ledgerRoot, fixture().id, 'artifacts', `${fixture().id}-decision.json`))).toBe(true); + }); +}); diff --git a/tests/OrchestratorCli.test.ts b/tests/OrchestratorCli.test.ts new file mode 100644 index 00000000..c9a1e54b --- /dev/null +++ b/tests/OrchestratorCli.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, test } from 'bun:test'; + +function runOrchestrator(args: string[]) { + return Bun.spawnSync(['bun', 'PAI/Tools/Orchestrator.ts', ...args], { + cwd: process.cwd(), + stdout: 'pipe', + stderr: 'pipe', + }); +} + +function runPai(args: string[]) { + return Bun.spawnSync(['bun', 'PAI/Tools/pai.ts', ...args], { + cwd: process.cwd(), + stdout: 'pipe', + stderr: 'pipe', + }); +} + +describe('orchestrator CLI skeleton', () => { + test('dry-run validates a fixture without executing live engines', () => { + const result = runOrchestrator(['run', 'fixtures/orchestrator/pr-review.json', '--dry-run']); + expect(result.exitCode).toBe(0); + const payload = JSON.parse(result.stdout.toString()); + expect(payload.status).toBe('fixed'); + 
expect(payload.workItemId).toBe('fixture-pr-review-001'); + expect(payload.reason).toContain('Fix round completed'); + expect(payload.checkState).toBe('green'); + expect(payload.commentsWritten).toBe(0); + expect(payload.packets).toEqual([ + 'fixture-pr-review-001-claude-reviewer-review', + 'fixture-pr-review-001-codex-fixer-fix', + ]); + }); + + test('run without dry-run fails closed', () => { + const result = runOrchestrator(['run', 'fixtures/orchestrator/pr-review.json']); + expect(result.exitCode).toBe(2); + const payload = JSON.parse(result.stdout.toString()); + expect(payload.status).toBe('blocked'); + expect(payload.reason).toContain('not implemented'); + }); + + test('invalid work item fails closed', () => { + const result = runOrchestrator(['run', 'fixtures/orchestrator/missing.json', '--dry-run']); + expect(result.exitCode).toBe(2); + const payload = JSON.parse(result.stdout.toString()); + expect(payload.status).toBe('failed'); + expect(payload.errors[0]).toContain('not found'); + }); + + test('pai dispatcher routes adversarial-review dry-run through the workflow', () => { + const result = runPai(['orchestrator', 'run', 'fixtures/orchestrator/adversarial-review.json', '--dry-run']); + expect(result.exitCode).toBe(0); + const payload = JSON.parse(result.stdout.toString()); + expect(payload.status).toBe('complete'); + expect(payload.workItemId).toBe('fixture-adversarial-review-001'); + expect(payload.reason).toContain('Policy permits advise'); + expect(payload.findings).toBe(1); + expect(payload.packets).toEqual([ + 'fixture-adversarial-review-001-claude-red-team-review', + 'fixture-adversarial-review-001-codex-judge-judge', + ]); + }); +}); diff --git a/tests/OrchestratorComments.test.ts b/tests/OrchestratorComments.test.ts new file mode 100644 index 00000000..667de04b --- /dev/null +++ b/tests/OrchestratorComments.test.ts @@ -0,0 +1,90 @@ +import { describe, expect, test } from 'bun:test'; +import { + COMMENT_MARKER_END, + COMMENT_MARKER_START, + 
emitOrchestratorComment, + parseOrchestratorComment, +} from '../PAI/Tools/orchestrator/comments'; +import type { Decision, Finding } from '../PAI/Tools/orchestrator/schema'; + +const finding: Finding = { + id: 'finding-1', + severity: 'high', + category: 'bug', + issue: 'The PR can merge with a failing gate.', + recommendation: 'Require green CI before merge.', + confidence: 'high', + status: 'open', + location: { path: 'src/review.ts', line: 12 }, +}; + +const decision: Decision = { + status: 'blocked', + reason: 'Open high-severity finding remains.', + actions: ['fix-or-triage-blocking-findings'], +}; + +describe('orchestrator PR comment markers', () => { + test('emits a stable PAI marker with normalized JSON state', () => { + const markdown = emitOrchestratorComment({ + workItemId: 'work-1', + source: 'claude-local', + updatedAt: '2026-06-26T23:30:00.000Z', + findings: [finding], + decision, + }); + expect(markdown).toContain(COMMENT_MARKER_START); + expect(markdown).toContain(COMMENT_MARKER_END); + expect(markdown).toContain('"version": 1'); + expect(markdown).toContain('Open blocker/high findings: 1'); + }); + + test('round-trips finding IDs, severities, statuses, and source metadata', () => { + const markdown = emitOrchestratorComment({ + workItemId: 'work-1', + source: 'claude-local', + updatedAt: '2026-06-26T23:30:00.000Z', + findings: [finding], + decision, + }); + const parsed = parseOrchestratorComment(markdown); + expect(parsed.found).toBe(true); + expect(parsed.errors).toEqual([]); + expect(parsed.state?.workItemId).toBe('work-1'); + expect(parsed.state?.source).toBe('claude-local'); + expect(parsed.state?.findings[0].id).toBe('finding-1'); + expect(parsed.state?.findings[0].severity).toBe('high'); + expect(parsed.state?.findings[0].status).toBe('open'); + expect(parsed.state?.decision.status).toBe('blocked'); + }); + + test('returns found false when no marker exists', () => { + const parsed = parseOrchestratorComment('ordinary PR comment'); + 
expect(parsed.found).toBe(false); + expect(parsed.errors).toEqual([]); + }); + + test('fails closed on malformed marker JSON', () => { + const parsed = parseOrchestratorComment(`${COMMENT_MARKER_START}\nnot json\n${COMMENT_MARKER_END}`); + expect(parsed.found).toBe(true); + expect(parsed.errors.join('\n')).toContain('Failed to parse'); + }); + + test('fails closed on malformed finding payloads', () => { + const markdown = `${COMMENT_MARKER_START} +\`\`\`json +{ + "version": 1, + "workItemId": "work-1", + "source": "claude-local", + "updatedAt": "2026-06-26T23:30:00.000Z", + "findings": [{ "id": "bad", "severity": "critical" }], + "decision": { "status": "blocked", "reason": "bad finding" } +} +\`\`\` +${COMMENT_MARKER_END}`; + const parsed = parseOrchestratorComment(markdown); + expect(parsed.found).toBe(true); + expect(parsed.errors.join('\n')).toContain('findings[0].severity'); + }); +}); diff --git a/tests/OrchestratorLedger.test.ts b/tests/OrchestratorLedger.test.ts new file mode 100644 index 00000000..196b72ef --- /dev/null +++ b/tests/OrchestratorLedger.test.ts @@ -0,0 +1,207 @@ +import { describe, expect, test } from 'bun:test'; +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { + ledgerPaths, + loadWorkItem, + readArtifact, + readLedgerEvents, + saveWorkItem, + writeArtifact, +} from '../PAI/Tools/orchestrator/ledger'; + +function tempRoot(): string { + return mkdtempSync(join(tmpdir(), 'pai-orchestrator-ledger-')); +} + +function fixtureWorkItem(): any { + return JSON.parse(readFileSync(join(process.cwd(), 'fixtures', 'orchestrator', 'pr-review.json'), 'utf-8')); +} + +describe('orchestrator ledger', () => { + test('saves and loads a valid work item atomically', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + saveWorkItem(root, item); + const loaded = loadWorkItem(root, item.id); + expect(loaded?.id).toBe(item.id); + 
expect(loaded?.type).toBe('pr-review'); + expect(existsSync(ledgerPaths(root, item.id).workItemPath)).toBe(true); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('writes artifacts and appends ledger events', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + saveWorkItem(root, item); + const artifact = writeArtifact(root, item.id, { + id: 'packet-1', + type: 'packet', + source: 'orchestrator', + content: 'fixture packet', + }); + expect(artifact.path).toBe('artifacts/packet-1.json'); + expect(artifact.createdAt).toBeTruthy(); + expect(readArtifact(root, item.id, 'packet-1')?.content).toBe('fixture packet'); + + const events = readLedgerEvents(root, item.id); + expect(events.map((event) => event.type)).toContain('work-item-saved'); + expect(events.map((event) => event.type)).toContain('artifact-written'); + expect(events.some((event) => event.artifactId === 'packet-1')).toBe(true); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('dot-only work item ids cannot escape the ledger root', () => { + const root = tempRoot(); + try { + expect(ledgerPaths(root, '..').workDir).toBe(join(root, 'item')); + expect(ledgerPaths(root, '.').workDir).toBe(join(root, 'item')); + expect(ledgerPaths(root, '...').workDir).toBe(join(root, 'item')); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('sanitized work item ids include a hash when normalization would collide', () => { + const root = tempRoot(); + try { + expect(ledgerPaths(root, 'a/b').workDir).not.toBe(ledgerPaths(root, 'a-b').workDir); + expect(ledgerPaths(root, 'a/b').workDir).toContain('a-b-'); + expect(ledgerPaths(root, 'a-b').workDir).toBe(join(root, 'a-b')); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('dot-only artifact ids cannot escape the artifacts directory', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + 
saveWorkItem(root, item); + const artifact = writeArtifact(root, item.id, { + id: '..', + type: 'packet', + source: 'orchestrator', + content: 'safe packet', + }); + const paths = ledgerPaths(root, item.id); + + expect(artifact.path).toBe('artifacts/item.json'); + expect(existsSync(join(paths.artifactsDir, 'item.json'))).toBe(true); + expect(existsSync(join(paths.workDir, 'item.json'))).toBe(false); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('sanitized artifact ids include a hash when normalization would collide', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + saveWorkItem(root, item); + const slashArtifact = writeArtifact(root, item.id, { + id: 'a/b', + type: 'packet', + source: 'orchestrator', + content: 'slash', + }); + const dashArtifact = writeArtifact(root, item.id, { + id: 'a-b', + type: 'packet', + source: 'orchestrator', + content: 'dash', + }); + + expect(slashArtifact.path).not.toBe(dashArtifact.path); + expect(slashArtifact.path).toContain('artifacts/a-b-'); + expect(dashArtifact.path).toBe('artifacts/a-b.json'); + expect(readArtifact(root, item.id, 'a/b')?.content).toBe('slash'); + expect(readArtifact(root, item.id, 'a-b')?.content).toBe('dash'); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('resume can reload an in-progress work item from persisted state', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + item.status = 'running'; + saveWorkItem(root, item); + + const resumed = loadWorkItem(root, item.id); + expect(resumed?.status).toBe('running'); + expect(resumed?.roles[0].engine).toBe('claude-local'); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('invalid stored work item fails loudly instead of resuming corrupt state', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + saveWorkItem(root, item); + const paths = ledgerPaths(root, item.id); + 
writeFileSync(paths.workItemPath, '{ "id": "" }'); + expect(() => loadWorkItem(root, item.id)).toThrow('Stored work item is invalid'); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('ledger event ids are collision-resistant for rapid same-type appends', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + saveWorkItem(root, item); + writeArtifact(root, item.id, { + id: 'packet-1', + type: 'packet', + source: 'orchestrator', + content: 'first', + }); + writeArtifact(root, item.id, { + id: 'packet-2', + type: 'packet', + source: 'orchestrator', + content: 'second', + }); + + const events = readLedgerEvents(root, item.id); + expect(new Set(events.map(event => event.id)).size).toBe(events.length); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + test('corrupt JSONL lines do not prevent reading later valid ledger events', () => { + const root = tempRoot(); + try { + const item = fixtureWorkItem(); + saveWorkItem(root, item); + const paths = ledgerPaths(root, item.id); + const validEvent = { + id: 'manual-event', + workItemId: item.id, + type: 'status-changed', + timestamp: '2026-06-27T00:00:00.000Z', + message: 'Manual event after corruption', + }; + writeFileSync(paths.eventsPath, `${readFileSync(paths.eventsPath, 'utf-8')}not-json\n${JSON.stringify(validEvent)}\n`); + + const events = readLedgerEvents(root, item.id); + expect(events.map(event => event.id)).toContain('manual-event'); + expect(events.some(event => event.message === 'Manual event after corruption')).toBe(true); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); +}); diff --git a/tests/OrchestratorPolicy.test.ts b/tests/OrchestratorPolicy.test.ts new file mode 100644 index 00000000..3aae450b --- /dev/null +++ b/tests/OrchestratorPolicy.test.ts @@ -0,0 +1,195 @@ +import { describe, expect, test } from 'bun:test'; +import { + conservativeDefaultPolicy, + type Finding, +} from 
'../PAI/Tools/orchestrator/schema'; +import { evaluatePolicy } from '../PAI/Tools/orchestrator/policy'; + +function policy(overrides: Partial> = {}) { + return { ...conservativeDefaultPolicy(), autonomy: 'merge' as const, allowMerge: true, allowPush: true, ...overrides }; +} + +const highFinding: Finding = { + id: 'finding-high-1', + severity: 'high', + category: 'bug', + issue: 'Incorrect behavior', + recommendation: 'Fix before merge', + confidence: 'high', + status: 'open', +}; + +describe('orchestrator policy evaluator', () => { + test('allows merge only when all safety gates pass', () => { + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'merge', + checkState: 'green', + changedPaths: ['src/orchestrator.ts'], + findings: [], + }); + expect(result.allowed).toBe(true); + expect(result.decision.status).toBe('merge-ready'); + }); + + test('blocks private/public boundary risk', () => { + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'merge', + checkState: 'green', + privateBoundaryRisk: true, + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('Private/public boundary risk'); + }); + + test('blocks red or missing CI when green CI is required', () => { + for (const checkState of ['red', 'pending', 'missing', 'unknown'] as const) { + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'merge', + checkState, + }); + expect(result.allowed).toBe(false); + expect(result.requiredActions).toContain('wait-for-green-ci'); + } + }); + + test('blocks merge with non-green CI even when green CI is not broadly required', () => { + const result = evaluatePolicy({ + policy: policy({ requireGreenCI: false }), + requestedAction: 'merge', + checkState: 'red', + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('CI is not green'); + expect(result.requiredActions).toContain('wait-for-green-ci'); + }); + + test('blocks push with non-green CI even when 
green CI is not broadly required', () => { + const result = evaluatePolicy({ + policy: policy({ requireGreenCI: false, allowPush: true }), + requestedAction: 'push', + checkState: 'red', + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('CI is not green'); + expect(result.requiredActions).toContain('wait-for-green-ci'); + }); + + test('blocks push when autonomy is advise even if allowPush is true', () => { + const result = evaluatePolicy({ + policy: policy({ autonomy: 'advise', allowPush: true }), + requestedAction: 'push', + checkState: 'green', + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('autonomy is advise'); + }); + + test('allows push when autonomy permits fixes, push is enabled, and CI is green', () => { + const result = evaluatePolicy({ + policy: policy({ autonomy: 'fix', allowPush: true }), + requestedAction: 'push', + checkState: 'green', + }); + expect(result.allowed).toBe(true); + }); + + test('blocks open high or blocker findings', () => { + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'merge', + checkState: 'green', + findings: [highFinding], + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('finding-high-1'); + }); + + test('does not block fixed high findings or open medium findings', () => { + const fixedHigh = { ...highFinding, status: 'fixed' as const }; + const medium = { ...highFinding, id: 'finding-medium-1', severity: 'medium' as const }; + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'merge', + checkState: 'green', + findings: [fixedHigh, medium], + }); + expect(result.allowed).toBe(true); + }); + + test('blocks changed paths under blockedPaths', () => { + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'fix', + checkState: 'green', + changedPaths: ['MEMORY/private.md'], + }); + expect(result.allowed).toBe(false); + 
expect(result.decision.reason).toContain('blocked by policy'); + }); + + test('blocks changed paths outside allowedPaths', () => { + const result = evaluatePolicy({ + policy: policy({ allowedPaths: ['src/**'] }), + requestedAction: 'fix', + checkState: 'green', + changedPaths: ['tests/example.test.ts'], + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('outside allowed paths'); + }); + + test('allows nested files under allowed globstar paths', () => { + const result = evaluatePolicy({ + policy: policy({ allowedPaths: ['docs/**'], blockedPaths: [] }), + requestedAction: 'fix', + checkState: 'green', + changedPaths: ['docs/planning/example-plan.md'], + }); + expect(result.allowed).toBe(true); + }); + + test('blocks malformed agent output', () => { + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'fix', + checkState: 'green', + malformedAgentOutput: true, + }); + expect(result.allowed).toBe(false); + expect(result.requiredActions).toContain('rerun-or-inspect-agent-output'); + }); + + test('blocks invalid agent result schema', () => { + const result = evaluatePolicy({ + policy: policy(), + requestedAction: 'fix', + checkState: 'green', + agentResult: { status: 'ok' }, + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('schema validation'); + }); + + test('blocks merge when allowMerge is false even if autonomy says merge', () => { + const result = evaluatePolicy({ + policy: policy({ allowMerge: false }), + requestedAction: 'merge', + checkState: 'green', + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('allowMerge is false'); + }); + + test('blocks fix and iterate when autonomy is advise', () => { + const result = evaluatePolicy({ + policy: conservativeDefaultPolicy(), + requestedAction: 'iterate', + checkState: 'green', + }); + expect(result.allowed).toBe(false); + expect(result.decision.reason).toContain('autonomy is advise'); + }); 
+}); diff --git a/tests/OrchestratorPrReview.test.ts b/tests/OrchestratorPrReview.test.ts new file mode 100644 index 00000000..eaa991dd --- /dev/null +++ b/tests/OrchestratorPrReview.test.ts @@ -0,0 +1,182 @@ +import { describe, expect, test } from 'bun:test'; +import { existsSync, mkdtempSync, readFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { emitOrchestratorComment } from '../PAI/Tools/orchestrator/comments'; +import { changedPathsFromDiff, fixtureGitHubClient, REQUIRED_LIVE_WRITE_TOKEN } from '../PAI/Tools/orchestrator/github'; +import { runPrReview, type PrAgentRunner } from '../PAI/Tools/orchestrator/workflows/pr-review'; +import { type Decision, type Finding, type WorkItem } from '../PAI/Tools/orchestrator/schema'; + +function fixture(): WorkItem { + return JSON.parse(readFileSync(join(process.cwd(), 'fixtures', 'orchestrator', 'pr-review.json'), 'utf-8')); +} + +const priorFinding: Finding = { + id: 'prior-finding', + severity: 'medium', + category: 'docs', + issue: 'Prior comment finding', + recommendation: 'Keep marker state resumable.', + confidence: 'medium', + status: 'open', +}; + +const priorDecision: Decision = { + status: 'blocked', + reason: 'Prior review blocked.', +}; + +describe('orchestrator pr-review workflow', () => { + test('extracts changed paths from unified diffs', () => { + expect(changedPathsFromDiff([ + 'diff --git a/src/old.ts b/src/new.ts', + '--- a/src/old.ts', + '+++ b/src/new.ts', + 'diff --git a/docs/planning/ROADMAP.md b/docs/planning/ROADMAP.md', + '--- a/docs/planning/ROADMAP.md', + '+++ b/docs/planning/ROADMAP.md', + ].join('\n'))).toEqual(['docs/planning/ROADMAP.md', 'src/new.ts', 'src/old.ts']); + }); + + test('dry-run produces review, fix, policy, and final-report artifacts without live GitHub writes', async () => { + const github = fixtureGitHubClient(); + + const result = await runPrReview(fixture(), { dryRun: true, github }); + + expect(result.status).toBe('fixed'); + 
expect(result.policyResult.allowed).toBe(true); + expect(result.checkState).toBe('green'); + expect(result.commentsWritten).toBe(0); + expect(github.writes).toEqual([]); + expect(result.packets.map((packet) => packet.role.role)).toEqual(['reviewer', 'fixer']); + expect(result.findings.every((finding) => finding.status === 'fixed')).toBe(true); + expect(result.artifacts.some((artifact) => artifact.type === 'packet')).toBe(true); + expect(result.artifacts.some((artifact) => artifact.id.endsWith('review-findings'))).toBe(true); + expect(result.artifacts.some((artifact) => artifact.id.endsWith('post-fix-findings'))).toBe(true); + expect(result.artifacts.some((artifact) => artifact.type === 'decision')).toBe(true); + expect(result.artifacts.some((artifact) => artifact.id.endsWith('final-report'))).toBe(true); + }); + + test('blocks PR review when changed paths hit blocked policy paths', async () => { + const github = fixtureGitHubClient({ + diff: [ + 'diff --git a/docs/planning/ROADMAP-7.x.md b/docs/planning/ROADMAP-7.x.md', + 'index 1111111..2222222 100644', + '--- a/docs/planning/ROADMAP-7.x.md', + '+++ b/docs/planning/ROADMAP-7.x.md', + '@@ -1,2 +1,3 @@', + '+private planning detail', + ].join('\n'), + }); + + const result = await runPrReview(fixture(), { dryRun: true, github }); + + expect(result.status).toBe('blocked'); + expect(result.decision.reason).toContain('Changed path is blocked by policy: docs/planning/ROADMAP-7.x.md'); + expect(result.policyResult.reasons).toContain('Blocked path changed: docs/planning/ROADMAP-7.x.md'); + }); + + test('passes PR metadata, diff, check state, and existing PAI marker into review packets', async () => { + const packets: string[] = []; + const comment = emitOrchestratorComment({ + workItemId: fixture().id, + source: 'https://github.com/example/project/pull/123', + findings: [priorFinding], + decision: priorDecision, + updatedAt: '2026-06-27T00:00:00.000Z', + }); + const github = fixtureGitHubClient({ + diff: 'diff --git 
a/src/foo.ts b/src/foo.ts\n+export const foo = true;', + comments: [comment], + labels: ['pai-autopilot'], + }); + const runner: PrAgentRunner = async (packet) => { + packets.push(JSON.stringify(packet.inputs)); + if (packet.role.role === 'reviewer') { + return { status: 'pass', summary: 'reviewed fixture PR context', artifacts: [] }; + } + return { status: 'fixed', summary: 'nothing to fix', artifacts: [] }; + }; + + const result = await runPrReview(fixture(), { runAgent: runner, github }); + + expect(result.status).toBe('complete'); + expect(packets[0]).toContain('diff --git'); + expect(packets[0]).toContain('checkState=green'); + expect(packets[0]).toContain('pai-autopilot'); + expect(packets[0]).toContain('prior-finding'); + }); + + test('blocks on pending CI and records a check-state artifact', async () => { + const github = fixtureGitHubClient({ checkState: 'pending' }); + + const result = await runPrReview(fixture(), { dryRun: true, github, ciWaitTimeoutMs: 1 }); + + expect(result.status).toBe('blocked'); + expect(result.decision.reason).toContain('CI is not green'); + expect(result.artifacts.some((artifact) => + artifact.id.endsWith('ci-state') && artifact.content?.includes('"checkState": "pending"') + )).toBe(true); + }); + + test('blocks and records malformed review output', async () => { + const runner: PrAgentRunner = async (packet) => { + if (packet.role.role === 'reviewer') return { malformed: true }; + return { status: 'fixed', summary: 'fixer should not repair malformed review output', artifacts: [] }; + }; + + const result = await runPrReview(fixture(), { runAgent: runner, github: fixtureGitHubClient() }); + + expect(result.status).toBe('blocked'); + expect(result.policyResult.reasons).toContain('Agent output is malformed.'); + expect(result.warnings[0]).toContain('agentResult.status'); + expect(result.artifacts.some((artifact) => + artifact.type === 'agent-result' && artifact.content?.includes('Review result failed validation') + )).toBe(true); + 
}); + + test('can persist PR workflow artifacts to the ledger when a root is provided', async () => { + const ledgerRoot = mkdtempSync(join(tmpdir(), 'pai-pr-review-ledger-')); + + const result = await runPrReview(fixture(), { dryRun: true, ledgerRoot }); + + expect(result.status).toBe('fixed'); + expect(result.artifacts.every((artifact) => artifact.path)).toBe(true); + expect(existsSync(join(ledgerRoot, fixture().id, 'work-item.json'))).toBe(true); + expect(existsSync(join(ledgerRoot, fixture().id, 'artifacts', `${fixture().id}-final-report.json`))).toBe(true); + }); +}); + +describe('orchestrator GitHub fixture write guards', () => { + test('blocks PR comments, pushes, and merges without explicit live-write approval', async () => { + const github = fixtureGitHubClient(); + + await expect(github.upsertComment('comment')).rejects.toThrow('requires explicit live GitHub write approval'); + await expect(github.pushFixes()).rejects.toThrow('requires explicit live GitHub write approval'); + await expect(github.mergePullRequest()).rejects.toThrow('requires explicit live GitHub write approval'); + expect(github.writes).toEqual([]); + }); + + test('records fixture writes only when the explicit live-write token is supplied', async () => { + const github = fixtureGitHubClient(); + const writeOptions = { allowLiveWrites: true, liveWriteToken: REQUIRED_LIVE_WRITE_TOKEN }; + + await github.upsertComment('comment', writeOptions); + await github.pushFixes(writeOptions); + await github.mergePullRequest(writeOptions); + + expect(github.writes).toEqual(['comment', 'pushFixes', 'mergePullRequest']); + }); + + test('workflow comment emission is blocked without explicit live-write approval', async () => { + const github = fixtureGitHubClient(); + + const result = await runPrReview(fixture(), { dryRun: true, github, emitComment: true }); + + expect(result.status).toBe('blocked'); + expect(result.commentsWritten).toBe(0); + expect(result.decision.reason).toContain('GitHub comment write 
blocked'); + expect(result.artifacts.some((artifact) => artifact.type === 'pr-comment')).toBe(true); + expect(github.writes).toEqual([]); + }); +}); diff --git a/tests/OrchestratorSchema.test.ts b/tests/OrchestratorSchema.test.ts new file mode 100644 index 00000000..ef35c5e8 --- /dev/null +++ b/tests/OrchestratorSchema.test.ts @@ -0,0 +1,135 @@ +import { describe, expect, test } from 'bun:test'; +import { readFileSync } from 'fs'; +import { join } from 'path'; +import { + conservativeDefaultPolicy, + validateAgentResult, + validateDecision, + validateFinding, + validatePolicy, + validatePolicyResult, + validateWorkPacket, + validateWorkItem, +} from '../PAI/Tools/orchestrator/schema'; + +function fixture(name: string): unknown { + return JSON.parse(readFileSync(join(process.cwd(), 'fixtures', 'orchestrator', name), 'utf-8')); +} + +describe('orchestrator schema', () => { + test('validates the PR review fixture', () => { + const result = validateWorkItem(fixture('pr-review.json')); + expect(result.valid).toBe(true); + expect(result.errors).toEqual([]); + expect(result.value?.type).toBe('pr-review'); + }); + + test('validates the adversarial review fixture', () => { + const result = validateWorkItem(fixture('adversarial-review.json')); + expect(result.valid).toBe(true); + expect(result.errors).toEqual([]); + expect(result.value?.type).toBe('adversarial-review'); + }); + + test('rejects unknown workflow types', () => { + const item = fixture('pr-review.json') as Record; + item.type = 'private-chat-bridge'; + const result = validateWorkItem(item); + expect(result.valid).toBe(false); + expect(result.errors.join('\n')).toContain('workItem.type'); + }); + + test('rejects malformed findings', () => { + const result = validateFinding({ + id: 'finding-1', + severity: 'critical', + category: 'bug', + issue: 'Missing boundary check', + recommendation: 'Add a policy guard', + confidence: 'high', + status: 'open', + }); + expect(result.valid).toBe(false); + 
expect(result.errors.join('\n')).toContain('finding.severity'); + }); + + test('validates agent results with nested findings and artifacts', () => { + const result = validateAgentResult({ + status: 'findings', + summary: 'One issue found.', + artifacts: [{ id: 'artifact-1', type: 'finding-report', source: 'claude-local', content: 'report' }], + findings: [{ + id: 'finding-1', + severity: 'high', + category: 'security', + issue: 'Untrusted merge path', + recommendation: 'Block merge until CI and policy pass', + confidence: 'high', + status: 'open', + }], + }); + expect(result.valid).toBe(true); + }); + + test('validates work packets for role-scoped agent handoff', () => { + const item = fixture('pr-review.json') as any; + const result = validateWorkPacket({ + id: 'packet-1', + workItemId: item.id, + type: item.type, + objective: item.objective, + role: item.roles[0], + inputs: item.inputs, + policy: item.policy, + artifacts: [], + }); + expect(result.valid).toBe(true); + expect(result.value?.role.engine).toBe('claude-local'); + }); + + test('validates decision and policy result contracts', () => { + const decision = validateDecision({ + status: 'blocked', + reason: 'CI is not green.', + actions: ['wait-for-ci'], + }); + expect(decision.valid).toBe(true); + + const policyResult = validatePolicyResult({ + allowed: false, + decision: decision.value, + reasons: ['CI is not green.'], + requiredActions: ['wait-for-ci'], + }); + expect(policyResult.valid).toBe(true); + expect(policyResult.value?.allowed).toBe(false); + }); + + test('rejects malformed policy results', () => { + const result = validatePolicyResult({ + allowed: 'yes', + decision: { status: 'ship-it', reason: '' }, + reasons: 'none', + }); + expect(result.valid).toBe(false); + expect(result.errors.join('\n')).toContain('policyResult.allowed'); + expect(result.errors.join('\n')).toContain('policyResult.decision.status'); + }); + + test('conservative default policy blocks push, merge, and private boundary risk', 
() => { + const policy = conservativeDefaultPolicy(); + expect(policy.autonomy).toBe('advise'); + expect(policy.allowPush).toBe(false); + expect(policy.allowMerge).toBe(false); + expect(policy.requireGreenCI).toBe(true); + expect(policy.stopOnPrivateBoundaryRisk).toBe(true); + expect(policy.blockedPaths).toContain('MEMORY/**'); + }); + + test('warns when policy weakens private boundary protection', () => { + const policy = { ...conservativeDefaultPolicy(), stopOnPrivateBoundaryRisk: false }; + const result = validatePolicy(policy); + expect(result.valid).toBe(true); + expect(result.warnings.join('\n')).toContain('stopOnPrivateBoundaryRisk'); + }); +}); diff --git a/tests/PayloadSchema.test.ts b/tests/PayloadSchema.test.ts index 8577d86c..c14f2168 100644 --- a/tests/PayloadSchema.test.ts +++ b/tests/PayloadSchema.test.ts @@ -242,6 +242,28 @@ describe('UserPromptExpansion', () => { }); }); +// ── InstructionsLoaded ─────────────────────────────────────────────────────── + +describe('InstructionsLoaded', () => { + test('valid native instruction-load payload', () => { + const r = validatePayload({ + ...base('InstructionsLoaded'), + file_path: '/tmp/project/CLAUDE.md', + memory_type: 'project', + reason: 'changed', + source: 'native', + globs: { matched: ['CLAUDE.md'] }, + }); + expect(r.valid).toBe(true); + expect(r.missing).toEqual([]); + }); + + test('metadata fields are optional so telemetry can no-op fail-open', () => { + const r = validatePayload(base('InstructionsLoaded')); + expect(r.valid).toBe(true); + }); +}); + // ── ValidationResult shape ──────────────────────────────────────────────────── describe('result shape', () => { diff --git a/tests/RedactAndGate.test.ts b/tests/RedactAndGate.test.ts index fb99342a..8ab73694 100644 --- a/tests/RedactAndGate.test.ts +++ b/tests/RedactAndGate.test.ts @@ -6,6 +6,7 @@ */ import { describe, test, expect } from 'bun:test'; import { redactSecrets, containsSecret } from '../hooks/lib/redact'; +import { 
sanitizeCrossProjectBody } from '../hooks/MemoryRecall.hook'; describe('redactSecrets', () => { test('masks an AWS access key with a marker', () => { @@ -53,3 +54,13 @@ describe('config gate defaults (deny-by-default)', () => { expect(isKnowledgeRedactionEnabled()).toBe(true); }); }); + +describe('cross-project body redaction', () => { + test('redacts credential-shaped content before body injection', () => { + const token = `ghp_${'123456789012345678901234567890123456'}`; + const out = sanitizeCrossProjectBody(`Use token ${token} in the other project.`); + + expect(out).not.toContain(token); + expect(out).toContain('[REDACTED:'); + }); +}); diff --git a/tests/ReleaseScript.test.ts b/tests/ReleaseScript.test.ts new file mode 100644 index 00000000..20bb9463 --- /dev/null +++ b/tests/ReleaseScript.test.ts @@ -0,0 +1,17 @@ +import { describe, expect, test } from 'bun:test'; +import { readFileSync } from 'fs'; +import { join } from 'path'; + +const repoRoot = join(import.meta.dir, '..'); +const releaseScript = readFileSync(join(repoRoot, 'scripts', 'release.sh'), 'utf-8'); + +describe('release.sh', () => { + test('runs the temp KAI artifact gate before creating a git tag', () => { + const gateIndex = releaseScript.indexOf('scripts/kai-temp-release-gate.ts'); + const tagIndex = releaseScript.indexOf('git tag -a "$VERSION"'); + + expect(gateIndex).toBeGreaterThan(-1); + expect(tagIndex).toBeGreaterThan(-1); + expect(gateIndex).toBeLessThan(tagIndex); + }); +}); diff --git a/tests/SettingsReproducibility.test.ts b/tests/SettingsReproducibility.test.ts index 98dbe12a..86468a85 100644 --- a/tests/SettingsReproducibility.test.ts +++ b/tests/SettingsReproducibility.test.ts @@ -34,6 +34,22 @@ function buildFixtureSettings(): Record { return buildSettings(FIXTURE.dir) as Record; } +function normalizeHookRunnerPaths(value: unknown): unknown { + if (typeof value === 'string') { + return value.replace(/\/[^\s"']+\/hooks\/lib\/run-hook\.sh/g, '${PAI_DIR}/hooks/lib/run-hook.sh'); + } 
+ if (Array.isArray(value)) return value.map(normalizeHookRunnerPaths); + if (value && typeof value === 'object') { + return Object.fromEntries( + Object.entries(value as Record).map(([key, nested]) => [ + key, + normalizeHookRunnerPaths(nested), + ]) + ); + } + return value; +} + // buildSettings expands ${PAI_DIR}/${HOME} from process.env and (by design, PAI-SR-001) // THROWS when they resolve empty. In the full parallel suite another test can clear those // env vars in-process, making these tests flaky. Pin them for this suite and restore after. @@ -81,9 +97,12 @@ describe('settings reproducibility (PAI-SR-001)', () => { const code = ` const { buildSettings, findSettingsDivergence } = require(${JSON.stringify(join(REPO, 'hooks/handlers/BuildSettings.ts'))}); const { readFileSync } = require('fs'); + const normalizeHookRunnerPaths = ${normalizeHookRunnerPaths.toString()}; const built = buildSettings(${JSON.stringify(LIVE_PAI_DIR)}); const live = JSON.parse(readFileSync(${JSON.stringify(settingsPath)}, 'utf-8')); - console.log(JSON.stringify(findSettingsDivergence(built, live))); + const builtComparable = { ...built, hooks: normalizeHookRunnerPaths(built.hooks) }; + const liveComparable = { ...live, hooks: normalizeHookRunnerPaths(live.hooks) }; + console.log(JSON.stringify(findSettingsDivergence(builtComparable, liveComparable))); `; const proc = spawnSync(['bun', '-e', code], { stdout: 'pipe', @@ -94,7 +113,7 @@ describe('settings reproducibility (PAI-SR-001)', () => { const divergent = JSON.parse(proc.stdout.toString()) as string[]; // Allowed: spinnerTipsOverride (version-string staleness). Local installs may also carry // environment-specific statusLine/autoMemoryDirectory drift. Any OTHER key is a new defect. 
- const ALLOWED = new Set(['spinnerTipsOverride', 'autoMemoryDirectory', 'statusLine', 'pai']); + const ALLOWED = new Set(['spinnerTipsOverride', 'autoMemoryDirectory', 'statusLine']); const unexpected = divergent.filter(k => !ALLOWED.has(k)); expect(unexpected).toEqual([]); }); diff --git a/tests/SkillDiscoveryRecommender.test.ts b/tests/SkillDiscoveryRecommender.test.ts new file mode 100644 index 00000000..dd1dc112 --- /dev/null +++ b/tests/SkillDiscoveryRecommender.test.ts @@ -0,0 +1,68 @@ +import { describe, expect, test } from 'bun:test'; +import { mkdirSync, mkdtempSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { clearSkillCardCache, loadSkillCards, loadSkillCardsCached, recommendSkills } from '../hooks/SkillDiscoveryRecommender.hook'; + +function makeSkill(root: string, name: string, description: string) { + const dir = join(root, name); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'SKILL.md'), `--- +name: ${name} +description: ${description} +--- + +# ${name} +`); +} + +describe('SkillDiscoveryRecommender', () => { + test('loads skill trigger phrases from SKILL.md frontmatter', () => { + const root = mkdtempSync(join(tmpdir(), 'skill-recommend-')); + makeSkill(root, 'RedTeam', 'Adversarial analysis. USE WHEN red team, critique, stress test.'); + + const cards = loadSkillCards(root); + + expect(cards).toHaveLength(1); + expect(cards[0].name).toBe('RedTeam'); + expect(cards[0].triggers).toContain('stress test'); + }); + + test('reuses cached skill cards until SKILL.md mtime or size changes', () => { + const root = mkdtempSync(join(tmpdir(), 'skill-recommend-')); + const cache = join(root, 'cache.json'); + makeSkill(root, 'RedTeam', 'Adversarial analysis. 
USE WHEN red team, critique, stress test.'); + + const first = loadSkillCardsCached(root, cache); + writeFileSync(join(root, 'RedTeam', 'SKILL.md'), `--- +name: RedTeam +description: stale if cache is incorrectly reused +--- + +# RedTeam +`); + const second = loadSkillCardsCached(root, cache); + + expect(first[0].triggers).toContain('stress test'); + expect(second[0].description).toContain('stale if cache is incorrectly reused'); + clearSkillCardCache(cache); + }); + + test('recommends a close matching skill', () => { + const cards = [ + { name: 'RedTeam', description: '', triggers: ['red team', 'critique', 'stress test'] }, + { name: 'Research', description: '', triggers: ['research', 'find information'] }, + ]; + + const [rec] = recommendSkills('stress test this roadmap before we commit to it', cards); + + expect(rec.skill).toBe('RedTeam'); + expect(rec.trigger).toBe('stress test'); + }); + + test('does not recommend a skill already explicitly invoked', () => { + const cards = [{ name: 'Research', description: '', triggers: ['research'] }]; + + expect(recommendSkills('/Research this topic', cards)).toEqual([]); + }); +}); diff --git a/tests/SyncCIGate.test.ts b/tests/SyncCIGate.test.ts index 6a905146..cfd57993 100644 --- a/tests/SyncCIGate.test.ts +++ b/tests/SyncCIGate.test.ts @@ -30,7 +30,7 @@ const TEST_DIR = join(PAI_DIR, 'tests', '.sync-gate-test-tmp'); const hasSyncScript = existsSync(join(PAI_DIR, 'scripts', 'sync-to-kai.sh')); describe.skipIf(!hasSyncScript)('SyncCIGate', () => { - test('parses EXCLUDE_PATHS from sync-to-kai.sh', () => { + test('loads private paths from sync-manifest.json', () => { const excludePaths = parseExcludePaths(PAI_DIR); expect(excludePaths.length).toBeGreaterThan(0); // /CLAUDE.md is root-anchored (leading '/') so the bare basename exclude @@ -38,9 +38,11 @@ describe.skipIf(!hasSyncScript)('SyncCIGate', () => { expect(excludePaths).toContain('/CLAUDE.md'); expect(excludePaths).toContain('VERSION'); 
expect(excludePaths).toContain('config/identity.jsonc'); + expect(excludePaths).toContain('scripts/pii-patterns.json'); + expect(excludePaths).toContain('devices.json'); }); - test('parses KAI_ONLY_FILES from sync-to-kai.sh', () => { + test('loads kai-only paths from sync-manifest.json', () => { const kaiOnlyFiles = parseKaiOnlyFiles(PAI_DIR); expect(kaiOnlyFiles.length).toBeGreaterThan(0); expect(kaiOnlyFiles).toContain('CHANGELOG.md'); @@ -48,6 +50,16 @@ describe.skipIf(!hasSyncScript)('SyncCIGate', () => { expect(kaiOnlyFiles).toContain('get-kai.sh'); }); + test('sync-to-kai consumes manifest instead of owning populated classification arrays', () => { + const sync = require('fs').readFileSync(join(PAI_DIR, 'scripts', 'sync-to-kai.sh'), 'utf-8'); + expect(sync).toContain('SYNC_MANIFEST_FILE="$PAI_DIR/scripts/sync-manifest.json"'); + expect(sync).toContain("jq -r '.private[]'"); + expect(sync).toContain("jq -r '.kai_only[]'"); + expect(sync).toContain("jq -r '.stale_kai_paths[]?'"); + expect(sync).not.toMatch(/EXCLUDE_PATHS=\(\s*\n\s*\/CLAUDE\.md/); + expect(sync).not.toMatch(/KAI_ONLY_FILES=\(\s*\n\s*\.github\/workflows\/test\.yml/); + }); + test('classifies file in EXCLUDE_PATHS as private', () => { const excludePaths = parseExcludePaths(PAI_DIR); const kaiOnlyFiles = parseKaiOnlyFiles(PAI_DIR); @@ -271,4 +283,14 @@ describe.skipIf(!hasSyncScript)('SyncCIGate', () => { const result = classifyFile('MEMORY/STAGING/2026-05-27-session.json', excludePaths, kaiOnlyFiles); expect(result).toBe('private'); }); + + test('manifest path metacharacters are literal, not regex syntax', () => { + expect(classifyFile('docs/v1.0.md', ['docs/v1.0.md'], [])).toBe('private'); + expect(classifyFile('docs/v1x0.md', ['docs/v1.0.md'], [])).toBe('public'); + }); + + test('globstar patterns match nested paths consistently', () => { + expect(classifyFile('memcarry/packages/lib/src/w6.test.ts', ['memcarry/**/*.test.ts'], [])).toBe('private'); + 
expect(classifyFile('memcarry/packages/lib/src/w6.ts', ['memcarry/**/*.test.ts'], [])).toBe('public'); + }); }); diff --git a/tests/SyncDrift.test.ts b/tests/SyncDrift.test.ts new file mode 100644 index 00000000..703ec1df --- /dev/null +++ b/tests/SyncDrift.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, test } from 'bun:test'; +import { loadSyncManifest, isExcluded } from '../scripts/sync-drift'; + +const REPO = new URL('..', import.meta.url).pathname.replace(/\/$/, ''); + +describe('sync-drift manifest classification', () => { + test('loads private and kai-only paths from sync-manifest.json', () => { + const manifest = loadSyncManifest(REPO); + + expect(manifest.private).toContain('/CLAUDE.md'); + expect(manifest.private).toContain('scripts/pii-patterns.json'); + expect(manifest.kai_only).toContain('/README.md'); + expect(manifest.stale_kai_paths).toContain('tests/SyncToKaiStagingArtifact.test.ts'); + }); + + test('matches root anchors, directories, and globs consistently', () => { + expect(isExcluded('CLAUDE.md', ['/CLAUDE.md'])).toBe(true); + expect(isExcluded('skills/Foo/CLAUDE.md', ['/CLAUDE.md'])).toBe(false); + expect(isExcluded('Plans/next.md', ['Plans/'])).toBe(true); + expect(isExcluded('MEMORY/STAGING/2026-06-26.json', ['MEMORY/STAGING/2026-*'])).toBe(true); + }); + + test('treats regex metacharacters literally in manifest paths', () => { + expect(isExcluded('docs/v1.0.md', ['docs/v1.0.md'])).toBe(true); + expect(isExcluded('docs/v1x0.md', ['docs/v1.0.md'])).toBe(false); + }); + + test('supports globstar patterns via the shared matcher', () => { + expect(isExcluded('memcarry/packages/lib/src/w6.test.ts', ['memcarry/**/*.test.ts'])).toBe(true); + expect(isExcluded('memcarry/packages/lib/src/w6.ts', ['memcarry/**/*.test.ts'])).toBe(false); + }); +}); From 68f19f80f11c83756d934b20c889b94ef19acb21 Mon Sep 17 00:00:00 2001 From: Deven Ducommun Date: Sat, 27 Jun 2026 17:27:53 -0700 Subject: [PATCH 2/3] Fix public KAI test scope --- 
scripts/version-targets.json | 2 +- scripts/weekly-maintenance.ts | 2 +- skills-lock.json | 2 +- tests/BumpVersionManifest.test.ts | 1 + tests/InstallRelocation.test.ts | 3 ++- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/version-targets.json b/scripts/version-targets.json index c48573c2..71066585 100644 --- a/scripts/version-targets.json +++ b/scripts/version-targets.json @@ -8,7 +8,7 @@ { "file": "PAI/Tools/Banner.ts", "kind": "banner-pai-algo-fallback", "category": "fallback" }, { "file": "PAI/Tools/BuildCLAUDE.ts", "kind": "buildclaude-default-algo", "category": "fallback" }, { "file": "PAI/Tools/BuildCLAUDE.ts", "kind": "buildclaude-default-pai", "category": "fallback" }, - { "file": "CLAUDE.md", "kind": "h1-kai-version", "category": "docs" }, + { "file": "CLAUDE.md", "kind": "h1-kai-version", "category": "docs", "optional": true }, { "file": "README.md", "kind": "h1-kai-version", "category": "docs" }, { "file": "README.md", "kind": "readme-algo-inline", "category": "docs" }, { "file": "docs/WHATS-DIFFERENT.md", "kind": "whats-different-title", "category": "docs" }, diff --git a/scripts/weekly-maintenance.ts b/scripts/weekly-maintenance.ts index 4f139ccc..4953e5de 100644 --- a/scripts/weekly-maintenance.ts +++ b/scripts/weekly-maintenance.ts @@ -163,7 +163,7 @@ async function main() { // Sync status process.stdout.write(' Checking sync status...'); - const syncResult = await runTask('Sync status', ['git', '-C', join(process.env.HOME!, 'Projects/kai'), 'log', '--oneline', '-1']); + const syncResult = await runTask('Sync status', ['git', '-C', REPO_DIR, 'log', '--oneline', '-1']); results.push(syncResult); console.log(` ✅`); diff --git a/skills-lock.json b/skills-lock.json index d8510333..624c0789 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -1,6 +1,6 @@ { "version": 1, - "generated": "2026-06-28T00:11:57.340Z", + "generated": "2026-06-28T00:24:28.616Z", "skills": { "Agents": { "source": "kai", diff --git 
a/tests/BumpVersionManifest.test.ts b/tests/BumpVersionManifest.test.ts index 36f49908..eaf0f6f3 100644 --- a/tests/BumpVersionManifest.test.ts +++ b/tests/BumpVersionManifest.test.ts @@ -95,6 +95,7 @@ describe('bump-version target manifest', () => { }); test('implementation loads the manifest instead of owning a static target list', () => { + if (!existsSync(SCRIPT_PATH)) return; const script = readFileSync(SCRIPT_PATH, 'utf-8'); expect(script).toContain('scripts/version-targets.json'); expect(script).toContain('discoverTargets'); diff --git a/tests/InstallRelocation.test.ts b/tests/InstallRelocation.test.ts index 5fcd59bf..919e2896 100644 --- a/tests/InstallRelocation.test.ts +++ b/tests/InstallRelocation.test.ts @@ -5,7 +5,7 @@ import { join } from 'path'; const repo = join(import.meta.dir, '..'); describe('install relocation resilience', () => { - test('runtime scripts do not pin the old ~/Projects/kai location', () => { + test('runtime scripts do not pin old ~/Projects checkout locations', () => { const files = [ 'scripts/hooks/pre-push', 'scripts/weekly-maintenance.ts', @@ -15,6 +15,7 @@ describe('install relocation resilience', () => { for (const file of files) { const text = readFileSync(join(repo, file), 'utf8'); expect(text).not.toContain('Projects/kai'); + expect(text).not.toContain('Projects/kai'); } }); From 2c5b8e0a7c974568fc88f0467c2936463f91ca62 Mon Sep 17 00:00:00 2001 From: KAI Maintainer Date: Sat, 27 Jun 2026 20:16:36 -0700 Subject: [PATCH 3/3] docs: bump KAI README to 7.7.0 The README is kai_only (not overwritten by sync), so it lagged at 7.3.2 while the 7.7.0 sync updated the manifest/markers. Fix: H1 -> 7.7.0, correct the researcher count (5 -> 7), and refresh the highlights with the 7.x additions (branch-write guards, staged-artifact sync, telemetry, agent context, SpecKit). Count/Algorithm markers are BuildDocs-managed and already current. 
--- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f32294cf..9a320b2e 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# KAI 7.3.2 — Personal AI Infrastructure +# KAI 7.7.0 — Personal AI Infrastructure > Built on the foundation of Daniel Miessler's [PAI system](https://github.com/danielmiessler/Personal_AI_Infrastructure). The Algorithm, TELOS framework, skill hierarchy, and hook lifecycle originated there. KAI extends it with production hardening, multi-model inference, self-learning memory, and individual deployment. @@ -28,7 +28,7 @@ The installer symlinks `~/.claude/` to your repo, walks you through identity set | `PAI/` | Core system: Algorithm v3.14.0, context routing, system docs | | `skills/` | 71 skill modules (Research, Security, Writing, Analysis, EM/PLM workflows) | | `hooks/` | 61 lifecycle hooks (security guards, formatters, analytics, cleanup) | -| `agents/` | 20 named agents (Architect, Engineer, 5 researchers, Pentester, etc.) | +| `agents/` | 20 named agents (Architect, Engineer, 7 researchers, Pentester, etc.) | | `config/` | 7 domain config files that generate settings.json | | `scripts/` | KAI Board dashboard, deployment packager | | `PAI-Install/` | Interactive setup wizard | @@ -38,7 +38,7 @@ The installer symlinks `~/.claude/` to your repo, walks you through identity set See **[docs/WHATS-DIFFERENT.md](docs/WHATS-DIFFERENT.md)** for a detailed comparison with Daniel Miessler's original. -Highlights: domain-based config, interactive installer, hook stderr wrapper + async flags, SecretScanner, GitHubWriteGuard, 20 named agents, Algorithm v3.14.0 with ISC quality gates, EM/PLM workflow skills, no personal data in repo. 
+Highlights: domain-based config, interactive installer, hook stderr wrapper + async flags, SecretScanner, GitHubWriteGuard, branch-write guards, staged-artifact KAI release sync, memory + latency telemetry, agent context handoff, SpecKit spec-driven workflows, 20 named agents, Algorithm v3.14.0 with ISC quality gates, EM/PLM workflow skills, no personal data in repo. ## Configuration