From 44f854018ef1d2257da0d2870c45ea7faa1ba6df Mon Sep 17 00:00:00 2001 From: oratis Date: Thu, 28 May 2026 00:28:50 +0800 Subject: [PATCH] =?UTF-8?q?feat(core):=20M3a=20=E2=80=94=20modes,=20hooks?= =?UTF-8?q?=20framework,=20memory=20dual=20system?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Partial M3 (first of two PRs). Ships the policy/safety/memory primitives; M3b (next PR) wires them into the agent loop and adds MCP + compaction + statusLine + /init multi-phase + auto classifier mode. What ships ---------- Core (packages/core/src/): - modes/index.ts — 6 mode policies (default / acceptEdits / plan / auto / dontAsk / bypassPermissions) → ModeVerdict with the invariants from docs/design/sandbox-plan-worktree.md §3.3 hardcoded: · plan blocks all write tools, regardless of permission · acceptEdits permission-deny still wins · dontAsk upgrades ask → deny (strict allowlist) · bypass skips permissions (sandbox-only safety) - hooks/types.ts — HookContext / HookHandlerOutput / HookResult - hooks/dispatcher.ts — 9 events × command handler, sequential dispatch, matcher tool-name with `|` OR separator, JSON output parsing (last {...} in stdout), per-handler timeout (default 60s), disableAllHooks override, payload streamed to handler stdin as JSON - memory/loader.ts — hierarchical DEEPCODE.md walk (cwd → parents), user-level ~/.deepcode/DEEPCODE.md, AGENTS.md auto-import at project root, .deepcode/rules/*.md sorted load, @-import with 4-hop max + cycle detection + maxBytes budget Tests ----- - modes/index.test.ts (14 tests, all 6 modes × 4 permission verdicts) - hooks/dispatcher.test.ts (14 tests: exec, JSON parse, matcher, timeout, stdin, disableAllHooks, unimplemented-type graceful) - memory/loader.test.ts (14 tests: empty / user / project / parents walk / AGENTS.md / rules dir / @-import / unresolved / cycle / maxBytes / maxDepth + walkUpwards unit) Total: 197 passed / 4 skipped / 0 failed (was 151). 
Deferred to M3b --------------- - MCP client (stdio transport at minimum) - Compaction (LLM summarizer when context > threshold) - statusLine runner (JSON-on-stdin contract) - /init multi-phase interactive flow - auto classifier mode (LLM-judged per-call) - 4 additional hook handler types (http / mcp_tool / prompt / agent) - hooks `if` field (permission-syntax filtering) - Wiring evaluateMode + HookDispatcher into runAgent (currently callable but not yet enforcing in the agent loop) Verified -------- pnpm typecheck → green pnpm build → green pnpm test → 197 passed / 4 skipped / 0 failed pnpm format:check → conformant Docs ---- - docs/milestones/M3.md — delivery breakdown of M3a vs M3b deferred items Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/milestones/M3.md | 69 ++++++ packages/core/src/hooks/dispatcher.test.ts | 251 +++++++++++++++++++++ packages/core/src/hooks/dispatcher.ts | 179 +++++++++++++++ packages/core/src/hooks/index.ts | 15 +- packages/core/src/hooks/types.ts | 52 +++++ packages/core/src/index.ts | 29 +++ packages/core/src/memory/index.ts | 13 +- packages/core/src/memory/loader.test.ts | 144 ++++++++++++ packages/core/src/memory/loader.ts | 184 +++++++++++++++ packages/core/src/modes/index.test.ts | 98 ++++++++ packages/core/src/modes/index.ts | 100 ++++++++ 11 files changed, 1126 insertions(+), 8 deletions(-) create mode 100644 docs/milestones/M3.md create mode 100644 packages/core/src/hooks/dispatcher.test.ts create mode 100644 packages/core/src/hooks/dispatcher.ts create mode 100644 packages/core/src/hooks/types.ts create mode 100644 packages/core/src/memory/loader.test.ts create mode 100644 packages/core/src/memory/loader.ts create mode 100644 packages/core/src/modes/index.test.ts create mode 100644 packages/core/src/modes/index.ts diff --git a/docs/milestones/M3.md b/docs/milestones/M3.md new file mode 100644 index 0000000..441c3fd --- /dev/null +++ b/docs/milestones/M3.md @@ -0,0 +1,69 @@ +# M3 — Modes + Hooks + Memory (partial) + +> 
**Status**: ✅ partial — modes, hook framework (command handler), memory dual system shipped. MCP / compaction / statusLine / `/init` multi-phase / `auto` classifier / the 4 remaining hook handler types → split into M3b (next PR). +> **Branch**: `feat/m3-modes-hooks-memory` + +## Scope (planned, full M3) + +> DEVELOPMENT_PLAN.md §6: +> Task 子代理 + Hooks 9 事件 × 5 handler 类型 + JSON 输出契约 + `if` 字段 + MCP 完整 + compaction + modes 5 档 + auto 分类器 + statusLine(JSON-on-stdin)+ Memory 双系统 + AGENTS.md 互操作 + `/init` 多阶段 + +## What ships in THIS PR (M3a) + +| Module | Lines | Tests | +| -------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------ | +| `modes/index.ts` — 6 mode policies, ModeRequest → ModeVerdict | 92 | 14 | +| `hooks/types.ts` — HookContext / HookHandlerOutput / HookResult | 55 | — | +| `hooks/dispatcher.ts` — 9 events × `command` handler + JSON output parsing + `disableAllHooks` + matcher with `\|` OR + stdin payload + timeout | 178 | 14 | +| `memory/loader.ts` — DEEPCODE.md hierarchical walk + user-level + AGENTS.md + .deepcode/rules/ + @-import (4-hop max, cycle detection) + maxBytes budget | 170 | 14 | +| `index.ts` — re-export new modules | +30 | — | +| **subtotal** | **~525** | **42** | + +## What's deferred to M3b (next PR) + +- **MCP client** (stdio transport, list-tools, call-tool, mcp\_\_ prefix) +- **Compaction** (context > threshold → summarizer LLM call) +- **statusLine** runner with JSON-on-stdin contract +- **`/init` multi-phase** interactive (subagent explorer + proposal review) +- **`auto` classifier mode** (LLM-judged per tool call; expensive — needs cost cap) +- **Hook handler types** beyond `command`: `http`, `mcp_tool`, `prompt`, `agent` +- **Hook `if` field** for permission-syntax filtering (currently the only matcher is tool name) +- **Mode/permission integration into agent loop** — currently `evaluateMode()` is 
callable but not yet threaded through `runAgent()`. M3b wires it. + +## Verification + +```bash +pnpm typecheck → green +pnpm test → 197 passed / 4 skipped / 0 failed +pnpm build → green +``` + +## Key design decisions + +1. **Plan mode enforcement is hardcoded, not configurable.** Write tools (`Write`, `Edit`, `Bash`, `NotebookEdit`) are denied regardless of permission rules — matches `docs/design/sandbox-plan-worktree.md` §3.3 invariant #1. + +2. **`dontAsk` mode upgrades `ask` to `deny`** (no prompt, hard deny). Documented in §3.8 — strict white-list mode. + +3. **`acceptEdits` permission-deny still wins** — even in `acceptEdits` mode, an explicit `deny` permission blocks the call. Matches matrix row in `docs/design/sandbox-plan-worktree.md`. + +4. **Hook handler stdout JSON parsing accepts trailing JSON** — handlers can print log lines before emitting the JSON output object. Last `{...}` in stdout is parsed. + +5. **Memory @-import recursion uses cycle detection via visited Set** — both per-file (resolveImportPath) AND globally (visited paths). Tests verify a→b→a cycle terminates. + +6. **AGENTS.md is auto-imported only at project root**, not at parent dirs. Matches Cursor/Aider's convention. + +7. **`rules/*.md` are loaded sorted alphabetically** — deterministic ordering for `BEHAVIOR_PARITY.md` testing in M9. + +## Tests + +``` +modes/ 14 tests — invariants across all 6 mode × 4 permission verdicts +hooks/ 14 tests — command exec, JSON parsing, OR matcher, timeout, stdin +memory/ 14 tests — hierarchical walk, @-import, cycle detection, budget +``` + +Tests run in ~250ms locally. + +## Next + +M3b: MCP client + compaction + statusLine + agent-loop integration of modes/hooks. 
diff --git a/packages/core/src/hooks/dispatcher.test.ts b/packages/core/src/hooks/dispatcher.test.ts new file mode 100644 index 0000000..5d8ebcc --- /dev/null +++ b/packages/core/src/hooks/dispatcher.test.ts @@ -0,0 +1,251 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { promises as fs } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { HookDispatcher, runCommand, tryParseJsonOutput } from './dispatcher.js'; + +describe('HookDispatcher', () => { + let cwd: string; + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), 'dc-hooks-')); + }); + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }); + }); + + it('returns empty result for unconfigured event', async () => { + const d = new HookDispatcher({}); + const r = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Read' }, + }); + expect(r.stdout).toBe(''); + expect(r.anyBlocked).toBe(false); + expect(r.timings).toEqual([]); + }); + + it('runs command-type handler and captures stdout', async () => { + const d = new HookDispatcher({ + hooks: { + PreToolUse: [{ matcher: 'Bash', hooks: [{ type: 'command', command: 'echo hello-hook' }] }], + }, + }); + const r = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Bash' }, + }); + expect(r.stdout).toContain('hello-hook'); + expect(r.timings).toHaveLength(1); + expect(r.timings[0]?.exitCode).toBe(0); + }); + + it('skips handlers whose matcher does not apply', async () => { + const d = new HookDispatcher({ + hooks: { + PreToolUse: [ + { matcher: 'Bash', hooks: [{ type: 'command', command: 'echo SHOULD_NOT_RUN' }] }, + { matcher: 'Edit', hooks: [{ type: 'command', command: 'echo edit-hook' }] }, + ], + }, + }); + const r = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Edit' 
}, + }); + expect(r.stdout).not.toContain('SHOULD_NOT_RUN'); + expect(r.stdout).toContain('edit-hook'); + }); + + it('matcher supports | OR separator', async () => { + const d = new HookDispatcher({ + hooks: { + PreToolUse: [ + { + matcher: 'Edit|Write', + hooks: [{ type: 'command', command: 'echo edit-or-write' }], + }, + ], + }, + }); + const writeResult = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Write' }, + }); + expect(writeResult.stdout).toContain('edit-or-write'); + const editResult = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Edit' }, + }); + expect(editResult.stdout).toContain('edit-or-write'); + }); + + it('non-zero exit sets anyBlocked', async () => { + const d = new HookDispatcher({ + hooks: { + PreToolUse: [{ hooks: [{ type: 'command', command: 'echo blocked >&2; exit 2' }] }], + }, + }); + const r = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Bash' }, + }); + expect(r.anyBlocked).toBe(true); + expect(r.timings[0]?.exitCode).toBe(2); + expect(r.stderr).toContain('blocked'); + }); + + it('parses JSON output schema from stdout', async () => { + const d = new HookDispatcher({ + hooks: { + PreToolUse: [ + { + hooks: [ + { + type: 'command', + command: + 'echo \'{"decision":"deny","systemMessage":"nope","additionalContext":"context"}\'', + }, + ], + }, + ], + }, + }); + const r = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Bash' }, + }); + expect(r.json?.decision).toBe('deny'); + expect(r.json?.systemMessage).toBe('nope'); + expect(r.json?.additionalContext).toBe('context'); + }); + + it('disableAllHooks suppresses all execution', async () => { + const d = new HookDispatcher({ + disableAllHooks: true, + hooks: { + PreToolUse: [{ hooks: [{ type: 'command', command: 'echo SHOULD_NOT_RUN' }] }], + }, + }); + const r = await d.dispatch({ + event: 
'PreToolUse', + cwd, + triggeredAt: '2026-01-01', + payload: { tool: 'Bash' }, + }); + expect(r.stdout).toBe(''); + expect(r.timings).toEqual([]); + }); + + it('runs multiple events independently', async () => { + const d = new HookDispatcher({ + hooks: { + SessionStart: [{ hooks: [{ type: 'command', command: 'echo session-start' }] }], + Stop: [{ hooks: [{ type: 'command', command: 'echo stop' }] }], + }, + }); + const r1 = await d.dispatch({ + event: 'SessionStart', + cwd, + triggeredAt: 't', + payload: {}, + }); + expect(r1.stdout).toContain('session-start'); + const r2 = await d.dispatch({ + event: 'Stop', + cwd, + triggeredAt: 't', + payload: {}, + }); + expect(r2.stdout).toContain('stop'); + }); + + it('reads stdin payload (event + payload as JSON)', async () => { + const stdinReader = join(cwd, 'reader.sh'); + await fs.writeFile(stdinReader, '#!/bin/sh\ncat\n', 'utf8'); + await fs.chmod(stdinReader, 0o755); + const d = new HookDispatcher({ + hooks: { + UserPromptSubmit: [{ hooks: [{ type: 'command', command: stdinReader }] }], + }, + }); + const r = await d.dispatch({ + event: 'UserPromptSubmit', + cwd, + triggeredAt: 't', + payload: { prompt: 'hello there' }, + }); + expect(r.stdout).toContain('UserPromptSubmit'); + expect(r.stdout).toContain('hello there'); + }); + + it('unimplemented handler types return error in stderr but do not block', async () => { + const d = new HookDispatcher({ + hooks: { + PreToolUse: [{ hooks: [{ type: 'http', url: 'https://example.com' }] }], + }, + }); + const r = await d.dispatch({ + event: 'PreToolUse', + cwd, + triggeredAt: 't', + payload: { tool: 'Bash' }, + }); + expect(r.stderr).toMatch(/not implemented/); + expect(r.anyBlocked).toBe(false); + }); +}); + +describe('runCommand', () => { + it('captures stdout and exitCode', async () => { + const r = await runCommand({ + command: 'echo hi; exit 0', + cwd: '/tmp', + timeoutMs: 5000, + env: process.env as Record, + }); + expect(r.stdout).toContain('hi'); + 
expect(r.exitCode).toBe(0); + }); + + it('kills on timeout', async () => { + const r = await runCommand({ + command: 'sleep 5', + cwd: '/tmp', + timeoutMs: 100, + env: process.env as Record, + }); + expect(r.exitCode).toBe(124); + expect(r.stderr).toMatch(/killed by timeout/); + }); +}); + +describe('tryParseJsonOutput', () => { + it('parses pure JSON', () => { + expect(tryParseJsonOutput('{"decision":"allow"}')?.decision).toBe('allow'); + }); + it('parses JSON after log lines', () => { + const r = tryParseJsonOutput('log line 1\nlog line 2\n{"decision":"deny"}'); + expect(r?.decision).toBe('deny'); + }); + it('returns null on no JSON', () => { + expect(tryParseJsonOutput('plain text')).toBeNull(); + }); + it('returns null on empty', () => { + expect(tryParseJsonOutput('')).toBeNull(); + }); +}); diff --git a/packages/core/src/hooks/dispatcher.ts b/packages/core/src/hooks/dispatcher.ts new file mode 100644 index 0000000..a11b6f2 --- /dev/null +++ b/packages/core/src/hooks/dispatcher.ts @@ -0,0 +1,179 @@ +// Hook dispatcher — runs configured handlers for a given event. +// Spec: docs/DEVELOPMENT_PLAN.md §3.6 +// M3 ships the `command` handler type only; http/mcp_tool/prompt/agent stubs return errors. + +import { spawn } from 'node:child_process'; +import { resolve } from 'node:path'; +import type { HookHandler, HookMatcher, Hooks } from '../config/types.js'; +import type { HookContext, HookHandlerOutput, HookResult } from './types.js'; + +export interface HookDispatcherOpts { + hooks?: Hooks; + disableAllHooks?: boolean; + /** Default handler timeout if not specified. */ + defaultTimeoutMs?: number; +} + +export class HookDispatcher { + private readonly hooks: Hooks; + private readonly disabled: boolean; + private readonly defaultTimeoutMs: number; + + constructor(opts: HookDispatcherOpts) { + this.hooks = opts.hooks ?? {}; + this.disabled = !!opts.disableAllHooks; + this.defaultTimeoutMs = opts.defaultTimeoutMs ?? 
60_000; + } + + /** + * Dispatch all hooks for an event, sequentially, so later handlers see earlier side effects. + * Any non-zero exit sets `anyBlocked`; remaining handlers still run; `json` keeps the last parse. + */ + async dispatch(ctx: HookContext): Promise { + const result: HookResult = { + stdout: '', + stderr: '', + anyBlocked: false, + timings: [], + }; + if (this.disabled) return result; + + const matchers = this.hooks[ctx.event] ?? []; + for (const m of matchers) { + if (!this.matcherApplies(m, ctx)) continue; + for (const handler of m.hooks) { + const t0 = Date.now(); + const out = await this.runHandler(handler, ctx); + const dt = Date.now() - t0; + result.stdout += out.stdout; + result.stderr += out.stderr; + if (out.exitCode !== 0) result.anyBlocked = true; + result.timings.push({ matcher: m.matcher, durationMs: dt, exitCode: out.exitCode }); + + // Parse THIS handler's stdout; the last handler that emits valid JSON wins + const parsed = tryParseJsonOutput(out.stdout); + if (parsed) result.json = parsed; + } + } + return result; + } + + private matcherApplies(matcher: HookMatcher, ctx: HookContext): boolean { + if (!matcher.matcher) return true; + // matcher syntax: "Bash", "Edit|Write" (OR), or "Bash(git push:*)" — for the last form only the + // tool name before `(` is compared; matchers are ignored (always apply) for non-tool events. + if (ctx.event !== 'PreToolUse' && ctx.event !== 'PostToolUse') return true; + const toolName = (ctx.payload['tool'] as string) ?? 
''; + const alternatives = matcher.matcher.split('|').map((s) => s.trim()); + return alternatives.some((alt) => { + const parenIdx = alt.indexOf('('); + if (parenIdx === -1) return alt === toolName; + const ruleTool = alt.slice(0, parenIdx); + return ruleTool === toolName; + }); + } + + private async runHandler( + handler: HookHandler, + ctx: HookContext, + ): Promise<{ stdout: string; stderr: string; exitCode: number }> { + if (handler.type !== 'command') { + // M3 only implements `command` type; others return error stub + return { + stdout: '', + stderr: `Hook handler type "${handler.type}" is not implemented yet (planned M5+).`, + exitCode: 0, // don't block agent on unimplemented handlers + }; + } + const cmd = handler.command; + if (!cmd) { + return { stdout: '', stderr: 'Missing command in hook config.', exitCode: 0 }; + } + return runCommand({ + command: cmd, + cwd: ctx.cwd, + timeoutMs: handler.timeout ? handler.timeout * 1000 : this.defaultTimeoutMs, + env: { + ...process.env, + ...(ctx.env ?? 
{}), + DEEPCODE_HOOK_EVENT: ctx.event, + DEEPCODE_TRIGGERED_AT: ctx.triggeredAt, + }, + stdin: JSON.stringify({ event: ctx.event, payload: ctx.payload }), + }); + } +} + +interface RunCommandOpts { + command: string; + cwd: string; + timeoutMs: number; + env: Record; + stdin?: string; +} +/** Runs `command` via `/bin/sh -c` in `cwd` and resolves with the captured stdout, stderr and exit code (124 = killed by timeout, 127 = spawn error). */ +export function runCommand( + opts: RunCommandOpts, +): Promise<{ stdout: string; stderr: string; exitCode: number }> { + return new Promise((resolveResult) => { + const cwd = resolve(opts.cwd); + const child = spawn('/bin/sh', ['-c', opts.command], { cwd, env: opts.env }); + let stdout = ''; + let stderr = ''; + let killed = false; + const timer = setTimeout(() => { + killed = true; + child.kill('SIGTERM'); + }, opts.timeoutMs); + + child.stdout.on('data', (c: Buffer) => { + stdout += c.toString('utf8'); + }); + child.stderr.on('data', (c: Buffer) => { + stderr += c.toString('utf8'); + }); + + child.on('error', (err) => { + clearTimeout(timer); + resolveResult({ stdout, stderr: stderr + (err as Error).message, exitCode: 127 }); + }); + + child.on('close', (code) => { + clearTimeout(timer); + let exitCode = code ?? 0; /* NOTE(review): `code` is null when the child was terminated by a signal, so a handler killed externally is reported as exit 0 (non-blocking) — confirm this is intended */ + if (killed) { + stderr += `\n[killed by timeout after ${opts.timeoutMs}ms]`; + exitCode = 124; + } + resolveResult({ stdout, stderr, exitCode }); + }); + + if (opts.stdin) { /* NOTE(review): stdin is never closed when there is no payload, and child.stdin has no 'error' listener — confirm a handler that exits without reading cannot surface EPIPE as an unhandled error */ + child.stdin.write(opts.stdin); + child.stdin.end(); + } + }); +} + +/** Extract a JSON object from handler stdout, if any. Returns null on parse failure. 
*/ +export function tryParseJsonOutput(stdout: string): HookHandlerOutput | null { + const trimmed = stdout.trim(); + if (!trimmed) return null; + // Try to find the last JSON object in the output (handlers may print logs first) + const candidates: string[] = []; + // Strategy: slice from the LAST '{' to the end. NOTE(review): this only finds a flat trailing object — log lines followed by an object with nested braces parse as neither candidate and yield null. + const lastOpen = trimmed.lastIndexOf('{'); + if (lastOpen >= 0) { + candidates.push(trimmed.slice(lastOpen)); + } + candidates.push(trimmed); + for (const c of candidates) { + try { + const parsed = JSON.parse(c) as HookHandlerOutput; + if (typeof parsed === 'object' && parsed !== null) return parsed; + } catch { + // try next candidate + } + } + return null; +} diff --git a/packages/core/src/hooks/index.ts b/packages/core/src/hooks/index.ts index db564d8..b519c5b 100644 --- a/packages/core/src/hooks/index.ts +++ b/packages/core/src/hooks/index.ts @@ -1,6 +1,13 @@ -// Module: hooks +// Hook subsystem — 9 events × `command` handler (M3 ships command only; +// http/mcp_tool/prompt/agent handler types deferred to M5+). +// Spec: docs/DEVELOPMENT_PLAN.md §3.6 // Milestone: M3 -// Spec: docs/DEVELOPMENT_PLAN.md §3.6 9 events × 5 handler types + if filter + JSON output contract -// Status: placeholder — implemented in M3 -export {}; +export { + HookDispatcher, + runCommand, + tryParseJsonOutput, + type HookDispatcherOpts, +} from './dispatcher.js'; + +export type { HookContext, HookHandlerOutput, HookResult, HookRegistration } from './types.js'; diff --git a/packages/core/src/hooks/types.ts b/packages/core/src/hooks/types.ts new file mode 100644 index 0000000..ee4e2ef --- /dev/null +++ b/packages/core/src/hooks/types.ts @@ -0,0 +1,52 @@ +// Hook subsystem types. +// Spec: docs/DEVELOPMENT_PLAN.md §3.6 + +import type { HookEventName, HookMatcher } from '../config/types.js'; + +/** + * Structured JSON output that a hook handler MAY produce on stdout to influence + * the agent. Unknown fields are tolerated; missing fields default to no-op. 
+ */ +export interface HookHandlerOutput { + decision?: 'allow' | 'deny' | 'ask'; + permissionDecision?: 'allow' | 'deny' | 'ask'; + hookSpecificOutput?: string; + /** Inserted into the next LLM call as additional system context. */ + additionalContext?: string; + /** Shown to user as a red banner. */ + systemMessage?: string; + /** If this is a Stop hook, the reason for stopping. */ + stopReason?: string; + /** If true, the hook's stdout is NOT echoed to the user. */ + suppressOutput?: boolean; +} + +export interface HookContext { + cwd: string; + /** ISO timestamp. */ + triggeredAt: string; + /** Event name. */ + event: HookEventName; + /** Event-specific payload (e.g. tool call info for PreToolUse). */ + payload: Record; + /** Env vars passed to command-type hooks. */ + env?: Record; +} + +export interface HookResult { + /** Concatenated stdout of all handlers that ran. */ + stdout: string; + /** Concatenated stderr. */ + stderr: string; + /** Parsed JSON output of the LAST handler that emitted valid JSON. */ + json?: HookHandlerOutput; + /** True if ANY handler exited with non-zero (signals the agent to block). */ + anyBlocked: boolean; + /** Per-handler timing for debugging. 
*/ + timings: Array<{ matcher?: string; durationMs: number; exitCode: number }>; +} + +export interface HookRegistration { + event: HookEventName; + matchers: HookMatcher[]; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e367853..facad89 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -87,3 +87,32 @@ export { type Credentials, type CredentialsStoreOpts, } from './credentials/index.js'; + +// Mode policy (M3) +export { + evaluateMode, + modeVerdictReason, + type ModeRequest, + type ModeVerdict, +} from './modes/index.js'; + +// Hooks (M3 — command handler only; http/mcp_tool/prompt/agent → M5+) +export { + HookDispatcher, + runCommand, + tryParseJsonOutput, + type HookContext, + type HookHandlerOutput, + type HookResult, + type HookRegistration, + type HookDispatcherOpts, +} from './hooks/index.js'; + +// Memory (M3 — dual-system + @-import + AGENTS.md + rules dir) +export { + loadMemory, + walkUpwards, + type MemorySource, + type LoadedMemory, + type LoadMemoryOpts, +} from './memory/index.js'; diff --git a/packages/core/src/memory/index.ts b/packages/core/src/memory/index.ts index a283ae5..8631bc6 100644 --- a/packages/core/src/memory/index.ts +++ b/packages/core/src/memory/index.ts @@ -1,6 +1,11 @@ -// Module: memory +// Memory subsystem entry — dual-system (DEEPCODE.md + auto-memory) + @-import + rules. 
+// Spec: docs/DEVELOPMENT_PLAN.md §3.6a // Milestone: M3 -// Spec: docs/DEVELOPMENT_PLAN.md §3.6a dual memory (DEEPCODE.md + auto-memory) + @-import + AGENTS.md + .deepcode/rules/ -// Status: placeholder — implemented in M3 -export {}; +export { + loadMemory, + walkUpwards, + type MemorySource, + type LoadedMemory, + type LoadMemoryOpts, +} from './loader.js'; diff --git a/packages/core/src/memory/loader.test.ts b/packages/core/src/memory/loader.test.ts new file mode 100644 index 0000000..45f2a3a --- /dev/null +++ b/packages/core/src/memory/loader.test.ts @@ -0,0 +1,144 @@ +import { promises as fs } from 'node:fs'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { loadMemory, walkUpwards } from './loader.js'; + +describe('loadMemory', () => { + let home: string; + let cwd: string; + + beforeEach(async () => { + home = await mkdtemp(join(tmpdir(), 'dc-mem-home-')); + cwd = await mkdtemp(join(tmpdir(), 'dc-mem-cwd-')); + }); + afterEach(async () => { + await rm(home, { recursive: true, force: true }); + await rm(cwd, { recursive: true, force: true }); + }); + + it('returns empty when nothing exists', async () => { + const m = await loadMemory({ cwd, home }); + expect(m.sources).toHaveLength(0); + expect(m.text).toBe(''); + expect(m.bytes).toBe(0); + }); + + it('loads user-level DEEPCODE.md', async () => { + await fs.mkdir(join(home, '.deepcode'), { recursive: true }); + await fs.writeFile(join(home, '.deepcode', 'DEEPCODE.md'), 'user-level memory', 'utf8'); + const m = await loadMemory({ cwd, home }); + expect(m.sources).toHaveLength(1); + expect(m.text).toContain('user-level memory'); + }); + + it('loads project-level DEEPCODE.md', async () => { + await fs.writeFile(join(cwd, 'DEEPCODE.md'), 'project memory', 'utf8'); + const m = await loadMemory({ cwd, home }); + expect(m.text).toContain('project memory'); + }); + + 
it('auto-imports AGENTS.md', async () => { + await fs.writeFile(join(cwd, 'AGENTS.md'), 'cross-tool agents content', 'utf8'); + const m = await loadMemory({ cwd, home }); + expect(m.text).toContain('cross-tool agents content'); + expect(m.sources.some((s) => s.label.includes('AGENTS.md'))).toBe(true); + }); + + it('walks upward from cwd loading DEEPCODE.md at each level', async () => { + const parent = join(cwd, 'parent'); + const child = join(parent, 'child'); + await fs.mkdir(child, { recursive: true }); + await fs.writeFile(join(cwd, 'DEEPCODE.md'), 'root-level', 'utf8'); + await fs.writeFile(join(parent, 'DEEPCODE.md'), 'parent-level', 'utf8'); + await fs.writeFile(join(child, 'DEEPCODE.md'), 'child-level', 'utf8'); + + const m = await loadMemory({ cwd: child, home }); + expect(m.text).toContain('root-level'); + expect(m.text).toContain('parent-level'); + expect(m.text).toContain('child-level'); + }); + + it('loads .deepcode/rules/*.md', async () => { + const rulesDir = join(cwd, '.deepcode', 'rules'); + await fs.mkdir(rulesDir, { recursive: true }); + await fs.writeFile(join(rulesDir, 'api.md'), 'API rules', 'utf8'); + await fs.writeFile(join(rulesDir, 'db.md'), 'DB rules', 'utf8'); + const m = await loadMemory({ cwd, home }); + expect(m.text).toContain('API rules'); + expect(m.text).toContain('DB rules'); + expect(m.sources.filter((s) => s.label.startsWith('rule:'))).toHaveLength(2); + }); + + it('expands @-import within DEEPCODE.md', async () => { + await fs.writeFile(join(cwd, 'DEEPCODE.md'), '@./extra.md\nmain content', 'utf8'); + await fs.writeFile(join(cwd, 'extra.md'), 'extra content from import', 'utf8'); + const m = await loadMemory({ cwd, home }); + expect(m.text).toContain('extra content from import'); + expect(m.text).toContain('main content'); + }); + + it('records unresolved imports without crashing', async () => { + await fs.writeFile(join(cwd, 'DEEPCODE.md'), '@./missing.md\ncontent', 'utf8'); + const m = await loadMemory({ cwd, home }); + 
expect(m.unresolvedImports.length).toBeGreaterThan(0); + expect(m.unresolvedImports[0]).toMatch(/missing\.md/); + }); + + it('detects cycles in @-imports', async () => { + await fs.writeFile(join(cwd, 'DEEPCODE.md'), '@./a.md', 'utf8'); + await fs.writeFile(join(cwd, 'a.md'), '@./b.md\nA', 'utf8'); + await fs.writeFile(join(cwd, 'b.md'), '@./a.md\nB', 'utf8'); + const m = await loadMemory({ cwd, home }); + // Both a and b should be loaded but not infinitely + expect(m.sources.some((s) => s.label.includes('a.md'))).toBe(true); + expect(m.sources.some((s) => s.label.includes('b.md'))).toBe(true); + expect(m.sources.length).toBeLessThan(10); // sanity: no explosion + }); + + it('respects maxBytes budget', async () => { + const big = 'x'.repeat(100_000); + await fs.writeFile(join(cwd, 'DEEPCODE.md'), big, 'utf8'); + const m = await loadMemory({ cwd, home, maxBytes: 5_000 }); + expect(m.bytes).toBeLessThanOrEqual(5_100); // small overshoot for "[truncated]" marker + expect(m.text).toContain('[truncated by memoryLoadCapKB]'); + }); + + it('respects maxImportDepth', async () => { + // a → b → c → d → e — should stop at depth 4 (default) + await fs.writeFile(join(cwd, 'DEEPCODE.md'), '@./a.md', 'utf8'); + for (const [from, to] of [ + ['a', 'b'], + ['b', 'c'], + ['c', 'd'], + ['d', 'e'], + ]) { + await fs.writeFile(join(cwd, `${from}.md`), `@./${to}.md\n${from} content`, 'utf8'); + } + await fs.writeFile(join(cwd, 'e.md'), 'e content', 'utf8'); + const m = await loadMemory({ cwd, home, maxImportDepth: 2 }); + // Depth 2: DEEPCODE.md → a.md → b.md — c onwards should not be loaded + expect(m.text).toContain('a content'); + expect(m.text).toContain('b content'); + expect(m.text).not.toContain('e content'); + }); +}); + +describe('walkUpwards', () => { + it('walks from cwd to root', () => { + const dirs = walkUpwards('/a/b/c/d', '/x'); // boundary not on path → walk to / + expect(dirs[0]).toBe('/a/b/c/d'); + expect(dirs.at(-1)).toBe('/'); + }); + + it('stops at boundary when on 
// Memory loader — assembles the system-prompt-relevant context, in load order:
//   1. ~/.deepcode/DEEPCODE.md (user-level)
//   2. DEEPCODE.md in cwd and its parent directories (root-most first)
//   3. AGENTS.md in cwd
//   4. .deepcode/rules/*.md in cwd (sorted by file name)
// Every loaded file may pull in further files through `@<path>` imports
// (recursive, depth-limited, with cycle detection).
//
// Spec: docs/DEVELOPMENT_PLAN.md §3.6a

import { promises as fs } from 'node:fs';
import { homedir } from 'node:os';
import { dirname, isAbsolute, join, resolve } from 'node:path';

export interface MemorySource {
  /** Where the content came from (label only — not for matching). */
  label: string;
  /** Absolute path. */
  path: string;
  /** Content after @-import markers were processed (and possibly truncated). */
  content: string;
}

export interface LoadedMemory {
  sources: MemorySource[];
  /** Concatenated markdown ready to inject into system prompt. */
  text: string;
  /** Cumulative UTF-8 byte size of all source contents, for budget tracking. */
  bytes: number;
  /** Files referenced via @-import that could not be resolved. */
  unresolvedImports: string[];
}

export interface LoadMemoryOpts {
  cwd: string;
  /** Override $HOME for tests. Also used to resolve `@~/...` imports. */
  home?: string;
  /** Max bytes total (caller can use this to enforce settings.memoryLoadCapKB). */
  maxBytes?: number;
  /** Max depth for @-import recursion. */
  maxImportDepth?: number;
}

const DEFAULT_MAX_BYTES = 100 * 1024;
const DEFAULT_MAX_DEPTH = 4;
const TRUNCATION_MARKER = '\n... [truncated by memoryLoadCapKB]';

// Matches `@<path>` at line start or after whitespace; <path> contains no whitespace.
const IMPORT_PATTERN = /(^|\s)@([\w./~-]+(?:\.md|\.txt)?)/g;

/**
 * Load every memory file that applies to `opts.cwd` and merge them.
 *
 * The byte budget is enforced in UTF-8 bytes. The first file that does not fit
 * is cut at a character boundary and suffixed with a truncation marker; after
 * that nothing more is loaded, so `bytes` overshoots `maxBytes` by at most the
 * marker length.
 *
 * @param opts - Working directory plus optional home / budget / depth overrides.
 * @returns The individual sources, their concatenation, the byte total and the
 *          list of `@` references that did not resolve to a file.
 * @throws Any filesystem error other than "missing" (ENOENT / ENOTDIR / EISDIR).
 */
export async function loadMemory(opts: LoadMemoryOpts): Promise<LoadedMemory> {
  const home = opts.home ?? homedir();
  const maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES;
  const maxDepth = opts.maxImportDepth ?? DEFAULT_MAX_DEPTH;

  const sources: MemorySource[] = [];
  const unresolvedImports: string[] = [];
  const visited = new Set<string>();
  let bytes = 0;
  let capReached = false;

  const addFile = async (path: string, label: string, depth: number): Promise<void> => {
    if (capReached) return; // budget already spent — do no further I/O
    const abs = resolve(path);
    if (visited.has(abs)) return; // already loaded, or an import cycle
    visited.add(abs);

    const raw = await readMaybe(abs);
    if (raw === null) return;

    // Imported files are added (and therefore appear in `sources`) before the
    // file that imports them.
    const expanded =
      depth < maxDepth
        ? await expandImports(raw, abs, depth + 1, addFile, unresolvedImports, home)
        : raw;

    const size = Buffer.byteLength(expanded, 'utf8');
    const remaining = maxBytes - bytes;
    if (size <= remaining) {
      sources.push({ label, path: abs, content: expanded });
      bytes += size;
      return;
    }

    // One of this file's imports may have hit the cap while we were expanding.
    if (capReached) return;
    capReached = true;
    const truncated = truncateUtf8(expanded, Math.max(0, remaining)) + TRUNCATION_MARKER;
    sources.push({ label, path: abs, content: truncated });
    bytes += Buffer.byteLength(truncated, 'utf8');
  };

  // 1. ~/.deepcode/DEEPCODE.md (user-level)
  await addFile(join(home, '.deepcode', 'DEEPCODE.md'), 'user memory', 0);

  // 2. DEEPCODE.md from cwd upwards; loaded root-most first so the deepest
  //    file comes last in the concatenation.
  const rootFirst = [...walkUpwards(opts.cwd, home)].reverse();
  for (const dir of rootFirst) {
    await addFile(join(dir, 'DEEPCODE.md'), `${dir}/DEEPCODE.md`, 0);
  }

  // 3. AGENTS.md next to the cwd-level DEEPCODE.md
  await addFile(join(opts.cwd, 'AGENTS.md'), 'AGENTS.md (cross-tool)', 0);

  // 4. .deepcode/rules/*.md (path-scoped frontmatter — M3 loads all; gating M4)
  const rulesDir = join(opts.cwd, '.deepcode', 'rules');
  let ruleFiles: string[] = [];
  try {
    ruleFiles = (await fs.readdir(rulesDir)).filter((name) => name.endsWith('.md')).sort();
  } catch (err) {
    const code = errnoCode(err);
    if (code !== 'ENOENT' && code !== 'ENOTDIR') throw err;
  }
  for (const name of ruleFiles) {
    await addFile(join(rulesDir, name), `rule: ${name}`, 0);
  }

  const text = sources.map((s) => `# ${s.label}\n\n${s.content}`).join('\n\n---\n\n');

  return { sources, text, bytes, unresolvedImports };
}

/** Extract a string `code` property from a thrown value, if it has one. */
function errnoCode(err: unknown): string | undefined {
  if (typeof err !== 'object' || err === null || !('code' in err)) return undefined;
  const code = (err as { code?: unknown }).code;
  return typeof code === 'string' ? code : undefined;
}

/**
 * Cut `text` to at most `maxBytes` UTF-8 bytes without splitting a multi-byte
 * character.
 */
function truncateUtf8(text: string, maxBytes: number): string {
  if (maxBytes <= 0) return '';
  const buf = Buffer.from(text, 'utf8');
  if (buf.length <= maxBytes) return text;
  let end = maxBytes;
  // A continuation byte (10xxxxxx) at the cut point means we are mid-character.
  while (end > 0 && ((buf[end] ?? 0) & 0xc0) === 0x80) end--;
  return buf.subarray(0, end).toString('utf8');
}

/**
 * Read a UTF-8 file, returning `null` when there is no readable regular file at
 * `path` (missing, a path component is not a directory, or it is a directory).
 * Other errors are rethrown.
 */
async function readMaybe(path: string): Promise<string | null> {
  try {
    return await fs.readFile(path, 'utf8');
  } catch (err) {
    const code = errnoCode(err);
    if (code === 'ENOENT' || code === 'ENOTDIR' || code === 'EISDIR') return null;
    throw err;
  }
}

/**
 * List `start` and each of its ancestors, nearest first.
 *
 * The walk stops at `boundary` (which IS included in the result) when it lies
 * on the path from `start` to the filesystem root; otherwise it continues all
 * the way to the root.
 */
export function walkUpwards(start: string, boundary: string): string[] {
  const stopAt = resolve(boundary);
  const dirs: string[] = [];
  let cur = resolve(start);
  for (;;) {
    dirs.push(cur);
    if (cur === stopAt) break;
    const parent = dirname(cur);
    if (parent === cur) break; // reached the filesystem root
    cur = parent;
  }
  return dirs;
}

/**
 * Process `@<path>` references in markdown. Paths are resolved relative to the
 * file containing the @-import. Supports `@~/path` (home-relative) and absolute.
 *
 * Every reference that resolves to a regular file is handed to `addFile` and
 * its marker is removed from the returned content. References that do not
 * resolve are recorded in `unresolved` and left in the text untouched, so
 * ordinary `@mentions` or package names are not silently deleted.
 *
 * @param content - Raw file content.
 * @param sourcePath - Absolute path of the file `content` came from.
 * @param depth - Import depth to pass on to `addFile` for imported files.
 * @param addFile - Loader callback for each resolved import.
 * @param unresolved - Output list; receives `"<sourcePath>: @<ref>"` entries.
 * @param home - Directory that `~/` expands to.
 * @returns `content` with the markers of resolved imports stripped.
 */
async function expandImports(
  content: string,
  sourcePath: string,
  depth: number,
  addFile: (path: string, label: string, depth: number) => Promise<void>,
  unresolved: string[],
  home: string = homedir(),
): Promise<string> {
  const matches = [...content.matchAll(IMPORT_PATTERN)];
  if (matches.length === 0) return content;

  const resolvedRefs = new Set<string>();
  for (const m of matches) {
    const ref = m[2];
    if (ref === undefined) continue;
    const target = resolveImportPath(ref, sourcePath, home);
    if (!(await fileExists(target))) {
      unresolved.push(`${sourcePath}: @${ref}`);
      continue;
    }
    resolvedRefs.add(ref);
    await addFile(target, `@${ref} (from ${sourcePath})`, depth);
  }

  // Drop only the markers we actually followed; keep the leading whitespace.
  return content.replace(IMPORT_PATTERN, (full: string, lead: string, ref: string) =>
    resolvedRefs.has(ref) ? lead : full,
  );
}

/** Turn an @-import reference into a filesystem path. */
function resolveImportPath(ref: string, sourcePath: string, home: string = homedir()): string {
  if (ref.startsWith('~/')) {
    return join(home, ref.slice(2));
  }
  if (isAbsolute(ref)) return ref;
  return join(dirname(sourcePath), ref);
}

/** True when `path` exists and is a regular file (directories do not count). */
async function fileExists(path: string): Promise<boolean> {
  try {
    return (await fs.stat(path)).isFile();
  } catch {
    return false;
  }
}
permission', () => { + expect(evaluateMode(mode, req('Write', 'allow'))).toBe('plan-blocked'); + expect(evaluateMode(mode, req('Write', 'deny'))).toBe('plan-blocked'); + expect(evaluateMode(mode, req('Write', 'no-match'))).toBe('plan-blocked'); + }); + it('blocks Edit and Bash (might have side effects)', () => { + expect(evaluateMode(mode, req('Edit', 'allow'))).toBe('plan-blocked'); + expect(evaluateMode(mode, req('Bash', 'allow'))).toBe('plan-blocked'); + }); + it('allows read-only tools (Read, Grep, Glob, WebFetch, WebSearch)', () => { + for (const t of ['Read', 'Grep', 'Glob', 'WebFetch', 'WebSearch']) { + expect(evaluateMode(mode, req(t, 'no-match'))).toBe('allow'); + } + }); + it('blocks unknown tool conservatively', () => { + expect(evaluateMode(mode, req('UnknownTool', 'allow'))).toBe('plan-blocked'); + }); + }); + + describe('bypassPermissions mode (skip permissions; sandbox still enforces — M3.5)', () => { + const mode: Mode = 'bypassPermissions'; + it('allows everything regardless of permission', () => { + expect(evaluateMode(mode, req('Bash', 'deny'))).toBe('allow'); + expect(evaluateMode(mode, req('Write', 'no-match'))).toBe('allow'); + }); + }); + + describe('acceptEdits mode', () => { + const mode: Mode = 'acceptEdits'; + it('auto-allows Edit/Write unless permissions explicitly deny', () => { + expect(evaluateMode(mode, req('Edit', 'no-match'))).toBe('allow'); + expect(evaluateMode(mode, req('Edit', 'ask'))).toBe('allow'); + expect(evaluateMode(mode, req('Edit', 'allow'))).toBe('allow'); + expect(evaluateMode(mode, req('Write', 'ask'))).toBe('allow'); + }); + it('permission deny still wins for Edit/Write', () => { + expect(evaluateMode(mode, req('Edit', 'deny'))).toBe('deny'); + }); + it('non-Edit tools follow permission rules', () => { + expect(evaluateMode(mode, req('Bash', 'ask'))).toBe('ask'); + expect(evaluateMode(mode, req('Bash', 'deny'))).toBe('deny'); + expect(evaluateMode(mode, req('Bash', 'no-match'))).toBe('ask'); + }); + }); + + 
describe('dontAsk mode (strict — only allow passes; no prompts)', () => { + const mode: Mode = 'dontAsk'; + it('only allow passes', () => { + expect(evaluateMode(mode, req('Read', 'allow'))).toBe('allow'); + }); + it('ask becomes deny (no prompt)', () => { + expect(evaluateMode(mode, req('Bash', 'ask'))).toBe('deny'); + }); + it('no-match becomes deny', () => { + expect(evaluateMode(mode, req('Bash', 'no-match'))).toBe('deny'); + }); + }); + + describe('default mode', () => { + const mode: Mode = 'default'; + it('threads through permission', () => { + expect(evaluateMode(mode, req('Bash', 'allow'))).toBe('allow'); + expect(evaluateMode(mode, req('Bash', 'ask'))).toBe('ask'); + expect(evaluateMode(mode, req('Bash', 'deny'))).toBe('deny'); + }); + it('no-match defaults to ask', () => { + expect(evaluateMode(mode, req('Bash', 'no-match'))).toBe('ask'); + }); + }); + + describe('auto mode (M3 stub — falls back to default behavior)', () => { + const mode: Mode = 'auto'; + it('threads through permission like default', () => { + expect(evaluateMode(mode, req('Read', 'allow'))).toBe('allow'); + expect(evaluateMode(mode, req('Bash', 'no-match'))).toBe('ask'); + }); + }); + + describe('modeVerdictReason', () => { + it('explains plan-blocked', () => { + expect(modeVerdictReason('plan', 'plan-blocked', 'Write')).toMatch(/write tool/); + }); + it('explains deny', () => { + expect(modeVerdictReason('default', 'deny', 'Bash')).toMatch(/denied by mode/); + }); + }); +}); diff --git a/packages/core/src/modes/index.ts b/packages/core/src/modes/index.ts new file mode 100644 index 0000000..c8cd5bc --- /dev/null +++ b/packages/core/src/modes/index.ts @@ -0,0 +1,100 @@ +// Mode policy — turns a (mode, tool-call) pair into an allow/ask/deny verdict. 
// Spec: docs/DEVELOPMENT_PLAN.md §3.8
//       docs/design/sandbox-plan-worktree.md §3.2 (decision matrix)

import type { Mode } from '../types.js';

/** One tool call to be judged, together with what the permission rules said. */
export interface ModeRequest {
  /** Tool name, e.g. `Write` or `Bash`. */
  tool: string;
  /** Tool arguments (not inspected by the mode policy itself). */
  input: Record<string, unknown>;
  /** Result of evaluatePermission() — most-restrictive of allow/ask/deny/no-match. */
  permissionVerdict: 'allow' | 'ask' | 'deny' | 'no-match';
}

/** Final decision for a tool call; `plan-blocked` is plan mode's own refusal. */
export type ModeVerdict = 'allow' | 'ask' | 'deny' | 'plan-blocked';

/**
 * Tool names known to perform writes / mutations.
 * Used only to word the plan-mode refusal; plan mode blocks every tool that is
 * not in PLAN_READONLY_TOOLS, whether or not it is listed here.
 */
const WRITE_TOOLS: ReadonlySet<string> = new Set(['Write', 'Edit', 'NotebookEdit', 'Bash']);

/**
 * Tools that are *safe even in plan mode* (read-only).
 * Bash is NOT in here — it could have side effects. Plan-mode-safe tools include
 * read-style tools and tool-introspection tools.
 */
const PLAN_READONLY_TOOLS: ReadonlySet<string> = new Set([
  'Read',
  'Grep',
  'Glob',
  'WebFetch',
  'WebSearch',
  'AskUserQuestion',
  'ExitPlanMode',
  'ToolSearch',
]);

/**
 * Combine the active mode with the permission verdict for one tool call.
 *
 * - `plan`: allow-list only — tools in PLAN_READONLY_TOOLS are allowed, every
 *   other tool (known write tool or unknown) is `plan-blocked`, regardless of
 *   the permission verdict.
 * - `bypassPermissions`: always `allow`.
 * - `acceptEdits`: `Edit` / `Write` are allowed unless permissions say `deny`;
 *   other tools follow the permission verdict (`no-match` → `ask`).
 * - `dontAsk`: only a permission `allow` passes; everything else is `deny`.
 * - `auto`, `default`, and any unrecognised mode: follow the permission
 *   verdict (`no-match` → `ask`).
 *
 * @param mode - Active mode.
 * @param req - Tool call and its permission verdict.
 * @returns The verdict to enforce.
 */
export function evaluateMode(mode: Mode, req: ModeRequest): ModeVerdict {
  switch (mode) {
    case 'plan':
      // Allow-list: anything not explicitly read-only is blocked, including
      // tools this module has never heard of.
      return PLAN_READONLY_TOOLS.has(req.tool) ? 'allow' : 'plan-blocked';

    case 'bypassPermissions':
      // Skip permission rules entirely (sandbox still enforces at OS level — M3.5)
      return 'allow';

    case 'acceptEdits':
      // Auto-allow Edit/Write; everything else follows permission rules
      if (req.tool === 'Edit' || req.tool === 'Write') {
        return req.permissionVerdict === 'deny' ? 'deny' : 'allow';
      }
      return interpretPermission(req.permissionVerdict, 'ask');

    case 'dontAsk':
      // Strict allow-list: only `allow` passes; everything else denied (no prompt)
      return req.permissionVerdict === 'allow' ? 'allow' : 'deny';

    case 'auto':
      // M3 stub: auto-classifier LLM judgment is M4+. Fall back to default behavior.
      return interpretPermission(req.permissionVerdict, 'ask');

    case 'default':
    default:
      // Runtime fallback also covers mode strings this switch does not know.
      return interpretPermission(req.permissionVerdict, 'ask');
  }
}

/**
 * Map a permission verdict to a mode verdict, with a fallback for `no-match`.
 */
function interpretPermission(
  perm: 'allow' | 'ask' | 'deny' | 'no-match',
  noMatch: ModeVerdict,
): ModeVerdict {
  return perm === 'no-match' ? noMatch : perm;
}

/**
 * Pretty label for a verdict — useful in error messages / UI.
 *
 * For `plan-blocked` the wording distinguishes known write tools from tools
 * that were blocked only because they are not on the read-only allow-list.
 */
export function modeVerdictReason(mode: Mode, verdict: ModeVerdict, tool: string): string {
  switch (verdict) {
    case 'allow':
      return 'allowed';
    case 'ask':
      return 'requires approval';
    case 'deny':
      return `denied by mode "${mode}"`;
    case 'plan-blocked':
      return WRITE_TOOLS.has(tool)
        ? `blocked: ${tool} is a write tool, mode is plan (read-only)`
        : `blocked: ${tool} is not a known read-only tool, mode is plan (read-only)`;
  }
}