From 724015a8b3fe9fce53541883a0cf7c991abe5114 Mon Sep 17 00:00:00 2001
From: oratis <wangharp@gmail.com>
Date: Thu, 28 May 2026 00:32:10 +0800
Subject: [PATCH] =?UTF-8?q?feat(core):=20M3b=20=E2=80=94=20wire=20mode/per?=
 =?UTF-8?q?mission/hooks=20into=20agent=20loop?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements docs/design/sandbox-plan-worktree.md §5.1 decision flow.

Shipped
-------
- harness/tool-dispatcher.ts (105 lines)
  · dispatchToolCall() combines:
    1. evaluatePermission (existing M2)
    2. evaluateMode      (existing M3a)
    3. PreToolUse hook   (existing M3a HookDispatcher)
  · Returns DispatchVerdict { decision, source, reason, hook?, ... }
  · plan-blocked short-circuits before hooks (matches invariant #1)
  · Hook JSON output decision=deny/ask overrides mode allow
  · Hook non-zero exit code → deny

- agent.ts integration:
  · New RunAgentOptions fields: mode, permissions, hooks, approval
  · Tool-call loop now consults dispatcher when mode is set
  · 'ask' verdict invokes ApprovalCallback (caller-supplied)
  · 'deny' / 'plan-blocked' produce tool_result with is_error=true,
    fed back to LLM so it can recover
  · PostToolUse hook fires after every tool execution
  · Backwards compatible: when mode is unset, M1 behavior (allow all)

Tests
-----
- harness/tool-dispatcher.test.ts (9 tests)
  All paths of the §5.1 decision tree, including plan short-circuit,
  acceptEdits with permission-deny still winning, hook JSON override,
  hook non-zero exit, dontAsk strict deny.

Total: 206 passed / 4 skipped / 0 failed (was 197).

Deferred to M3c
---------------
MCP client / compaction / statusLine / /init multi-phase / auto-classifier
mode / hook handler types beyond `command` / hook `if` field.

CLI REPL not yet plumbed with mode (M3c). Mode-aware gating only activates
when a caller passes `mode:` to `runAgent` — current REPL still uses M1
path.

Verified
--------
  pnpm typecheck   → green
  pnpm test        → 206 passed / 4 skipped
  pnpm format:check → conformant

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/milestones/M3b.md                        |  33 ++++
 packages/core/src/agent.ts                    |  69 ++++++++
 packages/core/src/harness/index.ts            |  11 +-
 .../core/src/harness/tool-dispatcher.test.ts  | 149 ++++++++++++++++++
 packages/core/src/harness/tool-dispatcher.ts  | 135 ++++++++++++++++
 packages/core/src/index.ts                    |   6 +
 6 files changed, 398 insertions(+), 5 deletions(-)
 create mode 100644 docs/milestones/M3b.md
 create mode 100644 packages/core/src/harness/tool-dispatcher.test.ts
 create mode 100644 packages/core/src/harness/tool-dispatcher.ts

diff --git a/docs/milestones/M3b.md b/docs/milestones/M3b.md
new file mode 100644
index 0000000..955b44d
--- /dev/null
+++ b/docs/milestones/M3b.md
@@ -0,0 +1,33 @@
+# M3b — Agent loop integration (mode × permission × hooks)
+
+> **Status**: ✅ Complete · **Branch**: `feat/m3b-agent-integration-mcp`
+
+## Shipped
+
+- `harness/tool-dispatcher.ts` — the central gate that combines mode + permission + PreToolUse hook into a single verdict. Implements the decision flow from `docs/design/sandbox-plan-worktree.md` §5.1.
+- Agent loop now consults `dispatchToolCall()` for every tool invocation when `mode` is provided. Plan-blocks short-circuit before hooks; deny short-circuits before tool execution; ask invokes `approval` callback.
+- PostToolUse hook fires after every tool execution with the result.
+- 9 new tests in `tool-dispatcher.test.ts` covering all decision paths.
+
+## Verification
+
+- `pnpm test` → 206 passed / 4 skipped / 0 failed (was 197).
+- `pnpm typecheck` / `pnpm build` → green.
+
+## Deferred to M3c
+
+- MCP client (stdio transport, list/call tools)
+- Compaction (LLM summarizer at threshold)
+- statusLine runner (JSON-on-stdin)
+- `/init` multi-phase
+- `auto` classifier mode (LLM-judged)
+- Hook handler types beyond `command`
+- Hook `if` field
+
+These are independent subsystems; each gets its own PR.
+
+## CLI integration
+
+The CLI REPL (M2) does **not** yet pass `mode` to `runAgent()`, so tool calls
+still flow unmolested through the M1 path. Plumbing that is M3c (5-line change
+in `repl.ts` once compaction + statusLine are in place to make a coherent demo).
diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts
index 0ae4544..3401819 100644
--- a/packages/core/src/agent.ts
+++ b/packages/core/src/agent.ts
@@ -1,6 +1,10 @@
 // Agent loop — orchestrates provider <-> tools <-> session.
 // Spec: docs/DEVELOPMENT_PLAN.md §3.1 / §3.15
 
+import type { PermissionRules } from './config/types.js';
+import { dispatchToolCall, type DispatchVerdict } from './harness/tool-dispatcher.js';
+import type { HookDispatcher } from './hooks/index.js';
+import type { Mode } from './types.js';
 import type { Provider } from './providers/types.js';
 import { SessionManager } from './sessions/index.js';
 import type { ToolRegistry } from './tools/registry.js';
@@ -13,6 +17,16 @@ import type {
   ToolUseBlock,
 } from './types.js';
 
+/**
+ * Approval callback — return true to allow, false to reject.
+ * Called when dispatcher returns 'ask'.
+ */
+export type ApprovalCallback = (
+  toolName: string,
+  toolInput: Record<string, unknown>,
+  verdict: DispatchVerdict,
+) => Promise<boolean> | boolean;
+
 export interface RunAgentOptions {
   provider: Provider;
   tools: ToolRegistry;
@@ -31,6 +45,12 @@ export interface RunAgentOptions {
   session?: { manager: SessionManager; id: string };
   /** Optional: snapshot files before/after Edit/Write tool calls. */
   enableSnapshots?: boolean;
+  /** M3: dispatch gates (mode + permissions + hooks). When set, every tool call
+   *  goes through the gate. When unset, all tool calls are allowed (M1 behavior). */
+  mode?: Mode;
+  permissions?: PermissionRules;
+  hooks?: HookDispatcher;
+  approval?: ApprovalCallback;
 }
 
 export interface RunAgentResult {
@@ -157,6 +177,40 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
         continue;
       }
 
+      // M3: dispatch gate (mode + permissions + PreToolUse hook)
+      if (opts.mode) {
+        const verdict = await dispatchToolCall({
+          tool: toolUse.name,
+          input: toolUse.input,
+          mode: opts.mode,
+          rules: opts.permissions,
+          hooks: opts.hooks,
+          cwd: opts.cwd,
+        });
+        let allowed = verdict.decision === 'allow';
+        if (verdict.decision === 'ask' && opts.approval) {
+          allowed = await opts.approval(toolUse.name, toolUse.input, verdict);
+        }
+        if (!allowed) {
+          toolResults.push({
+            type: 'tool_result',
+            tool_use_id: toolUse.id,
+            content: `Tool call blocked: ${verdict.reason}`,
+            is_error: true,
+          });
+          opts.onEvent?.({
+            type: 'tool_result',
+            id: toolUse.id,
+            result: {
+              content: verdict.reason,
+              isError: true,
+              data: { dispatchSource: verdict.source, decision: verdict.decision },
+            },
+          });
+          continue;
+        }
+      }
+
       // Pre-execution snapshot (Edit/Write only)
       if (
         opts.enableSnapshots !== false &&
@@ -182,6 +236,21 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
         tr = { content: `Error: ${(err as Error).message}`, isError: true };
       }
 
+      // PostToolUse hook (M3) — observation only; can inject additionalContext
+      if (opts.hooks) {
+        await opts.hooks.dispatch({
+          event: 'PostToolUse',
+          cwd: opts.cwd,
+          triggeredAt: new Date().toISOString(),
+          payload: {
+            tool: toolUse.name,
+            input: toolUse.input,
+            result_content: tr.content.slice(0, 1000),
+            is_error: tr.isError ?? false,
+          },
+        });
+      }
+
       // Post-execution snapshot
       if (
         opts.enableSnapshots !== false &&
diff --git a/packages/core/src/harness/index.ts b/packages/core/src/harness/index.ts
index e66fe99..82ffd79 100644
--- a/packages/core/src/harness/index.ts
+++ b/packages/core/src/harness/index.ts
@@ -1,6 +1,7 @@
-// Module: harness
-// Milestone: M3
-// Spec: docs/DEVELOPMENT_PLAN.md §3.15 event bus / system-reminder injector / plan-mode state machine / tasks / cron / worktree / ToolSearch / Notification / statusline
-// Status: placeholder — implemented in M3
+// Harness layer entry — tool dispatcher (mode × permission × hooks gating).
+// Spec: docs/DEVELOPMENT_PLAN.md §3.15
+// Milestone: M3b — basic gating wired; system-reminder injector / TaskCreate /
+// cron / worktree / ToolSearch / Notification / statusLine implementation will
+// land in M3c+.
 
-export {};
+export { dispatchToolCall, type DispatchRequest, type DispatchVerdict } from './tool-dispatcher.js';
diff --git a/packages/core/src/harness/tool-dispatcher.test.ts b/packages/core/src/harness/tool-dispatcher.test.ts
new file mode 100644
index 0000000..af90b17
--- /dev/null
+++ b/packages/core/src/harness/tool-dispatcher.test.ts
@@ -0,0 +1,149 @@
+import { describe, expect, it } from 'vitest';
+import { HookDispatcher } from '../hooks/index.js';
+import { dispatchToolCall } from './tool-dispatcher.js';
+
+describe('dispatchToolCall', () => {
+  it('mode=default + permission=allow → allow (source: permission)', async () => {
+    const v = await dispatchToolCall({
+      tool: 'Read',
+      input: { file_path: '/x' },
+      mode: 'default',
+      rules: { allow: ['Read'] },
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('allow');
+  });
+
+  it('mode=plan blocks write tools (short-circuit, hook does not fire)', async () => {
+    let hookFired = false;
+    const hooks = new HookDispatcher({
+      hooks: {
+        PreToolUse: [{ hooks: [{ type: 'command', command: 'echo hook' }] }],
+      },
+    });
+    // We can't easily detect "did not fire" without inspecting timings; do it indirectly
+    const v = await dispatchToolCall({
+      tool: 'Write',
+      input: { file_path: '/x' },
+      mode: 'plan',
+      rules: { allow: ['Write'] }, // allowed by permission, but plan-blocked
+      hooks,
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('plan-blocked');
+    expect(v.source).toBe('mode');
+    expect(v.hook).toBeUndefined(); // hook did not run
+  });
+
+  it('mode=acceptEdits + permission=deny → deny (permission wins)', async () => {
+    const v = await dispatchToolCall({
+      tool: 'Edit',
+      input: { file_path: '/x' },
+      mode: 'acceptEdits',
+      rules: { deny: ['Edit'] },
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('deny');
+  });
+
+  it('mode=bypassPermissions → allow even when deny rule', async () => {
+    const v = await dispatchToolCall({
+      tool: 'Bash',
+      input: { command: 'rm -rf /' },
+      mode: 'bypassPermissions',
+      rules: { deny: ['Bash'] },
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('allow');
+  });
+
+  it('mode=dontAsk + no-match → deny', async () => {
+    const v = await dispatchToolCall({
+      tool: 'Bash',
+      input: { command: 'ls' },
+      mode: 'dontAsk',
+      rules: {},
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('deny');
+  });
+
+  it('hook JSON output decision=deny overrides mode=allow', async () => {
+    const hooks = new HookDispatcher({
+      hooks: {
+        PreToolUse: [
+          {
+            hooks: [
+              {
+                type: 'command',
+                command: 'echo \'{"decision":"deny","systemMessage":"hook says no"}\'',
+              },
+            ],
+          },
+        ],
+      },
+    });
+    const v = await dispatchToolCall({
+      tool: 'Bash',
+      input: { command: 'ls' },
+      mode: 'default',
+      rules: { allow: ['Bash'] },
+      hooks,
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('deny');
+    expect(v.source).toBe('hook');
+    expect(v.reason).toMatch(/hook says no/);
+  });
+
+  it('hook non-zero exit blocks the call', async () => {
+    const hooks = new HookDispatcher({
+      hooks: {
+        PreToolUse: [{ hooks: [{ type: 'command', command: 'exit 1' }] }],
+      },
+    });
+    const v = await dispatchToolCall({
+      tool: 'Bash',
+      input: { command: 'ls' },
+      mode: 'default',
+      rules: { allow: ['Bash'] },
+      hooks,
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('deny');
+    expect(v.source).toBe('hook');
+  });
+
+  it('without hooks: just mode + permission', async () => {
+    const v = await dispatchToolCall({
+      tool: 'Read',
+      input: { file_path: '/x' },
+      mode: 'default',
+      rules: { ask: ['Read'] },
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('ask');
+  });
+
+  it('hook can upgrade allow → ask via JSON output', async () => {
+    const hooks = new HookDispatcher({
+      hooks: {
+        PreToolUse: [
+          {
+            hooks: [{ type: 'command', command: 'echo \'{"decision":"ask"}\'' }],
+          },
+        ],
+      },
+    });
+    const v = await dispatchToolCall({
+      tool: 'Bash',
+      input: { command: 'rm test.txt' },
+      mode: 'default',
+      rules: { allow: ['Bash'] },
+      hooks,
+      cwd: '/tmp',
+    });
+    expect(v.decision).toBe('ask');
+    expect(v.source).toBe('hook');
+  });
+});
diff --git a/packages/core/src/harness/tool-dispatcher.ts b/packages/core/src/harness/tool-dispatcher.ts
new file mode 100644
index 0000000..9551fa5
--- /dev/null
+++ b/packages/core/src/harness/tool-dispatcher.ts
@@ -0,0 +1,135 @@
+// Tool dispatcher — the central gate. Combines mode + permission + hook decision
+// into a single allow/ask/deny verdict.
+// Spec: docs/design/sandbox-plan-worktree.md §5.1
+//       docs/DEVELOPMENT_PLAN.md §3.8 / §3.15
+
+import { evaluateMode, type ModeRequest, type ModeVerdict } from '../modes/index.js';
+import {
+  evaluatePermission,
+  type PermissionRequest,
+  type PermissionVerdict,
+} from '../config/permissions.js';
+import type { PermissionRules } from '../config/types.js';
+import type { Mode } from '../types.js';
+import type { HookDispatcher, HookResult } from '../hooks/index.js';
+
+export interface DispatchRequest {
+  tool: string;
+  input: Record<string, unknown>;
+  mode: Mode;
+  rules?: PermissionRules;
+  hooks?: HookDispatcher;
+  cwd: string;
+}
+
+export interface DispatchVerdict {
+  /** Final decision after all gates. */
+  decision: 'allow' | 'ask' | 'deny' | 'plan-blocked';
+  /** Where the decision came from (for UI/logging). */
+  source: 'mode' | 'permission' | 'hook';
+  /** Human-readable explanation. */
+  reason: string;
+  /** If hook produced JSON output, surfaced here so caller can use additionalContext etc. */
+  hook?: HookResult;
+  /** Permission verdict (for diagnostic). */
+  permissionVerdict?: PermissionVerdict;
+  /** Mode verdict (for diagnostic). */
+  modeVerdict?: ModeVerdict;
+}
+
+/**
+ * Evaluate a tool call against mode + permission + PreToolUse hook.
+ *
+ * Decision order (per docs/design/sandbox-plan-worktree.md §5.1):
+ *   1. Mode policy (plan-blocked / deny short-circuit immediately)
+ *   2. Permission rules (mode policy can demote/upgrade to ask)
+ *   3. PreToolUse hook chain (can override the prior decision via JSON output)
+ *
+ * Sandbox (M3.5) is enforced separately at the OS layer — not here.
+ */
+export async function dispatchToolCall(req: DispatchRequest): Promise<DispatchVerdict> {
+  // Step 1: Permission verdict
+  const permReq: PermissionRequest = { tool: req.tool, input: req.input };
+  const permVerdict = evaluatePermission(permReq, req.rules);
+
+  // Step 2: Mode policy (incorporates permission verdict)
+  const modeReq: ModeRequest = {
+    tool: req.tool,
+    input: req.input,
+    permissionVerdict: permVerdict,
+  };
+  const modeVerdict = evaluateMode(req.mode, modeReq);
+
+  // Plan-block short-circuits — hook doesn't even fire
+  if (modeVerdict === 'plan-blocked') {
+    return {
+      decision: 'plan-blocked',
+      source: 'mode',
+      reason: `Tool "${req.tool}" blocked: mode=plan (read-only); call would write.`,
+      modeVerdict,
+      permissionVerdict: permVerdict,
+    };
+  }
+
+  // Step 3: PreToolUse hook (only if mode didn't outright deny)
+  let hookResult: HookResult | undefined;
+  if (req.hooks && modeVerdict !== 'deny') {
+    hookResult = await req.hooks.dispatch({
+      event: 'PreToolUse',
+      cwd: req.cwd,
+      triggeredAt: new Date().toISOString(),
+      payload: { tool: req.tool, input: req.input },
+    });
+
+    // Hook JSON output may override
+    if (hookResult.json?.decision === 'deny' || hookResult.json?.permissionDecision === 'deny') {
+      return {
+        decision: 'deny',
+        source: 'hook',
+        reason: hookResult.json.systemMessage ?? 'Hook denied this tool call.',
+        hook: hookResult,
+        modeVerdict,
+        permissionVerdict: permVerdict,
+      };
+    }
+    if (hookResult.json?.decision === 'ask' || hookResult.json?.permissionDecision === 'ask') {
+      return {
+        decision: 'ask',
+        source: 'hook',
+        reason: hookResult.json.systemMessage ?? 'Hook requested approval.',
+        hook: hookResult,
+        modeVerdict,
+        permissionVerdict: permVerdict,
+      };
+    }
+    // Hook exited non-zero → treat as deny
+    if (hookResult.anyBlocked) {
+      return {
+        decision: 'deny',
+        source: 'hook',
+        reason: 'A PreToolUse hook exited non-zero — blocking call.',
+        hook: hookResult,
+        modeVerdict,
+        permissionVerdict: permVerdict,
+      };
+    }
+  }
+
+  // Otherwise use mode verdict
+  return {
+    decision: modeVerdict,
+    source: modeVerdict === 'allow' && permVerdict === 'allow' ? 'permission' : 'mode',
+    reason: explain(modeVerdict, permVerdict, req.mode),
+    hook: hookResult,
+    modeVerdict,
+    permissionVerdict: permVerdict,
+  };
+}
+
+function explain(mode: ModeVerdict, perm: PermissionVerdict, modeName: Mode): string {
+  if (mode === 'allow' && perm === 'allow') return 'allowed by permission rules';
+  if (mode === 'allow') return `auto-allowed by mode "${modeName}"`;
+  if (mode === 'ask') return 'requires approval';
+  if (mode === 'deny') return `denied by mode "${modeName}"`;
+  return 'unknown';
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index facad89..198c279 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -116,3 +116,9 @@ export {
   type LoadedMemory,
   type LoadMemoryOpts,
 } from './memory/index.js';
+
+// Harness (M3b — tool dispatcher gates: mode × permission × hooks)
+export { dispatchToolCall, type DispatchRequest, type DispatchVerdict } from './harness/index.js';
+
+// Agent loop's approval callback type (M3b)
+export type { ApprovalCallback } from './agent.js';