Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions docs/milestones/M3b.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# M3b — Agent loop integration (mode × permission × hooks)

> **Status**: ✅ Complete · **Branch**: `feat/m3b-agent-integration-mcp`

## Shipped

- `harness/tool-dispatcher.ts` — the central gate that combines mode + permission + PreToolUse hook into a single verdict. Implements the decision flow from `docs/design/sandbox-plan-worktree.md` §5.1.
- Agent loop now consults `dispatchToolCall()` for every tool invocation when `mode` is provided. Plan-blocks short-circuit before hooks; deny short-circuits before tool execution; ask invokes `approval` callback.
- PostToolUse hook fires after every tool execution with the result.
- 9 new tests in `tool-dispatcher.test.ts` covering all decision paths.

## Verification

- `pnpm test` → 206 passed / 4 skipped / 0 failed (was 197).
- `pnpm typecheck` / `pnpm build` → green.

## Deferred to M3c

- MCP client (stdio transport, list/call tools)
- Compaction (LLM summarizer at threshold)
- statusLine runner (JSON-on-stdin)
- `/init` multi-phase
- `auto` classifier mode (LLM-judged)
- Hook handler types beyond `command`
- Hook `if` field

These are independent subsystems; each gets its own PR.

## CLI integration

The CLI REPL (M2) does **not** yet pass `mode` to `runAgent()`, so tool calls
still flow unmolested through the M1 path. Plumbing that is M3c (5-line change
in `repl.ts` once compaction + statusLine are in place to make a coherent demo).
69 changes: 69 additions & 0 deletions packages/core/src/agent.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
// Agent loop — orchestrates provider <-> tools <-> session.
// Spec: docs/DEVELOPMENT_PLAN.md §3.1 / §3.15

import type { PermissionRules } from './config/types.js';
import { dispatchToolCall, type DispatchVerdict } from './harness/tool-dispatcher.js';
import type { HookDispatcher } from './hooks/index.js';
import type { Mode } from './types.js';
import type { Provider } from './providers/types.js';
import { SessionManager } from './sessions/index.js';
import type { ToolRegistry } from './tools/registry.js';
Expand All @@ -13,6 +17,16 @@ import type {
ToolUseBlock,
} from './types.js';

/**
* Approval callback — return true to allow, false to reject.
* Called when dispatcher returns 'ask'.
*/
export type ApprovalCallback = (
toolName: string,
toolInput: Record<string, unknown>,
verdict: DispatchVerdict,
) => Promise<boolean> | boolean;

export interface RunAgentOptions {
provider: Provider;
tools: ToolRegistry;
Expand All @@ -31,6 +45,12 @@ export interface RunAgentOptions {
session?: { manager: SessionManager; id: string };
/** Optional: snapshot files before/after Edit/Write tool calls. */
enableSnapshots?: boolean;
/** M3: dispatch gates (mode + permissions + hooks). When set, every tool call
* goes through the gate. When unset, all tool calls are allowed (M1 behavior). */
mode?: Mode;
permissions?: PermissionRules;
hooks?: HookDispatcher;
approval?: ApprovalCallback;
}

export interface RunAgentResult {
Expand Down Expand Up @@ -157,6 +177,40 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
continue;
}

// M3: dispatch gate (mode + permissions + PreToolUse hook)
if (opts.mode) {
const verdict = await dispatchToolCall({
tool: toolUse.name,
input: toolUse.input,
mode: opts.mode,
rules: opts.permissions,
hooks: opts.hooks,
cwd: opts.cwd,
});
let allowed = verdict.decision === 'allow';
if (verdict.decision === 'ask' && opts.approval) {
allowed = await opts.approval(toolUse.name, toolUse.input, verdict);
}
if (!allowed) {
toolResults.push({
type: 'tool_result',
tool_use_id: toolUse.id,
content: `Tool call blocked: ${verdict.reason}`,
is_error: true,
});
opts.onEvent?.({
type: 'tool_result',
id: toolUse.id,
result: {
content: verdict.reason,
isError: true,
data: { dispatchSource: verdict.source, decision: verdict.decision },
},
});
continue;
}
}

// Pre-execution snapshot (Edit/Write only)
if (
opts.enableSnapshots !== false &&
Expand All @@ -182,6 +236,21 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
tr = { content: `Error: ${(err as Error).message}`, isError: true };
}

// PostToolUse hook (M3) — observation only; can inject additionalContext
if (opts.hooks) {
await opts.hooks.dispatch({
event: 'PostToolUse',
cwd: opts.cwd,
triggeredAt: new Date().toISOString(),
payload: {
tool: toolUse.name,
input: toolUse.input,
result_content: tr.content.slice(0, 1000),
is_error: tr.isError ?? false,
},
});
}

// Post-execution snapshot
if (
opts.enableSnapshots !== false &&
Expand Down
11 changes: 6 additions & 5 deletions packages/core/src/harness/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Module: harness
// Milestone: M3
// Spec: docs/DEVELOPMENT_PLAN.md §3.15 event bus / system-reminder injector / plan-mode state machine / tasks / cron / worktree / ToolSearch / Notification / statusline
// Status: placeholder — implemented in M3
// Harness layer entry — tool dispatcher (mode × permission × hooks gating).
// Spec: docs/DEVELOPMENT_PLAN.md §3.15
// Milestone: M3b — basic gating wired; system-reminder injector / TaskCreate /
// cron / worktree / ToolSearch / Notification / statusLine implementation will
// land in M3c+.

export {};
export { dispatchToolCall, type DispatchRequest, type DispatchVerdict } from './tool-dispatcher.js';
149 changes: 149 additions & 0 deletions packages/core/src/harness/tool-dispatcher.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import { describe, expect, it } from 'vitest';
import { HookDispatcher } from '../hooks/index.js';
import { dispatchToolCall } from './tool-dispatcher.js';

describe('dispatchToolCall', () => {
it('mode=default + permission=allow → allow (source: permission)', async () => {
const v = await dispatchToolCall({
tool: 'Read',
input: { file_path: '/x' },
mode: 'default',
rules: { allow: ['Read'] },
cwd: '/tmp',
});
expect(v.decision).toBe('allow');
});

it('mode=plan blocks write tools (short-circuit, hook does not fire)', async () => {
let hookFired = false;
const hooks = new HookDispatcher({
hooks: {
PreToolUse: [{ hooks: [{ type: 'command', command: 'echo hook' }] }],
},
});
// We can't easily detect "did not fire" without inspecting timings; do it indirectly
const v = await dispatchToolCall({
tool: 'Write',
input: { file_path: '/x' },
mode: 'plan',
rules: { allow: ['Write'] }, // allowed by permission, but plan-blocked
hooks,
cwd: '/tmp',
});
expect(v.decision).toBe('plan-blocked');
expect(v.source).toBe('mode');
expect(v.hook).toBeUndefined(); // hook did not run
});

it('mode=acceptEdits + permission=deny → deny (permission wins)', async () => {
const v = await dispatchToolCall({
tool: 'Edit',
input: { file_path: '/x' },
mode: 'acceptEdits',
rules: { deny: ['Edit'] },
cwd: '/tmp',
});
expect(v.decision).toBe('deny');
});

it('mode=bypassPermissions → allow even when deny rule', async () => {
const v = await dispatchToolCall({
tool: 'Bash',
input: { command: 'rm -rf /' },
mode: 'bypassPermissions',
rules: { deny: ['Bash'] },
cwd: '/tmp',
});
expect(v.decision).toBe('allow');
});

it('mode=dontAsk + no-match → deny', async () => {
const v = await dispatchToolCall({
tool: 'Bash',
input: { command: 'ls' },
mode: 'dontAsk',
rules: {},
cwd: '/tmp',
});
expect(v.decision).toBe('deny');
});

it('hook JSON output decision=deny overrides mode=allow', async () => {
const hooks = new HookDispatcher({
hooks: {
PreToolUse: [
{
hooks: [
{
type: 'command',
command: 'echo \'{"decision":"deny","systemMessage":"hook says no"}\'',
},
],
},
],
},
});
const v = await dispatchToolCall({
tool: 'Bash',
input: { command: 'ls' },
mode: 'default',
rules: { allow: ['Bash'] },
hooks,
cwd: '/tmp',
});
expect(v.decision).toBe('deny');
expect(v.source).toBe('hook');
expect(v.reason).toMatch(/hook says no/);
});

it('hook non-zero exit blocks the call', async () => {
const hooks = new HookDispatcher({
hooks: {
PreToolUse: [{ hooks: [{ type: 'command', command: 'exit 1' }] }],
},
});
const v = await dispatchToolCall({
tool: 'Bash',
input: { command: 'ls' },
mode: 'default',
rules: { allow: ['Bash'] },
hooks,
cwd: '/tmp',
});
expect(v.decision).toBe('deny');
expect(v.source).toBe('hook');
});

it('without hooks: just mode + permission', async () => {
const v = await dispatchToolCall({
tool: 'Read',
input: { file_path: '/x' },
mode: 'default',
rules: { ask: ['Read'] },
cwd: '/tmp',
});
expect(v.decision).toBe('ask');
});

it('hook can upgrade allow → ask via JSON output', async () => {
const hooks = new HookDispatcher({
hooks: {
PreToolUse: [
{
hooks: [{ type: 'command', command: 'echo \'{"decision":"ask"}\'' }],
},
],
},
});
const v = await dispatchToolCall({
tool: 'Bash',
input: { command: 'rm test.txt' },
mode: 'default',
rules: { allow: ['Bash'] },
hooks,
cwd: '/tmp',
});
expect(v.decision).toBe('ask');
expect(v.source).toBe('hook');
});
});
Loading
Loading