oratis · oratis · May 27, 2026 · May 27, 2026
diff --git a/apps/cli/src/repl.ts b/apps/cli/src/repl.ts
@@ -5,13 +5,22 @@ import {
   CredentialsStore,
   DeepSeekProvider,
   EFFORT_PARAMS,
+  HookDispatcher,
   SessionManager,
   ToolRegistry,
+  applyStyle,
+  buildSkillsDescriptionBlock,
+  findStyle,
+  loadMemory,
+  loadOutputStyles,
   loadSettings,
+  loadSkills,
+  makeSkillTool,
   resolveCredentials,
   runAgent,
   type DeepCodeSettings,
   type Effort,
+  type Mode,
   type AgentEvent,
   type StoredMessage,
 } from '@deepcode/core';
@@ -55,7 +64,7 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
   }
 
   const model = opts.model ?? settings.model ?? 'deepseek-chat';
-  const mode = opts.mode ?? settings.permissions?.defaultMode ?? 'default';
+  const mode = (opts.mode ?? settings.permissions?.defaultMode ?? 'default') as Mode;
   const effort = opts.effort ?? settings.effortLevel ?? 'medium';
   const { maxTokens, temperature } = EFFORT_PARAMS[effort as Effort] ?? EFFORT_PARAMS.medium;
 
@@ -70,6 +79,38 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
   const tools = new ToolRegistry();
   const commands = new CommandRegistry();
 
+  // M5: load memory, skills, output style — assemble final system prompt
+  const memory = await loadMemory({
+    cwd,
+    home: opts.home,
+    maxBytes: (settings.memoryLoadCapKB ?? 100) * 1024,
+  });
+  const skills = await loadSkills({
+    cwd,
+    home: opts.home,
+    overrides: settings.skillOverrides,
+  });
+  const styles = await loadOutputStyles({ cwd, home: opts.home });
+  const activeStyle = findStyle(styles, settings.outputStyle ?? 'default');
+
+  // Register Skill tool (M5)
+  if (skills.length > 0) {
+    tools.register(makeSkillTool(skills));
+  }
+
+  // Build the composite system prompt
+  let systemPrompt = DEFAULT_SYSTEM_PROMPT;
+  if (memory.text) systemPrompt += '\n\n' + memory.text;
+  const skillsBlock = buildSkillsDescriptionBlock(skills);
+  if (skillsBlock) systemPrompt += '\n\n' + skillsBlock;
+  systemPrompt = applyStyle(systemPrompt, activeStyle);
+
+  // Hook dispatcher (M3)
+  const hooks = new HookDispatcher({
+    hooks: settings.hooks,
+    disableAllHooks: settings.disableAllHooks,
+  });
+
   let history: StoredMessage[] = [];
   const ctx: SessionContext = {
     cwd,
@@ -128,18 +169,26 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
       continue;
     }
 
-    // Otherwise: send to agent
+    // Otherwise: send to agent (with mode/permission/hooks gating from M3b)
     const result = await runAgent({
       provider,
       tools,
-      systemPrompt: DEFAULT_SYSTEM_PROMPT,
+      systemPrompt,
       userMessage: userInput,
       history,
       model: ctx.model,
       maxTokens,
       temperature,
       cwd: ctx.cwd,
       session: { manager: sessions, id: session.id },
+      mode: ctx.mode as Mode,
+      permissions: settings.permissions,
+      hooks,
+      approval: async (toolName, _input, verdict) => {
+        output.write(`\n  ⏸ Approve ${toolName}?  Reason: ${verdict.reason}\n`);
+        const answer = (await rl.question('     [y]es / [n]o: ')).trim().toLowerCase();
+        return answer === 'y' || answer === 'yes';
+      },
       onEvent: (e: AgentEvent) => formatEvent(output, e),
     });
     history = result.history;

diff --git a/docs/milestones/M4.md b/docs/milestones/M4.md
@@ -5,30 +5,33 @@
 
 ## Shipped
 
-| Module | Purpose | Tests |
-|---|---|---|
-| `skills/frontmatter.ts` | Zero-dep YAML frontmatter parser (strings/numbers/bools/flow + block arrays/objects) | 10 |
-| `skills/loader.ts` | 4-layer loader (builtin / user / project / plugin) + `buildSkillsDescriptionBlock()` for system-prompt injection | 9 |
-| `sub-agents/loader.ts` | `.deepcode/agents/*.md` → `SubAgent` objects with isolation / tools / model / maxTurns | 6 |
-| `output-styles/loader.ts` | 4 built-in styles (default / explanatory / learning / proactive) + user/project overrides + `applyStyle()` | 9 |
-| Top-level re-exports | All new types/functions exposed from `@deepcode/core` | — |
+| Module                    | Purpose                                                                                                          | Tests |
+| ------------------------- | ---------------------------------------------------------------------------------------------------------------- | ----- |
+| `skills/frontmatter.ts`   | Zero-dep YAML frontmatter parser (strings/numbers/bools/flow + block arrays/objects)                             | 10    |
+| `skills/loader.ts`        | 4-layer loader (builtin / user / project / plugin) + `buildSkillsDescriptionBlock()` for system-prompt injection | 9     |
+| `sub-agents/loader.ts`    | `.deepcode/agents/*.md` → `SubAgent` objects with isolation / tools / model / maxTurns                           | 6     |
+| `output-styles/loader.ts` | 4 built-in styles (default / explanatory / learning / proactive) + user/project overrides + `applyStyle()`       | 9     |
+| Top-level re-exports      | All new types/functions exposed from `@deepcode/core`                                                            | —     |
 
 **Total new tests**: 34. Across whole project: 240 passing / 4 skipped / 0 failed.
 
 ## What's in each subsystem
 
 **Skills** (`SKILL.md` files in `<root>/<name>/SKILL.md`):
+
 - Frontmatter spec: `name`, `description`, `allowed-tools`, `model`, `effort`, `shell`, `hooks`, `disabled`
 - Qualified names: bare for user/project, `<plugin>:<name>` for plugin-shipped
 - `disabled: true` in frontmatter OR `skillOverrides[name].disabled = true` in settings → skip load
 - `buildSkillsDescriptionBlock()` produces the system-prompt fragment that lists available skills (name + description only — body is loaded on Skill-tool invocation)
 
 **Sub-agents** (`.deepcode/agents/<name>.md`):
+
 - Frontmatter: `name`, `description`, `tools[]`, `model`, `isolation`, `maxTurns`
 - CLI `--agents <dir>` flag honored via `projectDirOverride` option
 - `findSubAgent(agents, name)` lookup helper
 
 **Output styles** (`<root>/.deepcode/output-styles/<name>.md`):
+
 - 4 built-in: `default`, `explanatory`, `learning`, `proactive`
 - Frontmatter: `name`, `description`, `keep-coding-instructions`
 - User → project layer order with replace semantics

diff --git a/docs/milestones/M5.md b/docs/milestones/M5.md
@@ -0,0 +1,87 @@
+# M5 — Plugins (manifest + hash pin) + Skill tool + CLI integration
+
+> **Status**: ✅ Foundation shipped — sandbox subprocess deferred to M5.1  
+> **Branch**: `feat/m5-plugins-skill-tool-integration`
+
+## Shipped
+
+| Module | Purpose | Lines | Tests |
+|---|---|---|---|
+| `plugins/manifest.ts` | Manifest parser, SHA-256 hash pinning, trust state JSON, installLocal(), discoverPlugins() with hash drift detection | 175 | 12 |
+| `skills/tool.ts` | `Skill` ToolHandler factory — agent invokes by qualified name, skill body returned as tool_result | 60 | 6 |
+| `apps/cli/src/repl.ts` | Wires memory + skills + output styles + mode/permissions/hooks/approval into the REPL agent loop | +50 | (smoke) |
+| **subtotal** | | **~285** | **18** |
+
+Across the whole project: 258 passing / 4 skipped / 0 failed (was 240 passing).
+
+## What the CLI REPL now does end-to-end
+
+When the user types a message, the agent turn works as follows:
+1. **System prompt** = default + memory (DEEPCODE.md + ~/.deepcode + AGENTS.md + rules/) + skills description block + output style append
+2. **Tools available** = 6 P0 + `Skill` tool (if any skills loaded)
+3. **Per tool call** = goes through `dispatchToolCall()`:
+   - Mode policy (`plan` blocks writes, `dontAsk` rejects ask, etc.)
+   - Permission rules (allow/ask/deny patterns)
+   - PreToolUse hook chain (JSON output can override)
+4. **`ask` verdict** → REPL prompts user `[y]es/[n]o`
+5. **PostToolUse hook** fires after every tool execution
+6. **Snapshots** captured pre/post Edit/Write for future rewind
+
+## What's NOT in M5
+
+Per `docs/design/plugin-security.md` we have a deliberate gap:
+
+> **Plugin sandbox subprocess (RPC over stdio) — M5.1.**
+> Right now `discoverPlugins()` finds installed plugins, but the agent loop does
+> NOT *run* any of their contributed code. They're discovered, their
+> manifest is verified, but their JS/skills/hooks/MCP servers aren't yet
+> registered into the active registries. That wire-up needs the sandbox
+> subprocess design from `plugin-security.md` §3.5 to land first — running
+> arbitrary plugin code in the host process is the exact RCE vector the design
+> doc enumerated as A1 / A3.
+
+What works **today** safely:
+- Local install: `installLocal({ sourcePath })` copies + records trust + hashes
+- Discovery on startup: `discoverPlugins()` finds plugins, flags hash drift, returns enabled list
+- Trust manifest at `~/.deepcode/plugins-trust.json` tracks what was installed
+- Hash-pinning catches tampered plugins
+
+What's deferred to M5.1:
+- Subprocess sandbox via bwrap/sandbox-exec (depends on §3.9a sandbox subsystem — M3.5)
+- RPC stdio bridge between host and plugin subprocess
+- GitHub URL install (`gh:user/repo`)
+- Marketplace index + ed25519 signature verification
+- Revoke list pull
+- Loading plugin-bundled skills/agents/hooks into the active registry
+
+## Skill tool
+
+`makeSkillTool(skills)` returns a `ToolHandler` that:
+- Looks up skill by `qualifiedName` (e.g. `code-review` or `plugin-x:do-thing`)
+- Returns the SKILL.md body as tool_result
+- Lets the LLM "decide to invoke" via natural tool calling
+- Errors clearly when skill not found (lists known skills)
+
+Auto-trigger via description matching is implicit — by including `buildSkillsDescriptionBlock(skills)` in the system prompt, the model sees `## Available skills - **code-review** — Review diff for bugs.` and tool-calls Skill when the user asks.
+
+## Tests added
+
+- `plugins/manifest.test.ts` — 12 tests covering: manifest validation, hash determinism, hash sensitivity (manifest + SKILL.md changes), trust round-trip, install, discovery, drift detection, disabled list, untrusted skip
+- `skills/tool.test.ts` — 6 tests covering: tool shape, known skill lookup, args appending, plugin-qualified names, missing skill, missing arg
+
+## Verified
+
+```
+pnpm typecheck    → green
+pnpm build        → green
+pnpm test         → 258 passed / 4 skipped / 0 failed
+pnpm format:check → conformant
+```
+
+CLI smoke: `node apps/cli/dist/cli.js --version` → `0.1.0`. Full REPL run not validated end-to-end (would need a live DEEPSEEK_API_KEY); the wiring is type-checked and the unit tests for each piece pass.
+
+## Why deferring to M5.1 is the right call
+
+`docs/design/plugin-security.md` was explicit that running plugins in the host process is the **primary** RCE vector. M5 ships the trust/hash machinery as a foundation, but explicitly **does not** wire plugin code into the live agent — because doing so without a sandbox is the headline security mistake we warned ourselves about. The honest M5 is: discover and verify, don't execute.
+
+The user can still benefit from skills (file-based, no code) — those work via the M4 user/project layers — they just don't yet auto-load from installed plugins.
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -123,18 +123,37 @@ export { dispatchToolCall, type DispatchRequest, type DispatchVerdict } from './
 // Agent loop's approval callback type (M3b)
 export type { ApprovalCallback } from './agent.js';
 
-// Skills (M4 — SKILL.md frontmatter loading + system-prompt builder)
+// Skills (M4 — SKILL.md frontmatter loading + system-prompt builder; M5 — Skill tool)
 export {
   loadSkills,
   buildSkillsDescriptionBlock,
   parseFrontmatter,
   parseSimpleYaml,
+  makeSkillTool,
   type Skill,
   type SkillFrontmatter,
   type LoadSkillsOpts,
   type Frontmatter,
 } from './skills/index.js';
 
+// Plugins (M5 — manifest + hash pinning + local install + discovery)
+export {
+  installLocal,
+  discoverPlugins,
+  readManifest,
+  computeSourceHash,
+  loadTrustState,
+  saveTrustState,
+  pluginsDir,
+  trustFilePath,
+  type PluginManifest,
+  type InstalledPlugin,
+  type PluginTrust,
+  type TrustState,
+  type InstallOptions,
+  type DiscoverOptions,
+} from './plugins/index.js';
+
 // Sub-agents (M4 — .deepcode/agents/*.md)
 export {
   loadSubAgents,

diff --git a/packages/core/src/plugins/index.ts b/packages/core/src/plugins/index.ts
@@ -1,6 +1,38 @@
-// Module: plugins
+// Plugins subsystem entry — manifest parsing, hash pinning, local install, discovery.
+// Spec: docs/DEVELOPMENT_PLAN.md §3.14
 // Milestone: M5
-// Spec: docs/DEVELOPMENT_PLAN.md §3.14 plugin sandbox sub-process + RPC + hash pin + marketplace (see docs/design/plugin-security.md)
-// Status: placeholder — implemented in M5
+//
+// What's IN this milestone:
+//   - plugin.json manifest parsing
+//   - SHA-256 source hash + ~/.deepcode/plugins-trust.json
+//   - installLocal() — copy a directory + record trust
+//   - discoverPlugins() — scan ~/.deepcode/plugins/ + verify hashes
+//
+// What's NOT in this milestone (see docs/design/plugin-security.md):
+//   - Sandbox subprocess execution (RPC over stdio)
+//   - GitHub URL install (gh:user/repo)
+//   - Marketplace index + ed25519 signature verification
+//   - Revoke list pull + enforcement
+//   - "Trust ladder" UI tiers
+//
+// IMPORTANT: until subprocess sandbox lands (planned M5.1), plugin code must
+// be treated as untrusted: if executed, it would have full host access. The
+// trust system records what the user *thought* they were installing, but
+// cannot enforce it. Treat M5 as a foundation, not a security boundary.
 
-export {};
+export {
+  installLocal,
+  discoverPlugins,
+  readManifest,
+  computeSourceHash,
+  loadTrustState,
+  saveTrustState,
+  pluginsDir,
+  trustFilePath,
+  type PluginManifest,
+  type InstalledPlugin,
+  type PluginTrust,
+  type TrustState,
+  type InstallOptions,
+  type DiscoverOptions,
+} from './manifest.js';