Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 52 additions & 3 deletions apps/cli/src/repl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@ import {
CredentialsStore,
DeepSeekProvider,
EFFORT_PARAMS,
HookDispatcher,
SessionManager,
ToolRegistry,
applyStyle,
buildSkillsDescriptionBlock,
findStyle,
loadMemory,
loadOutputStyles,
loadSettings,
loadSkills,
makeSkillTool,
resolveCredentials,
runAgent,
type DeepCodeSettings,
type Effort,
type Mode,
type AgentEvent,
type StoredMessage,
} from '@deepcode/core';
Expand Down Expand Up @@ -55,7 +64,7 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
}

const model = opts.model ?? settings.model ?? 'deepseek-chat';
const mode = opts.mode ?? settings.permissions?.defaultMode ?? 'default';
const mode = (opts.mode ?? settings.permissions?.defaultMode ?? 'default') as Mode;
const effort = opts.effort ?? settings.effortLevel ?? 'medium';
const { maxTokens, temperature } = EFFORT_PARAMS[effort as Effort] ?? EFFORT_PARAMS.medium;

Expand All @@ -70,6 +79,38 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
const tools = new ToolRegistry();
const commands = new CommandRegistry();

// M5: load memory, skills, output style — assemble final system prompt
const memory = await loadMemory({
cwd,
home: opts.home,
maxBytes: (settings.memoryLoadCapKB ?? 100) * 1024,
});
const skills = await loadSkills({
cwd,
home: opts.home,
overrides: settings.skillOverrides,
});
const styles = await loadOutputStyles({ cwd, home: opts.home });
const activeStyle = findStyle(styles, settings.outputStyle ?? 'default');

// Register Skill tool (M5)
if (skills.length > 0) {
tools.register(makeSkillTool(skills));
}

// Build the composite system prompt
let systemPrompt = DEFAULT_SYSTEM_PROMPT;
if (memory.text) systemPrompt += '\n\n' + memory.text;
const skillsBlock = buildSkillsDescriptionBlock(skills);
if (skillsBlock) systemPrompt += '\n\n' + skillsBlock;
systemPrompt = applyStyle(systemPrompt, activeStyle);

// Hook dispatcher (M3)
const hooks = new HookDispatcher({
hooks: settings.hooks,
disableAllHooks: settings.disableAllHooks,
});

let history: StoredMessage[] = [];
const ctx: SessionContext = {
cwd,
Expand Down Expand Up @@ -128,18 +169,26 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
continue;
}

// Otherwise: send to agent
// Otherwise: send to agent (with mode/permission/hooks gating from M3b)
const result = await runAgent({
provider,
tools,
systemPrompt: DEFAULT_SYSTEM_PROMPT,
systemPrompt,
userMessage: userInput,
history,
model: ctx.model,
maxTokens,
temperature,
cwd: ctx.cwd,
session: { manager: sessions, id: session.id },
mode: ctx.mode as Mode,
permissions: settings.permissions,
hooks,
approval: async (toolName, _input, verdict) => {
output.write(`\n ⏸ Approve ${toolName}? Reason: ${verdict.reason}\n`);
const answer = (await rl.question(' [y]es / [n]o: ')).trim().toLowerCase();
return answer === 'y' || answer === 'yes';
},
onEvent: (e: AgentEvent) => formatEvent(output, e),
});
history = result.history;
Expand Down
17 changes: 10 additions & 7 deletions docs/milestones/M4.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,33 @@

## Shipped

| Module | Purpose | Tests |
|---|---|---|
| `skills/frontmatter.ts` | Zero-dep YAML frontmatter parser (strings/numbers/bools/flow + block arrays/objects) | 10 |
| `skills/loader.ts` | 4-layer loader (builtin / user / project / plugin) + `buildSkillsDescriptionBlock()` for system-prompt injection | 9 |
| `sub-agents/loader.ts` | `.deepcode/agents/*.md` → `SubAgent` objects with isolation / tools / model / maxTurns | 6 |
| `output-styles/loader.ts` | 4 built-in styles (default / explanatory / learning / proactive) + user/project overrides + `applyStyle()` | 9 |
| Top-level re-exports | All new types/functions exposed from `@deepcode/core` | — |
| Module | Purpose | Tests |
| ------------------------- | ---------------------------------------------------------------------------------------------------------------- | ----- |
| `skills/frontmatter.ts` | Zero-dep YAML frontmatter parser (strings/numbers/bools/flow + block arrays/objects) | 10 |
| `skills/loader.ts` | 4-layer loader (builtin / user / project / plugin) + `buildSkillsDescriptionBlock()` for system-prompt injection | 9 |
| `sub-agents/loader.ts` | `.deepcode/agents/*.md` → `SubAgent` objects with isolation / tools / model / maxTurns | 6 |
| `output-styles/loader.ts` | 4 built-in styles (default / explanatory / learning / proactive) + user/project overrides + `applyStyle()` | 9 |
| Top-level re-exports | All new types/functions exposed from `@deepcode/core` | — |

**Total new tests**: 34. Across whole project: 240 passing / 4 skipped / 0 failed.

## What's in each subsystem

**Skills** (`SKILL.md` files in `<root>/<name>/SKILL.md`):

- Frontmatter spec: `name`, `description`, `allowed-tools`, `model`, `effort`, `shell`, `hooks`, `disabled`
- Qualified names: bare for user/project, `<plugin>:<name>` for plugin-shipped
- `disabled: true` in frontmatter OR `skillOverrides[name].disabled = true` in settings → skip load
- `buildSkillsDescriptionBlock()` produces the system-prompt fragment that lists available skills (name + description only — body is loaded on Skill-tool invocation)

**Sub-agents** (`.deepcode/agents/<name>.md`):

- Frontmatter: `name`, `description`, `tools[]`, `model`, `isolation`, `maxTurns`
- CLI `--agents <dir>` flag honored via `projectDirOverride` option
- `findSubAgent(agents, name)` lookup helper

**Output styles** (`<root>/.deepcode/output-styles/<name>.md`):

- 4 built-in: `default`, `explanatory`, `learning`, `proactive`
- Frontmatter: `name`, `description`, `keep-coding-instructions`
- User → project layer order with replace semantics
Expand Down
87 changes: 87 additions & 0 deletions docs/milestones/M5.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# M5 — Plugins (manifest + hash pin) + Skill tool + CLI integration

> **Status**: ✅ Foundation shipped — sandbox subprocess deferred to M5.1
> **Branch**: `feat/m5-plugins-skill-tool-integration`

## Shipped

| Module | Purpose | Lines | Tests |
|---|---|---|---|
| `plugins/manifest.ts` | Manifest parser, SHA-256 hash pinning, trust state JSON, installLocal(), discoverPlugins() with hash drift detection | 175 | 12 |
| `skills/tool.ts` | `Skill` ToolHandler factory — agent invokes by qualified name, skill body returned as tool_result | 60 | 6 |
| `apps/cli/src/repl.ts` | Wires memory + skills + output styles + mode/permissions/hooks/approval into the REPL agent loop | +50 | (smoke) |
| **subtotal** | | **~285** | **18** |

Across whole project: 258 tests / 4 skipped / 0 failed (was 240).

## What the CLI REPL now does end-to-end

When the user types a message, the agent run is set up and gated as follows:
1. **System prompt** = default + memory (DEEPCODE.md + ~/.deepcode + AGENTS.md + rules/) + skills description block + output style append
2. **Tools available** = 6 P0 + `Skill` tool (if any skills loaded)
3. **Per tool call** = goes through `dispatchToolCall()`:
- Mode policy (`plan` blocks writes, `dontAsk` rejects ask, etc.)
- Permission rules (allow/ask/deny patterns)
- PreToolUse hook chain (JSON output can override)
4. **`ask` verdict** → REPL prompts user `[y]es/[n]o`
5. **PostToolUse hook** fires after every tool execution
6. **Snapshots** captured pre/post Edit/Write for future rewind

## What's NOT in M5

Per `docs/design/plugin-security.md` we have a deliberate gap:

> **Plugin sandbox subprocess (RPC over stdio) — M5.1.**
> Right now `discoverPlugins()` finds installed plugins but the agent loop does
> NOT yet *run* their contributed code in-process. They're discovered, their
> manifest is verified, but their JS/skills/hooks/MCP servers aren't yet
> registered into the active registries. That wire-up needs the sandbox
> subprocess design from `plugin-security.md` §3.5 to land first — running
> arbitrary plugin code in the host process is the exact RCE vector the design
> doc enumerated as A1 / A3.

What works **today** safely:
- Local install: `installLocal({ sourcePath })` copies + records trust + hashes
- Discovery on startup: `discoverPlugins()` finds plugins, flags hash drift, returns enabled list
- Trust manifest at `~/.deepcode/plugins-trust.json` tracks what was installed
- Hash-pinning catches tampered plugins

What's deferred to M5.1:
- Subprocess sandbox via bwrap/sandbox-exec (depends on §3.9a sandbox subsystem — M3.5)
- RPC stdio bridge between host and plugin subprocess
- GitHub URL install (`gh:user/repo`)
- Marketplace index + ed25519 signature verification
- Revoke list pull
- Loading plugin-bundled skills/agents/hooks into the active registry

## Skill tool

`makeSkillTool(skills)` returns a `ToolHandler` that:
- Looks up skill by `qualifiedName` (e.g. `code-review` or `plugin-x:do-thing`)
- Returns the SKILL.md body as tool_result
- Lets the LLM "decide to invoke" via natural tool calling
- Errors clearly when skill not found (lists known skills)

Auto-trigger via description matching is implicit — by including `buildSkillsDescriptionBlock(skills)` in the system prompt, the model sees `## Available skills - **code-review** — Review diff for bugs.` and tool-calls Skill when the user asks.

## Tests added

- `plugins/manifest.test.ts` — 12 tests covering: manifest validation, hash determinism, hash sensitivity (manifest + SKILL.md changes), trust round-trip, install, discovery, drift detection, disabled list, untrusted skip
- `skills/tool.test.ts` — 6 tests covering: tool shape, known skill lookup, args appending, plugin-qualified names, missing skill, missing arg

## Verified

```
pnpm typecheck → green
pnpm build → green
pnpm test → 258 passed / 4 skipped / 0 failed
pnpm format:check → conformant
```

CLI smoke: `node apps/cli/dist/cli.js --version` → `0.1.0`. Full REPL run not validated end-to-end (would need a live DEEPSEEK_API_KEY); the wiring is type-checked and the unit tests for each piece pass.

## Why deferring to M5.1 is the right call

`docs/design/plugin-security.md` was explicit that running plugins in the host process is the **primary** RCE vector. M5 ships the trust/hash machinery as a foundation, but explicitly **does not** wire plugin code into the live agent — because doing so without sandbox is the headline security mistake we warned ourselves about. The honest M5 is: discover and verify, don't execute.

The user can still benefit from skills (file-based, no code) — those work via the M4 user/project layers — they just don't yet auto-load from installed plugins.
21 changes: 20 additions & 1 deletion packages/core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,18 +123,37 @@ export { dispatchToolCall, type DispatchRequest, type DispatchVerdict } from './
// Agent loop's approval callback type (M3b)
export type { ApprovalCallback } from './agent.js';

// Skills (M4 — SKILL.md frontmatter loading + system-prompt builder)
// Skills (M4 — SKILL.md frontmatter loading + system-prompt builder; M5 — Skill tool)
export {
// Runtime helpers forwarded from ./skills/index.js
loadSkills,
buildSkillsDescriptionBlock,
parseFrontmatter,
parseSimpleYaml,
makeSkillTool, // M5 addition: the Skill tool factory
// Type-only exports
type Skill,
type SkillFrontmatter,
type LoadSkillsOpts,
type Frontmatter,
} from './skills/index.js';

// Plugins (M5 — manifest + hash pinning + local install + discovery)
export {
// Runtime helpers forwarded unchanged from ./plugins/index.js
installLocal,
discoverPlugins,
readManifest,
computeSourceHash,
loadTrustState,
saveTrustState,
pluginsDir,
trustFilePath,
// Type-only exports
type PluginManifest,
type InstalledPlugin,
type PluginTrust,
type TrustState,
type InstallOptions,
type DiscoverOptions,
} from './plugins/index.js';

// Sub-agents (M4 — .deepcode/agents/*.md)
export {
loadSubAgents,
Expand Down
40 changes: 36 additions & 4 deletions packages/core/src/plugins/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,38 @@
// Module: plugins
// Plugins subsystem entry — manifest parsing, hash pinning, local install, discovery.
// Spec: docs/DEVELOPMENT_PLAN.md §3.14
// Milestone: M5
// Spec: docs/DEVELOPMENT_PLAN.md §3.14 plugin sandbox sub-process + RPC + hash pin + marketplace (see docs/design/plugin-security.md)
// Status: placeholder — implemented in M5
//
// What's IN this milestone:
// - plugin.json manifest parsing
// - SHA-256 source hash + ~/.deepcode/plugins-trust.json
// - installLocal() — copy a directory + record trust
// - discoverPlugins() — scan ~/.deepcode/plugins/ + verify hashes
//
// What's NOT in this milestone (see docs/design/plugin-security.md):
// - Sandbox subprocess execution (RPC over stdio)
// - GitHub URL install (gh:user/repo)
// - Marketplace index + ed25519 signature verification
// - Revoke list pull + enforcement
// - "Trust ladder" UI tiers
//
// IMPORTANT: until subprocess sandbox lands (planned M5.1), plugins are
// effectively untrusted code with full host access. The trust system records
// what the user *thought* they were installing, but cannot enforce it.
// Treat M5 as a foundation, not a security boundary.

export {};
// Forward the plugins API (functions first, then type-only exports) from ./manifest.js.
export {
installLocal,
discoverPlugins,
readManifest,
computeSourceHash,
loadTrustState,
saveTrustState,
pluginsDir,
trustFilePath,
type PluginManifest,
type InstalledPlugin,
type PluginTrust,
type TrustState,
type InstallOptions,
type DiscoverOptions,
} from './manifest.js';
Loading
Loading