it('Task tool runs a sub-agent and feeds its output back', async () => {
  // One scripted provider queue serves both parent and child: the first reply
  // makes the top-level agent call Task, the second is consumed by the
  // sub-agent's run, and the third finishes the top-level run.
  const delegateCall = toolUse('delegating', {
    type: 'tool_use',
    id: 'task1',
    name: 'Task',
    input: { prompt: 'explore the routes' },
  });
  const provider = new MockProvider([
    delegateCall,
    endTurn('Found 3 routes.'), // the sub-agent's run
    endTurn('Summary: 3 routes exist.'), // back in the top-level agent
  ]);

  const result = await runAgent({
    provider,
    tools: new ToolRegistry(), // includes TaskTool
    systemPrompt: '',
    userMessage: 'how many routes?',
    model: 'deepseek-chat',
    cwd,
  });

  expect(result.stopReason).toBe('end_turn');

  // history[2] is the user message that follows the assistant's Task call; its
  // first block must be the tool_result carrying the sub-agent's output.
  const taskResultBlock = result.history[2]!.content[0];
  expect(taskResultBlock?.type).toBe('tool_result');
  if (taskResultBlock?.type === 'tool_result') {
    expect(taskResultBlock.content).toContain('Found 3 routes.');
  }

  // Exactly three provider calls: top-level turn 1, the sub-agent's turn,
  // top-level turn 2.
  expect(provider.received).toHaveLength(3);
});
/** Max sub-agent recursion: top-level (0) may spawn sub-agents (depth 1); those
 * cannot spawn more. */
const MAX_SUBAGENT_DEPTH = 1;
/** Tools a sub-agent never gets (would let it mutate the parent's control flow). */
const SUBAGENT_TOOL_DENYLIST = new Set<string>([
  'Task',
  'EnterPlanMode',
  'ExitPlanMode',
  'AskUserQuestion',
]);
/** Default turn cap for a sub-agent run when its frontmatter doesn't set one. */
const DEFAULT_SUBAGENT_MAX_TURNS = 12;

/**
 * Extract a sub-agent's conclusion from its run history.
 *
 * Walks the history backwards and returns the joined, trimmed text blocks of
 * the most recent assistant message that contains any non-blank text. Earlier
 * assistant messages (narration emitted alongside tool calls) are deliberately
 * excluded so the caller receives only the final answer, not the intermediate
 * work.
 *
 * @param history Messages produced by the sub-agent run.
 * @returns The final assistant text, or `''` when no assistant message has text.
 */
function finalAssistantText(history: readonly StoredMessage[]): string {
  for (let i = history.length - 1; i >= 0; i--) {
    const msg = history[i];
    if (msg?.role !== 'assistant') continue;
    // Guard against non-array content so a malformed message is skipped, not thrown on.
    const blocks = Array.isArray(msg.content) ? msg.content : [];
    const text = blocks
      .filter((b): b is import('./types.js').TextBlock => b.type === 'text')
      .map((b) => b.text)
      .join('\n')
      .trim();
    if (text) return text;
  }
  return '';
}

// ---- inside runAgent(), right after the tool context object is built ----

// Wire the Task tool's sub-agent runner — but only below the recursion cap,
// so a sub-agent can't spawn further sub-agents (it also never gets the Task
// tool, see the denylist; this is belt-and-suspenders).
const depth = opts.subAgentDepth ?? 0;
if (depth < MAX_SUBAGENT_DEPTH) {
  toolCtx.runSubAgent = async ({ prompt, agentType }) => {
    // Defaults for a generic (unnamed) sub-agent; a named agent's frontmatter
    // may override each of these below.
    let systemPrompt =
      'You are a focused sub-agent. Complete the task below using the available tools, then reply with a concise summary of your findings or result. You have no memory of any other conversation.';
    let model = opts.model;
    let subMaxTurns = DEFAULT_SUBAGENT_MAX_TURNS;
    let allow: Set<string> | null = null;
    try {
      // Lazy import through a variable keeps node:fs out of browser bundles.
      // The @vite-ignore marker must sit inside import() for Vite to honour it.
      const mod = './sub-agents/index.js';
      const { loadSubAgents, findSubAgent } = (await import(
        /* @vite-ignore */ mod
      )) as typeof import('./sub-agents/index.js');
      const agents = await loadSubAgents({ cwd: opts.cwd });
      const found = agentType ? findSubAgent(agents, agentType) : undefined;
      if (agentType && !found) {
        const names = agents.map((a) => a.qualifiedName).join(', ') || '(none)';
        throw new Error(`unknown subagent_type "${agentType}". Available: ${names}`);
      }
      if (found) {
        systemPrompt = found.body.trim() || systemPrompt;
        if (found.frontmatter.model) model = found.frontmatter.model;
        if (found.frontmatter.maxTurns) subMaxTurns = found.frontmatter.maxTurns;
        if (found.frontmatter.tools?.length) allow = new Set(found.frontmatter.tools);
      }
    } catch (err) {
      // An explicitly requested agent that is missing or unloadable is a hard
      // error; with no agent named, loader failures degrade to the generic prompt.
      if (agentType) throw err;
    }

    // Single source of truth for "may the sub-agent see this tool?": not on
    // the control/recursion denylist, and on the frontmatter whitelist if one
    // was given.
    const permitted = (name: string): boolean =>
      !SUBAGENT_TOOL_DENYLIST.has(name) && (allow === null || allow.has(name));

    // A registry view exposing only the permitted tools. Built inline so
    // agent.ts never imports ToolRegistry/BUILTIN_TOOLS (which would drag
    // node:fs into the renderer bundle).
    // NOTE(review): only definitions/get/list are provided; the cast assumes
    // the agent loop uses nothing else from the registry — confirm.
    const subTools = {
      definitions: () => opts.tools.definitions().filter((d) => permitted(d.name)),
      get: (name: string) => (permitted(name) ? opts.tools.get(name) : undefined),
      list: () => opts.tools.list().filter((t) => permitted(t.name)),
    } as typeof opts.tools;

    const sub = await runAgent({
      provider: opts.provider,
      tools: subTools,
      systemPrompt,
      userMessage: prompt,
      model,
      maxTokens: opts.maxTokens,
      temperature: opts.temperature,
      maxTurns: subMaxTurns,
      cwd: opts.cwd,
      signal: opts.signal,
      mode: opts.mode,
      permissions: opts.permissions,
      hooks: opts.hooks,
      sandboxConfig: opts.sandboxConfig,
      autoMode: opts.autoMode,
      systemReminders: false, // sub-agent gets a clean context
      subAgentDepth: depth + 1,
    });

    // Hand back only the conclusion (the last assistant message with text),
    // matching the runSubAgent contract of "final assistant text".
    return {
      text: finalAssistantText(sub.history),
      turnsUsed: sub.turnsUsed,
      agentType: agentType ?? 'general',
    };
  };
}
// Task tool — dispatch a focused sub-agent for a self-contained piece of work.
// Mirrors Claude Code's Task tool. The actual sub-agent run is provided by the
// agent loop via ctx.runSubAgent (it has the provider/model/tools in scope);
// this tool is just the schema + a thin call into it.
// Spec: docs/DEVELOPMENT_PLAN.md §3.13a / §0.1 (parity tool)

import type { ToolContext, ToolHandler, ToolResult } from '../types.js';

interface TaskInput {
  /** Short label for the work (3-5 words). */
  description?: string;
  /** The full instruction for the sub-agent — it has no other context. */
  prompt?: string;
  /** Named sub-agent from .deepcode/agents/*.md; omit for a generic one. */
  subagent_type?: string;
}

/**
 * Validate the model-supplied tool input at runtime.
 *
 * Tool input comes from the model and is untyped, so every field is checked
 * with `typeof` rather than asserted. Non-string values become `undefined`;
 * `prompt` and `subagent_type` are trimmed, and a blank `subagent_type` is
 * treated as "not specified" so it selects the generic sub-agent.
 */
function parseTaskInput(raw: Record<string, unknown> | null | undefined): TaskInput {
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;
  return {
    description: asString(raw?.['description']),
    prompt: asString(raw?.['prompt'])?.trim(),
    subagent_type: asString(raw?.['subagent_type'])?.trim() || undefined,
  };
}

export const TaskTool: ToolHandler = {
  name: 'Task',
  definition: {
    name: 'Task',
    description:
      'Launch a focused sub-agent to handle a self-contained, multi-step task and return only its conclusion (not its intermediate work). Use for broad searches/research where you want the result, not the file dumps, or to parallelize independent investigations. The sub-agent runs in a fresh context with no memory of this conversation — put everything it needs in `prompt`. Optionally target a named sub-agent from .deepcode/agents via `subagent_type`. The sub-agent cannot spawn further sub-agents.',
    inputSchema: {
      type: 'object',
      properties: {
        description: { type: 'string', description: 'Short (3-5 word) task label.' },
        prompt: {
          type: 'string',
          description: 'Self-contained instruction — the sub-agent sees nothing else.',
        },
        subagent_type: {
          type: 'string',
          description: 'Optional named sub-agent (.deepcode/agents/<name>.md).',
        },
      },
      required: ['prompt'],
    },
  },
  /**
   * Run a sub-agent via `ctx.runSubAgent` and return its text as the tool result.
   *
   * Never throws: a missing or non-string prompt, an absent runner, and any
   * failure inside the runner are all reported as `isError` results so the
   * calling agent can recover.
   *
   * @param rawInput Untyped tool input from the model.
   * @param ctx Tool context; `runSubAgent` is optional and may be absent.
   * @returns The sub-agent's text plus `{ agentType, turnsUsed, description }`
   *   in `data`, or an error result.
   */
  async execute(rawInput: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
    const input = parseTaskInput(rawInput);
    if (!input.prompt) {
      return { content: 'Error: Task requires a non-empty `prompt`.', isError: true };
    }
    if (!ctx.runSubAgent) {
      // No sub-agent runner wired (renderer, or a sub-agent already at max
      // recursion depth). Fail clearly rather than silently no-op.
      return {
        content:
          'Error: sub-agents are not available here (already inside a sub-agent, or the host did not enable Task). Do the work directly.',
        isError: true,
      };
    }
    try {
      const result = await ctx.runSubAgent({
        prompt: input.prompt,
        agentType: input.subagent_type,
        description: input.description,
      });
      return {
        content: result.text || '(sub-agent produced no output)',
        data: {
          agentType: result.agentType,
          turnsUsed: result.turnsUsed,
          description: input.description,
        },
      };
    } catch (err: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const reason = err instanceof Error ? err.message : String(err);
      return { content: `Error running sub-agent: ${reason}`, isError: true };
    }
  },
};
Supplied by the agent loop when sub-agent + * recursion depth allows; absent in the renderer or at max depth (so a + * sub-agent can't spawn further sub-agents). Resolves to the sub-agent's + * final assistant text. + */ + runSubAgent?: (opts: { + prompt: string; + agentType?: string; + description?: string; + }) => Promise<{ text: string; turnsUsed: number; agentType: string }>; } export interface ToolResult {