Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions packages/core/src/agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,70 @@ describe('runAgent', () => {
expect(ids).toEqual(['g1', 'g2']);
});

it('Task tool runs a sub-agent and feeds its output back', async () => {
  // The parent and the sub-agent share one scripted provider queue, so the
  // replies below are consumed in call order: parent → sub-agent → parent.
  const provider = new MockProvider([
    // 1) parent turn: request the Task tool
    toolUse('delegating', {
      type: 'tool_use',
      id: 'task1',
      name: 'Task',
      input: { prompt: 'explore the routes' },
    }),
    // 2) the sub-agent's only turn
    endTurn('Found 3 routes.'),
    // 3) parent turn after the tool result came back
    endTurn('Summary: 3 routes exist.'),
  ]);

  const { stopReason, history } = await runAgent({
    model: 'deepseek-chat',
    systemPrompt: '',
    userMessage: 'how many routes?',
    tools: new ToolRegistry(), // includes TaskTool
    provider,
    cwd,
  });

  expect(stopReason).toBe('end_turn');

  // Index 2 is the user message that follows the assistant's Task call; its
  // first block is the tool_result that should carry the sub-agent's output.
  const taskResultMessage = history[2]!;
  const [firstBlock] = taskResultMessage.content;
  expect(firstBlock?.type).toBe('tool_result');
  if (firstBlock?.type === 'tool_result') {
    expect(firstBlock.content).toContain('Found 3 routes.');
  }

  // One provider call per scripted reply: parent, sub-agent, parent.
  expect(provider.received).toHaveLength(3);
});

it('a sub-agent cannot spawn further sub-agents (depth guard)', async () => {
  // Running with subAgentDepth=1 simulates being inside a sub-agent: no
  // runSubAgent is wired at that depth, so the Task call must come back as an
  // error tool_result instead of recursing.
  const provider = new MockProvider([
    toolUse('trying to recurse', {
      type: 'tool_use',
      id: 't',
      name: 'Task',
      input: { prompt: 'recurse forever' },
    }),
    endTurn('gave up recursing'),
  ]);

  const { history } = await runAgent({
    subAgentDepth: 1,
    model: 'deepseek-chat',
    systemPrompt: '',
    userMessage: 'go',
    tools: new ToolRegistry(),
    provider,
    cwd,
  });

  // First block of the message that answers the Task call.
  const [firstBlock] = history[2]!.content;
  if (firstBlock?.type !== 'tool_result') {
    expect.fail('expected a tool_result');
  } else {
    expect(firstBlock.is_error).toBe(true);
    expect(firstBlock.content).toMatch(/not available/);
  }
});

it('does not auto-compact on cumulative usage when each turn is below threshold', async () => {
// Regression: shouldCompact must use the *current* turn's input tokens, not
// the cumulative sum across turns. contextWindow 100, threshold 0.8 → trigger
Expand Down
101 changes: 101 additions & 0 deletions packages/core/src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,25 @@ export interface RunAgentOptions {
/** Host callback for AskUserQuestion tool. Optional — when absent the tool
* errors. */
askUser?: NonNullable<ToolContext['askUser']>;
/** Internal: sub-agent recursion depth (the Task tool). 0 = top-level agent.
* Sub-agents run at depth 1 and are NOT given a runSubAgent, so they can't
* spawn further sub-agents. */
subAgentDepth?: number;
}

/**
 * Recursion cap for the Task tool. An agent is only handed a sub-agent runner
 * while its own depth is below this value, so the top-level agent (depth 0)
 * can delegate and a depth-1 sub-agent cannot.
 */
const MAX_SUBAGENT_DEPTH = 1;

/**
 * Tool names that are always hidden from a sub-agent, because they would let
 * it steer the parent's control flow (recursion, plan-mode switches, asking
 * the user).
 */
const SUBAGENT_TOOL_DENYLIST: ReadonlySet<string> = new Set<string>([
  'Task',
  'EnterPlanMode',
  'ExitPlanMode',
  'AskUserQuestion',
]);

/** Turn budget for a sub-agent whose frontmatter does not specify `maxTurns`. */
const DEFAULT_SUBAGENT_MAX_TURNS = 12;

export interface RunAgentResult {
/** Final history (input history + everything appended this run). */
history: StoredMessage[];
Expand Down Expand Up @@ -166,6 +183,90 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
askUser: opts.askUser,
modeSignal,
};

// Wire the Task tool's sub-agent runner — but only below the recursion cap,
// so a sub-agent can't spawn further sub-agents. (A sub-agent is also denied
// the Task tool through SUBAGENT_TOOL_DENYLIST; this is belt-and-suspenders.)
const depth = opts.subAgentDepth ?? 0;
if (depth < MAX_SUBAGENT_DEPTH) {
  /**
   * Runs one sub-agent to completion and reports its conclusion.
   *
   * - `prompt` becomes the sub-agent's user message.
   * - `agentType`, when given, must name a sub-agent definition loadable from
   *   `opts.cwd`; its body/frontmatter override the system prompt, model,
   *   turn cap and tool whitelist. An unknown or unloadable named agent throws.
   * - Without `agentType`, loader failures are ignored and a generic
   *   sub-agent prompt is used.
   *
   * Resolves to the text of the sub-agent's last assistant message that
   * contains text, plus the turns used and the agent type label.
   */
  toolCtx.runSubAgent = async ({ prompt, agentType }) => {
    // Defaults for the generic (unnamed) sub-agent.
    let systemPrompt =
      'You are a focused sub-agent. Complete the task below using the available tools, then reply with a concise summary of your findings or result. You have no memory of any other conversation.';
    let model = opts.model;
    let subMaxTurns = DEFAULT_SUBAGENT_MAX_TURNS;
    let allow: Set<string> | null = null;
    try {
      // Lazy import through a variable keeps node:fs out of browser bundles.
      // The @vite-ignore marker has to sit inside the import() call itself to
      // be honored; on the string constant it has no effect.
      const mod = './sub-agents/index.js';
      const { loadSubAgents, findSubAgent } = (await import(
        /* @vite-ignore */ mod
      )) as typeof import('./sub-agents/index.js');
      const agents = await loadSubAgents({ cwd: opts.cwd });
      const found = agentType ? findSubAgent(agents, agentType) : undefined;
      if (agentType && !found) {
        const names = agents.map((a) => a.qualifiedName).join(', ') || '(none)';
        throw new Error(`unknown subagent_type "${agentType}". Available: ${names}`);
      }
      if (found) {
        systemPrompt = found.body.trim() || systemPrompt;
        if (found.frontmatter.model) model = found.frontmatter.model;
        if (found.frontmatter.maxTurns) subMaxTurns = found.frontmatter.maxTurns;
        if (found.frontmatter.tools?.length) allow = new Set(found.frontmatter.tools);
      }
    } catch (err) {
      if (agentType) throw err; // explicit agent requested but not found/loadable
      // else: no agent named — proceed with the generic sub-agent prompt
    }

    // A registry view exposing only the sub-agent's allowed tools (its
    // frontmatter whitelist, if any) minus the control/recursion tools.
    // Built inline so agent.ts never imports ToolRegistry/BUILTIN_TOOLS
    // (which would drag node:fs into the renderer bundle).
    const subTools = {
      definitions: () =>
        opts.tools
          .definitions()
          .filter((d) => !SUBAGENT_TOOL_DENYLIST.has(d.name) && (!allow || allow.has(d.name))),
      get: (name: string) =>
        SUBAGENT_TOOL_DENYLIST.has(name) || (allow && !allow.has(name))
          ? undefined
          : opts.tools.get(name),
      list: () =>
        opts.tools
          .list()
          .filter((t) => !SUBAGENT_TOOL_DENYLIST.has(t.name) && (!allow || allow.has(t.name))),
    } as typeof opts.tools;

    const sub = await runAgent({
      provider: opts.provider,
      tools: subTools,
      systemPrompt,
      userMessage: prompt,
      model,
      maxTokens: opts.maxTokens,
      temperature: opts.temperature,
      maxTurns: subMaxTurns,
      cwd: opts.cwd,
      signal: opts.signal,
      mode: opts.mode,
      permissions: opts.permissions,
      hooks: opts.hooks,
      sandboxConfig: opts.sandboxConfig,
      autoMode: opts.autoMode,
      systemReminders: false, // sub-agent gets a clean context
      subAgentDepth: depth + 1,
    });

    // Report only the conclusion: walk the history backwards and take the text
    // of the most recent assistant message that has any. Joining every
    // assistant turn would leak intermediate narration into the parent's
    // context, which the Task tool promises not to do.
    let text = '';
    for (let i = sub.history.length - 1; i >= 0 && text === ''; i -= 1) {
      const message = sub.history[i];
      if (message?.role !== 'assistant') continue;
      text = message.content
        .filter((b): b is import('./types.js').TextBlock => b.type === 'text')
        .map((b) => b.text)
        .join('\n')
        .trim();
    }
    return { text, turnsUsed: sub.turnsUsed, agentType: agentType ?? 'general' };
  };
}
const totalUsage = { inputTokens: 0, outputTokens: 0, reasoningTokens: 0 };
let turnsUsed = 0;

Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/tools/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { ExitPlanModeTool } from './exit-plan.js';
import { GlobTool } from './glob.js';
import { GrepTool } from './grep.js';
import { ReadTool } from './read.js';
import { TaskTool } from './task.js';
import { TodoWriteTool } from './todo.js';
import { WebFetchTool } from './web-fetch.js';
import { WebSearchTool } from './web-search.js';
Expand All @@ -34,6 +35,7 @@ export const BUILTIN_TOOLS: ToolHandler[] = [
AskUserQuestionTool,
EnterPlanModeTool,
ExitPlanModeTool,
TaskTool,
];

export class ToolRegistry {
Expand Down
47 changes: 47 additions & 0 deletions packages/core/src/tools/task.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { describe, expect, it } from 'vitest';
import { TaskTool } from './task.js';

// Unit tests for TaskTool.execute: input validation, the "no runner wired"
// failure, delegation to ctx.runSubAgent, and error propagation.
describe('TaskTool', () => {
  it('errors on empty prompt', async () => {
    // A whitespace-only prompt counts as empty.
    const outcome = await TaskTool.execute({ prompt: ' ' }, { cwd: '/x' });

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toMatch(/non-empty/);
  });

  it('errors clearly when no runSubAgent is wired (renderer / max depth)', async () => {
    // The context deliberately omits runSubAgent.
    const outcome = await TaskTool.execute({ prompt: 'do a thing' }, { cwd: '/x' });

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toMatch(/not available/);
  });

  it('delegates to ctx.runSubAgent and returns its text', async () => {
    const toolInput = {
      prompt: 'explore the routes',
      subagent_type: 'explorer',
      description: 'find routes',
    };

    const outcome = await TaskTool.execute(toolInput, {
      cwd: '/x',
      // Stub runner that echoes what it was asked to do.
      runSubAgent: async ({ prompt, agentType }) => ({
        text: `did: ${prompt} via ${agentType}`,
        turnsUsed: 2,
        agentType: agentType ?? 'general',
      }),
    });

    expect(outcome.isError).toBeFalsy();
    expect(outcome.content).toBe('did: explore the routes via explorer');
    const { turnsUsed } = outcome.data as { turnsUsed: number };
    expect(turnsUsed).toBe(2);
  });

  it('surfaces sub-agent errors', async () => {
    // A runner that always rejects; the tool should convert it to an error result.
    const outcome = await TaskTool.execute(
      { prompt: 'x' },
      {
        cwd: '/x',
        runSubAgent: async () => {
          throw new Error('unknown subagent_type "nope"');
        },
      },
    );

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toMatch(/unknown subagent_type/);
  });
});
73 changes: 73 additions & 0 deletions packages/core/src/tools/task.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Task tool — dispatch a focused sub-agent for a self-contained piece of work.
// Mirrors Claude Code's Task tool. The actual sub-agent run is provided by the
// agent loop via ctx.runSubAgent (it has the provider/model/tools in scope);
// this tool is just the schema + a thin call into it.
// Spec: docs/DEVELOPMENT_PLAN.md §3.13a / §0.1 (parity tool)

import type { ToolContext, ToolHandler, ToolResult } from '../types.js';

/** Shape of the arguments the model supplies when it calls the Task tool. */
interface TaskInput {
  /** Complete, self-contained instruction; the sub-agent receives no other context. */
  prompt?: string;
  /** Name of a sub-agent defined in .deepcode/agents/*.md; leave out for a generic one. */
  subagent_type?: string;
  /** Brief label (3-5 words) describing the delegated work. */
  description?: string;
}

/**
 * Task tool handler: schema plus a thin call into `ctx.runSubAgent`.
 *
 * `execute` never throws for bad input or a failing sub-agent; every failure is
 * reported as a `ToolResult` with `isError: true`:
 * - missing, non-string or blank `prompt`;
 * - no `runSubAgent` on the context;
 * - the runner rejecting (its message is surfaced in `content`).
 *
 * On success `content` is the sub-agent's text (or a placeholder when it is
 * empty) and `data` carries `agentType`, `turnsUsed` and `description`.
 */
export const TaskTool: ToolHandler = {
  name: 'Task',
  definition: {
    name: 'Task',
    description:
      'Launch a focused sub-agent to handle a self-contained, multi-step task and return only its conclusion (not its intermediate work). Use for broad searches/research where you want the result, not the file dumps, or to parallelize independent investigations. The sub-agent runs in a fresh context with no memory of this conversation — put everything it needs in `prompt`. Optionally target a named sub-agent from .deepcode/agents via `subagent_type`. The sub-agent cannot spawn further sub-agents.',
    inputSchema: {
      type: 'object',
      properties: {
        description: { type: 'string', description: 'Short (3-5 word) task label.' },
        prompt: {
          type: 'string',
          description: 'Self-contained instruction — the sub-agent sees nothing else.',
        },
        subagent_type: {
          type: 'string',
          description: 'Optional named sub-agent (.deepcode/agents/<name>.md).',
        },
      },
      required: ['prompt'],
    },
  },
  async execute(rawInput: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
    // The input is produced by the model, so nothing about its runtime types is
    // guaranteed: treat every field as unknown and narrow before use.
    const input = (rawInput ?? {}) as Partial<Record<keyof TaskInput, unknown>>;

    const prompt = typeof input.prompt === 'string' ? input.prompt.trim() : '';
    if (!prompt) {
      return { content: 'Error: Task requires a non-empty `prompt`.', isError: true };
    }
    if (!ctx.runSubAgent) {
      // No sub-agent runner wired (renderer, or a sub-agent already at max
      // recursion depth). Fail clearly rather than silently no-op.
      return {
        content:
          'Error: sub-agents are not available here (already inside a sub-agent, or the host did not enable Task). Do the work directly.',
        isError: true,
      };
    }

    // A blank or non-string subagent_type means "generic sub-agent".
    const requestedType =
      typeof input.subagent_type === 'string' ? input.subagent_type.trim() : '';
    const agentType = requestedType || undefined;
    const description = typeof input.description === 'string' ? input.description : undefined;

    try {
      const result = await ctx.runSubAgent({ prompt, agentType, description });
      return {
        content: result.text || '(sub-agent produced no output)',
        data: {
          agentType: result.agentType,
          turnsUsed: result.turnsUsed,
          description,
        },
      };
    } catch (err) {
      // Anything can be thrown; only Error instances have a reliable message.
      const message = err instanceof Error ? err.message : String(err);
      return { content: `Error running sub-agent: ${message}`, isError: true };
    }
  },
};
11 changes: 11 additions & 0 deletions packages/core/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,17 @@ export interface ToolContext {
* accordingly (plan ⇄ default).
*/
modeSignal?: { exitPlanMode?: boolean; enterPlanMode?: boolean };
/**
* Run a sub-agent (the Task tool). Supplied by the agent loop when sub-agent
* recursion depth allows; absent in the renderer or at max depth (so a
* sub-agent can't spawn further sub-agents). Resolves to an object carrying
* the sub-agent's output `text`, the number of turns it used (`turnsUsed`),
* and the `agentType` label of the agent that ran.
*/
runSubAgent?: (opts: {
prompt: string;
agentType?: string;
description?: string;
}) => Promise<{ text: string; turnsUsed: number; agentType: string }>;
}

export interface ToolResult {
Expand Down
Loading