diff --git a/docs/BEHAVIOR_PARITY.md b/docs/BEHAVIOR_PARITY.md index 12b25c9..cf54b14 100644 --- a/docs/BEHAVIOR_PARITY.md +++ b/docs/BEHAVIOR_PARITY.md @@ -146,9 +146,9 @@ Specific deviations: | TaskCreate / Monitor / TaskList / TaskGet / TaskOutput / TaskStop / TaskUpdate | ✓ | 🔄 | M8 (background tasks) | | CronCreate / CronList / CronDelete | ✓ | 🔄 | M8 (cron daemon) | | ScheduleWakeup | ✓ | 🔄 | M8 | -| WebFetch | ✓ | 🔄 | M3c+ | -| WebSearch | ✓ | 🔄 | M3c+ | -| TodoWrite | ✓ | 🔄 | M3c+ | +| WebFetch | ✓ | ✅ | shipped M3c-rest — 5 MiB cap + abort | +| WebSearch | ✓ | ✅ | shipped M3c-rest — DDG default + SearXNG | +| TodoWrite | ✓ | ✅ | shipped M3c-rest — persists in sessionDir | ## CLI flags diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 7f8d995..e4f1aec 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -29,8 +29,17 @@ export { BashTool, GrepTool, GlobTool, + TodoWriteTool, + WebFetchTool, + WebSearchTool, + readTodos, + TODO_FILE, + parseDuckDuckGoHtml, ToolRegistry, BUILTIN_TOOLS, + type TodoItem, + type TodoStatus, + type SearchHit, } from './tools/index.js'; // Sessions diff --git a/packages/core/src/tools/index.ts b/packages/core/src/tools/index.ts index 6e74b2f..3e2a6df 100644 --- a/packages/core/src/tools/index.ts +++ b/packages/core/src/tools/index.ts @@ -1,6 +1,6 @@ -// Tools subsystem entry — 6 P0 tools (Read/Write/Edit/Bash/Grep/Glob) + registry. +// Tools subsystem entry — P0 tools + M3c-rest extensions + registry. 
// Spec: docs/DEVELOPMENT_PLAN.md ยง3.2 -// Milestone: M1 +// Milestone: M1 (P0) + M3c-rest (TodoWrite/WebFetch/WebSearch) export { ReadTool } from './read.js'; export { WriteTool } from './write.js'; @@ -8,5 +8,10 @@ export { EditTool } from './edit.js'; export { BashTool } from './bash.js'; export { GrepTool } from './grep.js'; export { GlobTool } from './glob.js'; +export { TodoWriteTool, readTodos, TODO_FILE } from './todo.js'; +export type { TodoItem, TodoStatus } from './todo.js'; +export { WebFetchTool } from './web-fetch.js'; +export { WebSearchTool, parseDuckDuckGoHtml } from './web-search.js'; +export type { SearchHit } from './web-search.js'; export { ToolRegistry, BUILTIN_TOOLS } from './registry.js'; export type { ToolDefinition, ToolContext, ToolResult, ToolHandler } from './types.js'; diff --git a/packages/core/src/tools/registry.ts b/packages/core/src/tools/registry.ts index c1e6f21..2ebc03b 100644 --- a/packages/core/src/tools/registry.ts +++ b/packages/core/src/tools/registry.ts @@ -7,9 +7,16 @@ import { EditTool } from './edit.js'; import { GlobTool } from './glob.js'; import { GrepTool } from './grep.js'; import { ReadTool } from './read.js'; +import { TodoWriteTool } from './todo.js'; +import { WebFetchTool } from './web-fetch.js'; +import { WebSearchTool } from './web-search.js'; import { WriteTool } from './write.js'; -/** The 6 P0 tools shipped in M1. */ +/** + * Built-in tools shipped by default. 
+ * ยท 6 P0 tools from M1 (Read/Write/Edit/Bash/Grep/Glob) + * ยท 3 M3c-rest tools (TodoWrite/WebFetch/WebSearch) + */ export const BUILTIN_TOOLS: ToolHandler[] = [ ReadTool, WriteTool, @@ -17,6 +24,9 @@ export const BUILTIN_TOOLS: ToolHandler[] = [ BashTool, GrepTool, GlobTool, + TodoWriteTool, + WebFetchTool, + WebSearchTool, ]; export class ToolRegistry { diff --git a/packages/core/src/tools/todo.test.ts b/packages/core/src/tools/todo.test.ts new file mode 100644 index 0000000..9d0f4d9 --- /dev/null +++ b/packages/core/src/tools/todo.test.ts @@ -0,0 +1,102 @@ +import { promises as fs } from 'node:fs'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { readTodos, TODO_FILE, TodoWriteTool } from './todo.js'; + +describe('TodoWriteTool', () => { + let sessionDir: string; + beforeEach(async () => { + sessionDir = await mkdtemp(join(tmpdir(), 'dc-todo-')); + }); + afterEach(async () => { + await rm(sessionDir, { recursive: true, force: true }); + }); + + it('persists a fresh list to /todos.json', async () => { + const res = await TodoWriteTool.execute( + { + todos: [ + { content: 'Write spec', activeForm: 'Writing spec', status: 'in_progress' }, + { content: 'Add tests', activeForm: 'Adding tests', status: 'pending' }, + ], + }, + { cwd: process.cwd(), sessionDir }, + ); + expect(res.isError).toBeFalsy(); + expect(res.content).toContain('OK'); + expect(res.content).toContain('2 todos'); + const raw = await fs.readFile(join(sessionDir, TODO_FILE), 'utf8'); + const parsed = JSON.parse(raw) as Array<{ content: string }>; + expect(parsed).toHaveLength(2); + expect(parsed[0].content).toBe('Write spec'); + }); + + it('replaces the list on subsequent calls (no append)', async () => { + await TodoWriteTool.execute( + { + todos: [{ content: 'A', activeForm: 'Doing A', status: 'pending' }], + }, + { cwd: process.cwd(), sessionDir 
}, + ); + await TodoWriteTool.execute( + { + todos: [{ content: 'B', activeForm: 'Doing B', status: 'completed' }], + }, + { cwd: process.cwd(), sessionDir }, + ); + const todos = await readTodos(sessionDir); + expect(todos).toHaveLength(1); + expect(todos[0]?.content).toBe('B'); + }); + + it('rejects when more than one item is in_progress', async () => { + const res = await TodoWriteTool.execute( + { + todos: [ + { content: 'A', activeForm: 'Doing A', status: 'in_progress' }, + { content: 'B', activeForm: 'Doing B', status: 'in_progress' }, + ], + }, + { cwd: process.cwd(), sessionDir }, + ); + expect(res.isError).toBe(true); + expect(res.content).toMatch(/at most one/i); + }); + + it('rejects malformed item shape', async () => { + const res = await TodoWriteTool.execute( + { + todos: [{ content: 'A', status: 'pending' }], + }, + { cwd: process.cwd(), sessionDir }, + ); + expect(res.isError).toBe(true); + }); + + it('rejects non-array input', async () => { + const res = await TodoWriteTool.execute( + { todos: 'nope' as unknown as never }, + { cwd: process.cwd(), sessionDir }, + ); + expect(res.isError).toBe(true); + }); + + it('returns ok but not persisted when no sessionDir', async () => { + const res = await TodoWriteTool.execute( + { + todos: [{ content: 'X', activeForm: 'X-ing', status: 'pending' }], + }, + { cwd: process.cwd() }, + ); + expect(res.isError).toBeFalsy(); + expect(res.content).toMatch(/not persisted/); + expect((res.data as { persisted: boolean }).persisted).toBe(false); + }); + + it('readTodos returns [] when file does not exist', async () => { + const todos = await readTodos(sessionDir); + expect(todos).toEqual([]); + }); +}); diff --git a/packages/core/src/tools/todo.ts b/packages/core/src/tools/todo.ts new file mode 100644 index 0000000..65bb5f9 --- /dev/null +++ b/packages/core/src/tools/todo.ts @@ -0,0 +1,126 @@ +// TodoWrite tool โ€” agent-managed task list, persisted per-session. 
+// Spec: docs/DEVELOPMENT_PLAN.md ยง3.15 / behavior parity with Claude Code's TodoWrite. +// +// The list lives in `/todos.json`. Each call replaces the whole list +// (agent submits the desired state). UI can render it as a checklist. The tool +// itself is stateless โ€” state is the file. + +import { promises as fs } from 'node:fs'; +import { join } from 'node:path'; +import type { ToolContext, ToolHandler, ToolResult } from '../types.js'; + +export type TodoStatus = 'pending' | 'in_progress' | 'completed'; + +export interface TodoItem { + content: string; + /** First-person continuous form, shown while the item is in_progress. */ + activeForm: string; + status: TodoStatus; +} + +interface TodoInput { + todos: TodoItem[]; +} + +/** Where the per-session todo list lives โ€” relative to sessionDir. */ +export const TODO_FILE = 'todos.json'; + +function isTodo(x: unknown): x is TodoItem { + if (!x || typeof x !== 'object') return false; + const o = x as Record; + return ( + typeof o['content'] === 'string' && + typeof o['activeForm'] === 'string' && + (o['status'] === 'pending' || o['status'] === 'in_progress' || o['status'] === 'completed') + ); +} + +export const TodoWriteTool: ToolHandler = { + name: 'TodoWrite', + definition: { + name: 'TodoWrite', + description: + 'Replace the session todo list. Submit the full desired state (not a diff). Each item has content (imperative), activeForm (first-person continuous, shown while in_progress), and status (pending|in_progress|completed). Convention: at most ONE item in_progress at a time.', + inputSchema: { + type: 'object', + properties: { + todos: { + type: 'array', + description: 'Full list of todo items in the desired final state.', + items: { + type: 'object', + properties: { + content: { type: 'string', description: 'Imperative task description.' }, + activeForm: { type: 'string', description: 'First-person continuous form.' 
}, + status: { type: 'string', enum: ['pending', 'in_progress', 'completed'] }, + }, + required: ['content', 'activeForm', 'status'], + }, + }, + }, + required: ['todos'], + }, + }, + async execute(rawInput: Record, ctx: ToolContext): Promise { + const input = rawInput as unknown as TodoInput; + if (!Array.isArray(input?.todos)) { + return { content: 'Error: todos must be an array.', isError: true }; + } + if (!input.todos.every(isTodo)) { + return { + content: + 'Error: each todo needs string content, string activeForm, and status in (pending|in_progress|completed).', + isError: true, + }; + } + const inProgressCount = input.todos.filter((t) => t.status === 'in_progress').length; + if (inProgressCount > 1) { + return { + content: `Error: at most one todo may be in_progress at a time (got ${inProgressCount}).`, + isError: true, + }; + } + + if (!ctx.sessionDir) { + // Without a sessionDir we can't persist, but we still validate and return. + return { + content: `OK (not persisted: no sessionDir). ${summarize(input.todos)}`, + data: { todos: input.todos, persisted: false }, + }; + } + + const target = join(ctx.sessionDir, TODO_FILE); + try { + await fs.mkdir(ctx.sessionDir, { recursive: true }); + await fs.writeFile(target, JSON.stringify(input.todos, null, 2) + '\n', 'utf8'); + } catch (err) { + return { + content: `Error persisting todos: ${(err as Error).message}`, + isError: true, + }; + } + + return { + content: `OK. ${summarize(input.todos)}`, + data: { todos: input.todos, persisted: true, path: target }, + }; + }, +}; + +function summarize(todos: TodoItem[]): string { + const counts = { pending: 0, in_progress: 0, completed: 0 }; + for (const t of todos) counts[t.status]++; + return `${todos.length} todos (${counts.completed} done ยท ${counts.in_progress} in_progress ยท ${counts.pending} pending).`; +} + +/** Reads the current todo list from a session dir. Returns [] if none. 
*/ +export async function readTodos(sessionDir: string): Promise { + try { + const raw = await fs.readFile(join(sessionDir, TODO_FILE), 'utf8'); + const parsed = JSON.parse(raw) as unknown; + if (Array.isArray(parsed) && parsed.every(isTodo)) return parsed; + return []; + } catch { + return []; + } +} diff --git a/packages/core/src/tools/web-fetch.test.ts b/packages/core/src/tools/web-fetch.test.ts new file mode 100644 index 0000000..a2f467f --- /dev/null +++ b/packages/core/src/tools/web-fetch.test.ts @@ -0,0 +1,124 @@ +import { createServer, type Server } from 'node:http'; +import { AddressInfo } from 'node:net'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { WebFetchTool } from './web-fetch.js'; + +function startServer(handler: (req: import('node:http').IncomingMessage, res: import('node:http').ServerResponse) => void): Promise<{ server: Server; url: string }> { + return new Promise((res) => { + const server = createServer(handler); + server.listen(0, '127.0.0.1', () => { + const addr = server.address() as AddressInfo; + res({ server, url: `http://127.0.0.1:${addr.port}` }); + }); + }); +} + +describe('WebFetchTool', () => { + let server: Server | null = null; + beforeEach(() => { + server = null; + }); + afterEach(async () => { + if (server) await new Promise((r) => server!.close(() => r())); + }); + + it('returns body text on 200', async () => { + const s = await startServer((_req, res) => { + res.statusCode = 200; + res.setHeader('content-type', 'text/plain'); + res.end('hello world'); + }); + server = s.server; + const result = await WebFetchTool.execute({ url: s.url }, { cwd: process.cwd() }); + expect(result.isError).toBeFalsy(); + expect(result.content).toBe('hello world'); + expect((result.data as { status: number }).status).toBe(200); + }); + + it('marks isError on 5xx but still returns body', async () => { + const s = await startServer((_req, res) => { + res.statusCode = 500; + res.end('boom'); + }); + server = 
s.server; + const result = await WebFetchTool.execute({ url: s.url }, { cwd: process.cwd() }); + expect(result.isError).toBe(true); + expect(result.content).toBe('boom'); + }); + + it('rejects non-http URL', async () => { + const result = await WebFetchTool.execute( + { url: 'file:///etc/hostname' }, + { cwd: process.cwd() }, + ); + expect(result.isError).toBe(true); + expect(result.content).toMatch(/only http/i); + }); + + it('rejects invalid URL', async () => { + const result = await WebFetchTool.execute( + { url: 'not-a-url' }, + { cwd: process.cwd() }, + ); + expect(result.isError).toBe(true); + expect(result.content).toMatch(/invalid URL/i); + }); + + it('caps oversized responses via content-length', async () => { + process.env['DEEPCODE_WEBFETCH_MAX_BYTES'] = '10'; + const s = await startServer((_req, res) => { + res.statusCode = 200; + res.setHeader('content-length', '1000'); + res.setHeader('content-type', 'text/plain'); + res.end('x'.repeat(1000)); + }); + server = s.server; + try { + const result = await WebFetchTool.execute({ url: s.url }, { cwd: process.cwd() }); + expect(result.isError).toBe(true); + expect(result.content).toMatch(/too large/i); + } finally { + delete process.env['DEEPCODE_WEBFETCH_MAX_BYTES']; + } + }); + + it('caps oversized responses when streaming with no content-length', async () => { + process.env['DEEPCODE_WEBFETCH_MAX_BYTES'] = '50'; + const s = await startServer((_req, res) => { + res.statusCode = 200; + res.setHeader('content-type', 'text/plain'); + // Chunked-style: no content-length set. 
+ res.write('a'.repeat(40)); + res.write('b'.repeat(40)); + res.end(); + }); + server = s.server; + try { + const result = await WebFetchTool.execute({ url: s.url }, { cwd: process.cwd() }); + expect(result.isError).toBe(true); + expect(result.content).toMatch(/stream cap/i); + } finally { + delete process.env['DEEPCODE_WEBFETCH_MAX_BYTES']; + } + }); + + it('honors abort signal', async () => { + const s = await startServer((_req, res) => { + // Never respond โ€” let abort fire + setTimeout(() => { + try { + res.end('late'); + } catch { + /* ignore */ + } + }, 5000); + }); + server = s.server; + const ctrl = new AbortController(); + const p = WebFetchTool.execute({ url: s.url }, { cwd: process.cwd(), signal: ctrl.signal }); + setTimeout(() => ctrl.abort(), 100); + const result = await p; + expect(result.isError).toBe(true); + expect(result.content).toMatch(/aborted/i); + }, 8000); +}); diff --git a/packages/core/src/tools/web-fetch.ts b/packages/core/src/tools/web-fetch.ts new file mode 100644 index 0000000..f0dd034 --- /dev/null +++ b/packages/core/src/tools/web-fetch.ts @@ -0,0 +1,132 @@ +// WebFetch tool โ€” fetch a URL and return its body as text. +// Spec: docs/DEVELOPMENT_PLAN.md ยง3.15 (M3c-rest, plan ยง3.15.4) +// +// Safety: +// ยท Caps response at WEBFETCH_MAX_BYTES (default 5 MiB). +// ยท Honors AbortSignal from agent loop. +// ยท Strips request to HEAD/GET only โ€” no POST from this tool (use Bash + curl +// if a write is truly needed; permission gate catches that). +// ยท No redirect-following beyond fetch's default; if host policy needs custom +// allowlists, wire through ctx (future work). + +import type { ToolContext, ToolHandler, ToolResult } from '../types.js'; + +interface FetchInput { + url: string; + /** Optional prompt for the model โ€” surfaced in response metadata for traceability. 
*/ + prompt?: string; +} + +const DEFAULT_MAX_BYTES = 5 * 1024 * 1024; +const TIMEOUT_MS = 30_000; + +export const WebFetchTool: ToolHandler = { + name: 'WebFetch', + definition: { + name: 'WebFetch', + description: + 'Fetch a URL via GET and return its body as text. Capped at 5 MiB. Read-only โ€” for writes, use Bash + curl with an explicit permission grant.', + inputSchema: { + type: 'object', + properties: { + url: { + type: 'string', + description: 'Absolute http(s) URL.', + }, + prompt: { + type: 'string', + description: 'Optional intent โ€” recorded in the result metadata.', + }, + }, + required: ['url'], + }, + }, + async execute(rawInput: Record, ctx: ToolContext): Promise { + const input = rawInput as unknown as FetchInput; + if (!input?.url || typeof input.url !== 'string') { + return { content: 'Error: url is required (string).', isError: true }; + } + let parsed: URL; + try { + parsed = new URL(input.url); + } catch { + return { content: `Error: invalid URL: ${input.url}`, isError: true }; + } + if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { + return { + content: `Error: only http(s) URLs supported (got ${parsed.protocol}).`, + isError: true, + }; + } + + const maxBytes = Number(process.env['DEEPCODE_WEBFETCH_MAX_BYTES'] ?? DEFAULT_MAX_BYTES); + const controller = new AbortController(); + const tid = setTimeout(() => controller.abort(), TIMEOUT_MS); + const linkedAbort = () => controller.abort(); + if (ctx.signal) ctx.signal.addEventListener('abort', linkedAbort); + + try { + const res = await fetch(parsed.toString(), { + method: 'GET', + redirect: 'follow', + headers: { 'user-agent': 'DeepCode/0.1 (+https://github.com/oratis/deepcode)' }, + signal: controller.signal, + }); + const status = res.status; + const contentType = res.headers.get('content-type') ?? ''; + const contentLength = Number(res.headers.get('content-length') ?? 
0); + if (contentLength && contentLength > maxBytes) { + return { + content: `Error: response too large (${contentLength} > ${maxBytes} bytes).`, + isError: true, + data: { url: parsed.toString(), status, contentLength }, + }; + } + // Stream body with a hard byte cap (content-length may be missing for chunked). + const reader = res.body?.getReader(); + let received = 0; + const chunks: Uint8Array[] = []; + if (reader) { + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + received += value.byteLength; + if (received > maxBytes) { + try { + await reader.cancel(); + } catch { + /* ignore */ + } + return { + content: `Error: response exceeded ${maxBytes} bytes (stream cap).`, + isError: true, + data: { url: parsed.toString(), status, partialBytes: received }, + }; + } + chunks.push(value); + } + } + const body = Buffer.concat(chunks).toString('utf8'); + return { + content: body, + data: { + url: parsed.toString(), + status, + contentType, + bytes: received, + prompt: input.prompt, + }, + isError: !res.ok, + }; + } catch (err) { + const e = err as Error; + if (e.name === 'AbortError') { + return { content: `Error: fetch aborted (timeout ${TIMEOUT_MS}ms or signal).`, isError: true }; + } + return { content: `Error fetching ${parsed.toString()}: ${e.message}`, isError: true }; + } finally { + clearTimeout(tid); + if (ctx.signal) ctx.signal.removeEventListener('abort', linkedAbort); + } + }, +}; diff --git a/packages/core/src/tools/web-search.test.ts b/packages/core/src/tools/web-search.test.ts new file mode 100644 index 0000000..b63b664 --- /dev/null +++ b/packages/core/src/tools/web-search.test.ts @@ -0,0 +1,105 @@ +import { createServer, type Server } from 'node:http'; +import { AddressInfo } from 'node:net'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { parseDuckDuckGoHtml, WebSearchTool } from './web-search.js'; + +const SAMPLE_HTML = ` + + +
+

Example Second

+
Second snippet
+
+ +`; + +function startServer(body: string, status = 200): Promise<{ server: Server; url: string }> { + return new Promise((res) => { + const server = createServer((_req, response) => { + response.statusCode = status; + response.setHeader('content-type', 'text/html'); + response.end(body); + }); + server.listen(0, '127.0.0.1', () => { + const addr = server.address() as AddressInfo; + res({ server, url: `http://127.0.0.1:${addr.port}/?q={q}` }); + }); + }); +} + +describe('parseDuckDuckGoHtml', () => { + it('extracts title, URL, and snippet from DDG markup', () => { + const hits = parseDuckDuckGoHtml(SAMPLE_HTML); + expect(hits).toHaveLength(2); + expect(hits[0]?.title).toBe('Example First'); + expect(hits[0]?.url).toBe('https://example.com/first'); + expect(hits[0]?.snippet).toBe('First snippet text & more'); + expect(hits[1]?.title).toBe('Example Second'); + expect(hits[1]?.url).toBe('https://example.org/second'); + expect(hits[1]?.snippet).toBe('Second snippet'); + }); + + it('returns [] on garbage HTML', () => { + expect(parseDuckDuckGoHtml('no results')).toEqual([]); + }); +}); + +describe('WebSearchTool', () => { + let server: Server | null = null; + afterEach(async () => { + if (server) await new Promise((r) => server!.close(() => r())); + delete process.env['DEEPCODE_WEBSEARCH_URL_TEMPLATE']; + }); + beforeEach(() => { + server = null; + }); + + it('returns formatted hits from a stubbed backend', async () => { + const s = await startServer(SAMPLE_HTML); + server = s.server; + process.env['DEEPCODE_WEBSEARCH_URL_TEMPLATE'] = s.url; + const result = await WebSearchTool.execute({ query: 'example' }, { cwd: process.cwd() }); + expect(result.isError).toBeFalsy(); + expect(result.content).toContain('Example First'); + expect(result.content).toContain('https://example.com/first'); + expect((result.data as { hits: unknown[] }).hits).toHaveLength(2); + }); + + it('honors limit', async () => { + const s = await startServer(SAMPLE_HTML); + server = s.server; + 
process.env['DEEPCODE_WEBSEARCH_URL_TEMPLATE'] = s.url; + const result = await WebSearchTool.execute( + { query: 'example', limit: 1 }, + { cwd: process.cwd() }, + ); + const data = result.data as { hits: unknown[] }; + expect(data.hits).toHaveLength(1); + }); + + it('returns "no results" message when nothing matches', async () => { + const s = await startServer('nothing here'); + server = s.server; + process.env['DEEPCODE_WEBSEARCH_URL_TEMPLATE'] = s.url; + const result = await WebSearchTool.execute({ query: 'foo' }, { cwd: process.cwd() }); + expect(result.isError).toBeFalsy(); + expect(result.content).toMatch(/No results/i); + }); + + it('rejects empty query', async () => { + const result = await WebSearchTool.execute({ query: '' }, { cwd: process.cwd() }); + expect(result.isError).toBe(true); + }); + + it('returns error when backend 5xx', async () => { + const s = await startServer('boom', 500); + server = s.server; + process.env['DEEPCODE_WEBSEARCH_URL_TEMPLATE'] = s.url; + const result = await WebSearchTool.execute({ query: 'x' }, { cwd: process.cwd() }); + expect(result.isError).toBe(true); + expect(result.content).toMatch(/500/); + }); +}); diff --git a/packages/core/src/tools/web-search.ts b/packages/core/src/tools/web-search.ts new file mode 100644 index 0000000..7efe36a --- /dev/null +++ b/packages/core/src/tools/web-search.ts @@ -0,0 +1,149 @@ +// WebSearch tool โ€” query the web and return top-N result links + titles + snippets. +// Spec: docs/DEVELOPMENT_PLAN.md ยง3.15 (M3c-rest, plan ยง3.15.4) +// +// Approach: +// ยท Default backend: DuckDuckGo HTML endpoint (no API key required). Parsed +// with a tolerant regex; sufficient for "give the agent a few links to fetch". +// ยท Pluggable via `DEEPCODE_WEBSEARCH_URL_TEMPLATE` (e.g. for self-hosted SearXNG). +// ยท Caps to N=8 results. +// +// Caveats acknowledged: +// ยท DDG's markup changes; we parse defensively but tests stub the HTML. +// ยท No image / news verticals โ€” just web links. 
import type { ToolContext, ToolHandler, ToolResult } from '../types.js';

/** Input accepted by the WebSearch tool. */
interface SearchInput {
  /** Free-text search query; must be a non-empty string. */
  query: string;
  /** Optional cap on returned hits; clamped to 1..DEFAULT_LIMIT. */
  limit?: number;
}

/** One parsed search result. */
export interface SearchHit {
  title: string;
  url: string;
  snippet: string;
}

const DEFAULT_LIMIT = 8;
const TIMEOUT_MS = 20_000;
const DEFAULT_TEMPLATE = 'https://duckduckgo.com/html/?q={q}';

/** Named character references decoded by `decodeHtml`; anything else is left as-is. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
]);

/**
 * Normalise the caller-supplied `limit` to an integer in 1..DEFAULT_LIMIT.
 *
 * Missing, non-numeric, or NaN values fall back to DEFAULT_LIMIT. Without this
 * guard a NaN limit reaches `Array.prototype.slice(0, NaN)`, which yields an
 * empty list and a misleading "No results" answer.
 */
function resolveLimit(requested: unknown): number {
  const n = typeof requested === 'string' ? Number(requested) : requested;
  if (typeof n !== 'number' || Number.isNaN(n)) return DEFAULT_LIMIT;
  return Math.max(1, Math.min(DEFAULT_LIMIT, Math.floor(n)));
}

/** Best-effort read of `name` from a thrown value (it may not be an `Error` instance). */
function thrownName(err: unknown): string {
  if (typeof err === 'object' && err !== null && 'name' in err) {
    return String((err as { name: unknown }).name);
  }
  return '';
}

export const WebSearchTool: ToolHandler = {
  name: 'WebSearch',
  definition: {
    name: 'WebSearch',
    description:
      'Search the web and return up to 8 results (title + URL + snippet). Default backend: DuckDuckGo HTML. Set DEEPCODE_WEBSEARCH_URL_TEMPLATE to point at a self-hosted SearXNG.',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'Search query.' },
        limit: { type: 'number', description: 'Max results (1-8, default 8).' },
      },
      required: ['query'],
    },
  },

  /**
   * Run one search against the configured backend.
   *
   * @param rawInput - Untrusted tool input; expected shape is `SearchInput`.
   * @param ctx - Tool context; only `ctx.signal` is consulted here.
   * @returns Formatted hits in `content` and structured hits in `data`;
   *   `isError: true` on invalid input, non-2xx backend status, abort, or
   *   network failure. Never throws.
   */
  async execute(rawInput: Record<string, unknown>, ctx: ToolContext): Promise<ToolResult> {
    const query = rawInput?.['query'];
    if (typeof query !== 'string' || !query) {
      return { content: 'Error: query is required (string).', isError: true };
    }
    const abortedMessage = `Error: search aborted (timeout ${TIMEOUT_MS}ms or signal).`;
    // An already-aborted signal never fires another 'abort' event, so check it up front.
    if (ctx.signal?.aborted) {
      return { content: abortedMessage, isError: true };
    }

    const limit = resolveLimit(rawInput['limit']);
    // An empty/blank env var is treated as "unset" rather than as an unusable URL.
    const configured = process.env['DEEPCODE_WEBSEARCH_URL_TEMPLATE'];
    const template = configured && configured.trim() ? configured : DEFAULT_TEMPLATE;
    // split/join substitutes every `{q}` placeholder, not just the first one.
    const url = template.split('{q}').join(encodeURIComponent(query));

    const controller = new AbortController();
    const tid = setTimeout(() => controller.abort(), TIMEOUT_MS);
    const linkedAbort = (): void => controller.abort();
    ctx.signal?.addEventListener('abort', linkedAbort, { once: true });

    try {
      const res = await fetch(url, {
        method: 'GET',
        redirect: 'follow',
        headers: { 'user-agent': 'DeepCode/0.1 (+https://github.com/oratis/deepcode)' },
        signal: controller.signal,
      });
      if (!res.ok) {
        return { content: `Error: search backend returned ${res.status}`, isError: true };
      }
      // NOTE(review): unlike WebFetch, the response body is not size-capped here.
      const html = await res.text();
      const hits = parseDuckDuckGoHtml(html).slice(0, limit);
      if (hits.length === 0) {
        return {
          content: `No results for "${query}".`,
          data: { hits: [], backend: url, query },
        };
      }
      const formatted = hits
        .map((h, i) => `${i + 1}. ${h.title}\n   ${h.url}\n   ${h.snippet}`)
        .join('\n\n');
      return {
        content: formatted,
        data: { hits, backend: url, query },
      };
    } catch (err: unknown) {
      if (thrownName(err) === 'AbortError') {
        return { content: abortedMessage, isError: true };
      }
      const message = err instanceof Error ? err.message : String(err);
      return { content: `Error: ${message}`, isError: true };
    } finally {
      clearTimeout(tid);
      ctx.signal?.removeEventListener('abort', linkedAbort);
    }
  },
};

/**
 * Parse DDG's HTML result page. The markup uses `result__a` for titles and
 * `result__snippet` for descriptions. We're permissive: any reasonable a-tag
 * containing href= within a result block counts.
 *
 * @param html - Raw HTML of a result page.
 * @returns Hits in document order; entries lacking a URL or title are dropped.
 *   Returns `[]` when nothing matches.
 */
export function parseDuckDuckGoHtml(html: string): SearchHit[] {
  const hits: SearchHit[] = [];
  // Match <a class="result__a" href="URL">title</a> ... then a snippet held in
  // either <a class="result__snippet"> or <div class="result__snippet">.
  // Built per call because a /g regex carries `lastIndex` state.
  const blockRe =
    /<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?(?:<a[^>]*class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>|<div[^>]*class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/div>)/g;
  let m: RegExpExecArray | null;
  while ((m = blockRe.exec(html)) !== null) {
    const url = unwrapDdgRedirect(decodeHtml(m[1] ?? ''));
    const title = toPlainText(m[2] ?? '');
    const snippet = toPlainText(m[3] ?? m[4] ?? '');
    if (url && title) {
      hits.push({ title, url, snippet });
    }
  }
  return hits;
}

/**
 * Convert an HTML fragment to single-line plain text.
 *
 * Tags are stripped BEFORE entities are decoded, so escaped text such as
 * `&lt;div&gt;` survives as the literal `<div>` instead of being removed as markup.
 */
function toPlainText(fragment: string): string {
  return decodeHtml(stripTags(fragment)).replace(/\s+/g, ' ').trim();
}

/**
 * Unwrap a DDG click-tracking link.
 *
 * DDG wraps results as `//duckduckgo.com/l/?uddg=<percent-encoded target>&...`.
 * Non-http(s) or unparsable input is returned unchanged.
 */
function unwrapDdgRedirect(url: string): string {
  try {
    const absolute = url.startsWith('//') ? `https:${url}` : url;
    if (!/^https?:/i.test(absolute)) return url;
    // URLSearchParams.get() already percent-decodes the value; decoding a second
    // time would corrupt targets containing a literal "%25" (or throw on a bare "%").
    const target = new URL(absolute).searchParams.get('uddg');
    return target || absolute;
  } catch {
    return url;
  }
}

/** Replace every tag with a space and collapse whitespace runs to one space. */
function stripTags(s: string): string {
  return s.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ');
}

/**
 * Decode HTML character references in a single pass.
 *
 * Handles the named entities in NAMED_ENTITIES plus decimal (`&#39;`) and hex
 * (`&#x27;`) numeric references. A single pass means `&amp;lt;` becomes the
 * text `&lt;`, not `<`. Unknown or out-of-range references are left untouched.
 */
function decodeHtml(s: string): string {
  return s.replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g, (whole: string, body: string) => {
    if (!body.startsWith('#')) {
      return NAMED_ENTITIES.get(body) ?? whole;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const code = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isScalar =
      Number.isInteger(code) && code > 0 && code <= 0x10ffff && !(code >= 0xd800 && code <= 0xdfff);
    return isScalar ? String.fromCodePoint(code) : whole;
  });
}