diff --git a/.changeset/grok-build-host-tools.md b/.changeset/grok-build-host-tools.md new file mode 100644 index 000000000000..fa93d17351ec --- /dev/null +++ b/.changeset/grok-build-host-tools.md @@ -0,0 +1,10 @@ +--- +'@ai-sdk/harness-grok-build': patch +--- + +feat(harness-grok-build): drive the `grok agent stdio` ACP surface + +Move the adapter to ACP (JSON-RPC over stdio): tool-call, tool-result, and +file-change events; token usage and a structured finish reason on finish; +host-defined custom tools via an in-sandbox MCP server; and built-in tool +approvals through the ACP `session/request_permission` flow. diff --git a/content/providers/02-ai-sdk-harnesses/04-grok-build.mdx b/content/providers/02-ai-sdk-harnesses/04-grok-build.mdx index 3e1c7e3dd517..e05cc3fd8659 100644 --- a/content/providers/02-ai-sdk-harnesses/04-grok-build.mdx +++ b/content/providers/02-ai-sdk-harnesses/04-grok-build.mdx @@ -6,9 +6,11 @@ description: Learn how to use the Grok Build harness adapter. # Grok Build Harness The Grok Build harness adapter connects `HarnessAgent` to the `grok` CLI. The -adapter runs a bridge inside the sandbox and streams the CLI's -`--output-format streaming-json` events back to the host over a sandbox-exposed -WebSocket. +adapter drives `grok agent stdio` over the Agent Client Protocol (ACP/JSON-RPC) +through a bridge inside the sandbox and streams its events back to the host over +a sandbox-exposed WebSocket. This surfaces text and reasoning, tool-call, +tool-result, and file-change events, token usage on finish, a structured finish +reason, host-defined (custom) tools, and built-in tool approvals. Harness packages are **experimental**. 
Expect breaking changes between @@ -103,8 +105,8 @@ Use `createGrokBuild()` to configure the runtime: ```ts const harness = createGrokBuild({ - model: 'grok-code-fast-1', - planMode: true, + model: 'grok-build-0.1', + reasoningEffort: 'high', }); ``` @@ -112,6 +114,7 @@ Settings: - `auth`: xAI or AI Gateway authentication settings. - `model`: Grok model id. If omitted, the adapter uses its pinned default. +- `reasoningEffort`: reasoning effort (`'low' | 'medium' | 'high'`), passed to the CLI's `--reasoning-effort`. - `planMode`: run the CLI in plan mode. - `port`: bridge port override. - `startupTimeoutMs`: maximum time to wait for the bridge to start. @@ -153,15 +156,64 @@ const sandbox = createVercelSandbox({ }); ``` -## Known limitations +## Tools -The grok CLI's `--output-format streaming-json` surface is narrow: +Host-defined (custom) tools passed to `agent.tools` are exposed to the CLI +through an in-sandbox MCP server and executed on the host: -- Streams reasoning and text only — no tool-call, tool-result, or file-change - events, and no token usage. -- Allow-all permission mode only. The CLI runs with `--always-approve` and - executes tools itself; use `permissionMode: 'allow-all'`. -- No compaction. +```ts +import { tool } from 'ai'; +import { z } from 'zod'; + +const weather = tool({ + description: 'Get the current temperature for a city.', + inputSchema: z.object({ city: z.string() }), + execute: async ({ city }) => ({ city, celsius: 12 }), +}); + +const agent = new HarnessAgent({ + harness: grokBuild, + sandbox, + tools: { weather }, +}); +``` + +The adapter also exposes these common Grok Build built-ins through `agent.tools`: + +- `read` +- `write` +- `edit` +- `bash` +- `glob` +- `grep` +- `webSearch` + +Tool-call, tool-result, and file-change events appear in the stream, and token +usage is reported on finish alongside a structured finish reason. 
+ +## Tool approvals + +Grok Build requests approval before running a tool via the ACP +`session/request_permission` flow when `permissionMode` is `allow-reads` or +`allow-edits` (use `allow-all` to auto-approve). The adapter surfaces each +request to the host so it can be approved or rejected. + +ACP approval is **synchronous**: Grok pauses the turn and waits for the reply on +the same live connection. Per the ACP specification, a prompt turn cannot be +paused and resumed later — cancellation ends it. Approval therefore only works +when the host answers inline over a connection that stays open for the whole +turn. + + + This means per-tool approval works in single-stream setups (e.g. a TUI, or a + server route backed by a persistent SSE/WebSocket connection), but **not** in a + request/response HTTP route that ends one response at the approval and resumes + in a second request. The standard AI SDK `toolApproval: 'user-approval'` + split-request pattern cannot drive Grok Build approvals over plain HTTP, + because Grok's turn is mid-flight and ACP cannot resume it. For such routes, + run with `permissionMode: 'allow-all'` so the turn never blocks, or keep the + connection open for the turn's lifetime and answer approvals inline. 
+ ## Related diff --git a/examples/ai-functions/src/harness-agent/grok-build/with-reasoning-effort.ts b/examples/ai-functions/src/harness-agent/grok-build/with-reasoning-effort.ts new file mode 100644 index 000000000000..59ff2bebce11 --- /dev/null +++ b/examples/ai-functions/src/harness-agent/grok-build/with-reasoning-effort.ts @@ -0,0 +1,35 @@ +import { HarnessAgent } from '@ai-sdk/harness/agent'; +import { createGrokBuild } from '@ai-sdk/harness-grok-build'; +import { printFullStream } from '../../lib/print-full-stream'; +import { run } from '../../lib/run'; +import { createVercelSandbox } from '@ai-sdk/sandbox-vercel'; + +run(async () => { + const sandbox = createVercelSandbox({ + runtime: 'node24', + ports: [4000], + timeout: 10 * 60 * 1000, + }); + const agent = new HarnessAgent({ + harness: createGrokBuild({ reasoningEffort: 'high' }), + sandbox, + }); + + let exitCode = 0; + const session = await agent.createSession(); + try { + const result = await agent.stream({ + session, + prompt: + 'Plan a multi-step path from A to B where A=(0,0) and B=(3,4) on a grid, moving only N/S/E/W. 
' + + 'Explain your reasoning, then give the final path.', + }); + await printFullStream({ result }); + } catch (err) { + exitCode = 1; + console.error('[example] failed:', err); + } finally { + await session.destroy(); + process.exit(exitCode); + } +}); diff --git a/examples/ai-functions/src/harness-agent/grok-build/with-tools.ts b/examples/ai-functions/src/harness-agent/grok-build/with-tools.ts new file mode 100644 index 000000000000..cddfad0c4cdc --- /dev/null +++ b/examples/ai-functions/src/harness-agent/grok-build/with-tools.ts @@ -0,0 +1,54 @@ +import { HarnessAgent } from '@ai-sdk/harness/agent'; +import { grokBuild } from '@ai-sdk/harness-grok-build'; +import { createVercelSandbox } from '@ai-sdk/sandbox-vercel'; +import { tool } from 'ai'; +import { z } from 'zod'; +import { printFullStream } from '../../lib/print-full-stream'; +import { run } from '../../lib/run'; + +run(async () => { + const sandbox = createVercelSandbox({ + runtime: 'node24', + ports: [4000], + timeout: 10 * 60 * 1000, + }); + const weather = tool({ + description: 'Get the current temperature for a city.', + inputSchema: z.object({ city: z.string() }), + execute: async ({ city }: { city: string }) => { + const temps: Record = { + Paris: 12, + Tokyo: 18, + Reykjavik: 3, + }; + return { city, celsius: temps[city] ?? 20 }; + }, + }); + + const agent = new HarnessAgent({ + harness: grokBuild, + sandbox, + tools: { weather }, + permissionMode: 'allow-all', + }); + + let exitCode = 0; + const session = await agent.createSession(); + try { + const result = await agent.stream({ + session, + prompt: + 'What is the weather in Paris and Reykjavik? 
Use the `weather` tool, then summarize in one sentence.', + }); + + await printFullStream({ result }); + + console.log('steps:', (await result.steps).length); + } catch (err) { + exitCode = 1; + console.error('[example] failed:', err); + } finally { + await session.destroy(); + process.exit(exitCode); + } +}); diff --git a/examples/harness-e2e-next/agent/harness/grok-build/weather-agent.ts b/examples/harness-e2e-next/agent/harness/grok-build/weather-agent.ts new file mode 100644 index 000000000000..50c0796d4af8 --- /dev/null +++ b/examples/harness-e2e-next/agent/harness/grok-build/weather-agent.ts @@ -0,0 +1,45 @@ +import { weatherTool } from '@/lib/tools/weather-tool'; +import { + WEATHER_CODES_REFERENCE, + weatherCodesSkill, + weatherForecastSkill, + weatherInstructions, +} from '@/lib/weather-utils'; +import { + HarnessAgent, + createFileReporter, + createTraceTreeReporter, +} from '@ai-sdk/harness/agent'; +import { grokBuild } from '@ai-sdk/harness-grok-build'; +import { createVercelSandbox } from '@ai-sdk/sandbox-vercel'; +import type { InferUITools, UIMessage } from 'ai'; + +export const weatherGrokBuildHarnessAgent = new HarnessAgent({ + harness: grokBuild, + instructions: weatherInstructions, + skills: [weatherForecastSkill, weatherCodesSkill], + tools: { get_weather: weatherTool }, + sandbox: createVercelSandbox({ + runtime: 'node24', + ports: [4000], + }), + onSandboxSession: async ({ session, sessionWorkDir, abortSignal }) => { + await session.writeTextFile({ + path: `${sessionWorkDir}/weather-codes.md`, + content: WEATHER_CODES_REFERENCE, + abortSignal, + }); + }, + telemetry: { + integrations: [ + createTraceTreeReporter(), + createFileReporter({ dir: '.harness-observability/grok-build/weather' }), + ], + }, +}); + +export type WeatherGrokBuildHarnessAgentMessage = UIMessage< + unknown, + never, + InferUITools +>; diff --git a/examples/harness-e2e-next/app/api/harness/grok-build/weather/route.ts 
b/examples/harness-e2e-next/app/api/harness/grok-build/weather/route.ts new file mode 100644 index 000000000000..0b630870755b --- /dev/null +++ b/examples/harness-e2e-next/app/api/harness/grok-build/weather/route.ts @@ -0,0 +1,41 @@ +import { weatherGrokBuildHarnessAgent } from '@/agent/harness/grok-build/weather-agent'; +import { + detachAndPersist, + resumeOrCreateSession, +} from '@/util/harness-resume-store'; +import { + convertToModelMessages, + createUIMessageStreamResponse, + toUIMessageStream, + type UIMessage, +} from 'ai'; + +export async function POST(request: Request) { + const body: { + id?: string; + messages: UIMessage[]; + } = await request.json(); + + if (!body.id) { + return new Response('Missing chat id', { status: 400 }); + } + const chatId = body.id; + const messages = await convertToModelMessages(body.messages); + + const session = await resumeOrCreateSession( + weatherGrokBuildHarnessAgent, + chatId, + ); + + const result = await weatherGrokBuildHarnessAgent.stream({ + session, + messages, + }); + + return createUIMessageStreamResponse({ + stream: toUIMessageStream({ + stream: result.stream, + onFinish: () => detachAndPersist(chatId, session), + }), + }); +} diff --git a/examples/harness-e2e-next/app/harness/grok-build/weather/page.tsx b/examples/harness-e2e-next/app/harness/grok-build/weather/page.tsx new file mode 100644 index 000000000000..2cbf85d69391 --- /dev/null +++ b/examples/harness-e2e-next/app/harness/grok-build/weather/page.tsx @@ -0,0 +1,19 @@ +import ChatIdProvider from '@/components/chat-id-provider'; +import WeatherGrokBuildHarnessChat from '@/components/weather-grok-build-harness-chat'; + +export const metadata = { + title: 'Grok Build — Weather', +}; + +const STORAGE_KEY = 'harness-grok-build-weather-chat-id'; + +export default function HarnessGrokBuildWeatherPage() { + return ( + + + + ); +} diff --git a/examples/harness-e2e-next/app/page.tsx b/examples/harness-e2e-next/app/page.tsx index 80c33b1aa230..67f54bb1ae0a 100644 
--- a/examples/harness-e2e-next/app/page.tsx +++ b/examples/harness-e2e-next/app/page.tsx @@ -45,7 +45,7 @@ const HARNESSES = [ { slug: 'grok-build', label: 'Grok Build', - variants: ['basic', 'basic-with-stop', 'ai-sdk-coding'], + variants: ['basic', 'basic-with-stop', 'ai-sdk-coding', 'weather'], }, ] as const; diff --git a/examples/harness-e2e-next/components/weather-grok-build-harness-chat.tsx b/examples/harness-e2e-next/components/weather-grok-build-harness-chat.tsx new file mode 100644 index 000000000000..dcb897e5eda3 --- /dev/null +++ b/examples/harness-e2e-next/components/weather-grok-build-harness-chat.tsx @@ -0,0 +1,162 @@ +'use client'; + +import type { WeatherGrokBuildHarnessAgentMessage } from '@/agent/harness/grok-build/weather-agent'; +import { Response } from '@/components/ai-elements/response'; +import { useChatId } from '@/components/chat-id-provider'; +import ChatInput from '@/components/chat-input'; +import DynamicToolView from '@/components/tool/dynamic-tool-view'; +import HarnessBashToolView from '@/components/tool/harness-bash-tool-view'; +import HarnessToolView from '@/components/tool/harness-tool-view'; +import WeatherView from '@/components/tool/weather-tool-view'; +import { useChat } from '@ai-sdk/react'; +import { + DefaultChatTransport, + lastAssistantMessageIsCompleteWithApprovalResponses, +} from 'ai'; + +export default function WeatherGrokBuildHarnessChat({ + apiRoute, + exampleLabel, +}: { + apiRoute: string; + exampleLabel: string; +}) { + const { chatId, resetChatId } = useChatId(); + const { + error, + status, + sendMessage, + messages, + regenerate, + addToolApprovalResponse, + } = useChat({ + id: chatId, + transport: new DefaultChatTransport({ + api: apiRoute, + }), + sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses, + }); + + return ( +
+

Grok Build — {exampleLabel}

+

+ chat id: {chatId} + +

+ + {messages.map(message => ( +
+ {message.role === 'user' ? 'You: ' : 'AI: '} + {message.parts.map((part, index) => { + switch (part.type) { + case 'text': { + return ( + + {part.text} + + ); + } + case 'reasoning': { + return ( + + {part.text} + + ); + } + case 'file': + case 'reasoning-file': { + if (part.mediaType.startsWith('image/')) { + return ( + // eslint-disable-next-line @next/next/no-img-element + Generated image + ); + } + return null; + } + case 'tool-get_weather': { + return ( + + ); + } + case 'tool-bash': { + return ; + } + case 'dynamic-tool': { + if (part.toolName === 'fileChange') { + if (typeof part.input !== 'object' || part.input === null) { + return null; + } + return ( + + ); + } + return ; + } + } + })} +
+ ))} + + {status === 'submitted' && ( +
+ )} + + {error && ( +
+
+ {error.message || String(error)} +
+ +
+ )} + +
+ + sendMessage({ text })} + /> +
+ ); +} diff --git a/examples/harness-e2e-tui/agents/grok-build/weather-agent.ts b/examples/harness-e2e-tui/agents/grok-build/weather-agent.ts new file mode 100644 index 000000000000..50c0796d4af8 --- /dev/null +++ b/examples/harness-e2e-tui/agents/grok-build/weather-agent.ts @@ -0,0 +1,45 @@ +import { weatherTool } from '@/lib/tools/weather-tool'; +import { + WEATHER_CODES_REFERENCE, + weatherCodesSkill, + weatherForecastSkill, + weatherInstructions, +} from '@/lib/weather-utils'; +import { + HarnessAgent, + createFileReporter, + createTraceTreeReporter, +} from '@ai-sdk/harness/agent'; +import { grokBuild } from '@ai-sdk/harness-grok-build'; +import { createVercelSandbox } from '@ai-sdk/sandbox-vercel'; +import type { InferUITools, UIMessage } from 'ai'; + +export const weatherGrokBuildHarnessAgent = new HarnessAgent({ + harness: grokBuild, + instructions: weatherInstructions, + skills: [weatherForecastSkill, weatherCodesSkill], + tools: { get_weather: weatherTool }, + sandbox: createVercelSandbox({ + runtime: 'node24', + ports: [4000], + }), + onSandboxSession: async ({ session, sessionWorkDir, abortSignal }) => { + await session.writeTextFile({ + path: `${sessionWorkDir}/weather-codes.md`, + content: WEATHER_CODES_REFERENCE, + abortSignal, + }); + }, + telemetry: { + integrations: [ + createTraceTreeReporter(), + createFileReporter({ dir: '.harness-observability/grok-build/weather' }), + ], + }, +}); + +export type WeatherGrokBuildHarnessAgentMessage = UIMessage< + unknown, + never, + InferUITools +>; diff --git a/examples/harness-e2e-tui/agents/grok-build/weather-approval-agent.ts b/examples/harness-e2e-tui/agents/grok-build/weather-approval-agent.ts new file mode 100644 index 000000000000..0dae4605151a --- /dev/null +++ b/examples/harness-e2e-tui/agents/grok-build/weather-approval-agent.ts @@ -0,0 +1,51 @@ +import { weatherTool } from '@/lib/tools/weather-tool'; +import { + WEATHER_CODES_REFERENCE, + weatherCodesSkill, + weatherForecastSkill, + 
weatherInstructions, +} from '@/lib/weather-utils'; +import { + HarnessAgent, + createFileReporter, + createTraceTreeReporter, +} from '@ai-sdk/harness/agent'; +import { grokBuild } from '@ai-sdk/harness-grok-build'; +import { createVercelSandbox } from '@ai-sdk/sandbox-vercel'; +import type { InferUITools, UIMessage } from 'ai'; + +export const weatherApprovalGrokBuildHarnessAgent = new HarnessAgent({ + harness: grokBuild, + instructions: weatherInstructions, + skills: [weatherForecastSkill, weatherCodesSkill], + tools: { get_weather: weatherTool }, + toolApproval: { + get_weather: 'user-approval', + }, + permissionMode: 'allow-edits', + sandbox: createVercelSandbox({ + runtime: 'node24', + ports: [4000], + }), + onSandboxSession: async ({ session, sessionWorkDir, abortSignal }) => { + await session.writeTextFile({ + path: `${sessionWorkDir}/weather-codes.md`, + content: WEATHER_CODES_REFERENCE, + abortSignal, + }); + }, + telemetry: { + integrations: [ + createTraceTreeReporter(), + createFileReporter({ + dir: '.harness-observability/grok-build/weather-approval', + }), + ], + }, +}); + +export type WeatherApprovalGrokBuildHarnessAgentMessage = UIMessage< + unknown, + never, + InferUITools +>; diff --git a/examples/harness-e2e-tui/harness/grok-build/weather-approval.ts b/examples/harness-e2e-tui/harness/grok-build/weather-approval.ts new file mode 100644 index 000000000000..ab8058fd7922 --- /dev/null +++ b/examples/harness-e2e-tui/harness/grok-build/weather-approval.ts @@ -0,0 +1,8 @@ +import { weatherApprovalGrokBuildHarnessAgent } from '../../agents/grok-build/weather-approval-agent'; +import { runTUI } from '../../lib/run-tui'; + +await runTUI({ + agent: weatherApprovalGrokBuildHarnessAgent, + entrypointUrl: import.meta.url, + title: 'Grok Build — Weather Approval', +}); diff --git a/examples/harness-e2e-tui/harness/grok-build/weather.ts b/examples/harness-e2e-tui/harness/grok-build/weather.ts new file mode 100644 index 000000000000..a9a38a03cca3 --- /dev/null 
+++ b/examples/harness-e2e-tui/harness/grok-build/weather.ts @@ -0,0 +1,8 @@ +import { weatherGrokBuildHarnessAgent } from '../../agents/grok-build/weather-agent'; +import { runTUI } from '../../lib/run-tui'; + +await runTUI({ + agent: weatherGrokBuildHarnessAgent, + entrypointUrl: import.meta.url, + title: 'Grok Build — Weather', +}); diff --git a/packages/harness-grok-build/README.md b/packages/harness-grok-build/README.md index d167ccb68311..bfb37a7554ba 100644 --- a/packages/harness-grok-build/README.md +++ b/packages/harness-grok-build/README.md @@ -1,6 +1,6 @@ # AI SDK - Grok Build Harness -`HarnessV1` adapter backed by the `grok` CLI (`@xai-official/grok`). The adapter ships a bridge process that runs inside a sandbox and talks to the host over a WebSocket on a sandbox-proxied loopback port. +`HarnessV1` adapter backed by the `grok` CLI (`@xai-official/grok`). The adapter drives `grok agent stdio` over the Agent Client Protocol (ACP/JSON-RPC) through a bridge process that runs inside a sandbox and talks to the host over a WebSocket on a sandbox-proxied loopback port. ## Setup @@ -49,13 +49,21 @@ Authentication is resolved from the host environment and forwarded to the sandbo The CLI maps these internally to `GROK_MODELS_BASE_URL` / `GROK_CODE_XAI_API_KEY`. -## Limitations +## Capabilities -The grok CLI's `--output-format streaming-json` surface is narrow: +The ACP surface streams: -- Streams reasoning and text only — no tool-call, tool-result, or file-change events, and no token usage. -- Allow-all permission mode only (`supportsBuiltinToolApprovals: false`); the CLI runs with `--always-approve` and executes tools itself. -- No compaction. +- Text and reasoning, plus tool-call, tool-result, and file-change events. +- Token usage on finish, with a structured finish reason. +- Host-defined (custom) tools via `agent.tools` (executed on the host through an in-sandbox MCP server). 
+- Built-in tool approvals via the ACP `session/request_permission` flow (`supportsBuiltinToolApprovals: true`); use `permissionMode: 'allow-reads'` or `'allow-edits'`. + +ACP approval is synchronous — Grok pauses the turn and waits for the reply on the +same live connection, and per the ACP spec a turn cannot be paused and resumed. +Approval works in single-stream setups (TUI, or a persistent SSE/WebSocket route) +but not in a request/response HTTP route that splits the response across the +approval (the AI SDK `toolApproval: 'user-approval'` pattern). For plain HTTP +routes, use `permissionMode: 'allow-all'`. ## Related diff --git a/packages/harness-grok-build/package.json b/packages/harness-grok-build/package.json index e4c3902ad723..cf9e63cedb19 100644 --- a/packages/harness-grok-build/package.json +++ b/packages/harness-grok-build/package.json @@ -42,6 +42,7 @@ "zod": "3.25.76" }, "devDependencies": { + "@modelcontextprotocol/sdk": "1.29.0", "@xai-official/grok": "0.2.51", "@types/node": "22.19.19", "@types/ws": "^8.5.13", diff --git a/packages/harness-grok-build/src/__fixtures__/README.md b/packages/harness-grok-build/src/__fixtures__/README.md index b9c23d2121ee..a971ecb64f0f 100644 --- a/packages/harness-grok-build/src/__fixtures__/README.md +++ b/packages/harness-grok-build/src/__fixtures__/README.md @@ -1,33 +1,20 @@ # Grok Build CLI fixtures -## `streaming-json-basic.jsonl` (REAL capture — source of truth) +The adapter now drives `grok agent stdio` over the Agent Client Protocol +(ACP/JSON-RPC), not `grok -p --output-format streaming-json`. The ACP surface +carries text and reasoning, tool-call / tool-result / file-change session +updates, token usage, and a structured stop reason. -Real output of: +Stream mapping is exercised in the harness unit tests using synthetic ACP +session updates rather than a recorded capture, so no live fixture file is the +source of truth anymore. - grok -p "Create a file hello.txt containing the text hi, then read it back." 
\ - -m grok-build-0.1 --output-format streaming-json --always-approve - -captured against `@xai-official/grok` v0.2.53 (direct xAI API, `XAI_API_KEY`) on -2026-06-18. Home-directory path in the assistant text was redacted to -`/Users/USER`. - -### Actual schema (flat, newline-delimited JSON) -This mode is lean. Only three event types appear: +## `streaming-json-basic.jsonl` (legacy capture — historical only) -- `{"type":"thought","data":""}` — reasoning/thinking text delta -- `{"type":"text","data":""}` — assistant message text delta -- `{"type":"end","stopReason":"EndTurn","sessionId":"","requestId":""}` — terminal +A real capture of the old `streaming-json` mode: -### What this mode does NOT include (important) -- **No tool-call / tool-result events** — even though the agent created and read - the file, `streaming-json` does not surface tool invocations. -- **No file-change events.** -- **No token usage.** - -Full tool/file/usage fidelity requires the `grok agent` ACP (JSON-RPC stdio) -surface instead — that's a planned follow-up (see the harness-grok-build plan). -The v1 adapter maps only thought→reasoning, text→text, end→finish. + grok -p "Create a file hello.txt containing the text hi, then read it back." \ + -m grok-build-0.1 --output-format streaming-json --always-approve -### stopReason values -Observed: `EndTurn`. Others (e.g. max-tokens, cancellation) are unconfirmed — -map defensively. +against `@xai-official/grok` v0.2.53 on 2026-06-18. Retained only as a record of +the pre-ACP surface; it is not used by the current adapter or tests. 
diff --git a/packages/harness-grok-build/src/bridge/acp-client.test.ts b/packages/harness-grok-build/src/bridge/acp-client.test.ts new file mode 100644 index 000000000000..07a6b46a5223 --- /dev/null +++ b/packages/harness-grok-build/src/bridge/acp-client.test.ts @@ -0,0 +1,124 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createAcpClient } from './acp-client'; + +function setup() { + const lines: string[] = []; + const client = createAcpClient({ writeLine: line => lines.push(line) }); + const parsed = () => lines.map(l => JSON.parse(l)); + return { lines, parsed, client }; +} + +describe('createAcpClient', () => { + it('writes a correctly-shaped request line with incrementing ids', () => { + const { parsed, client } = setup(); + void client.request('initialize', { a: 1 }); + void client.request('session/new'); + expect(parsed()).toEqual([ + { jsonrpc: '2.0', id: 1, method: 'initialize', params: { a: 1 } }, + { jsonrpc: '2.0', id: 2, method: 'session/new', params: undefined }, + ]); + }); + + it('resolves a request when a matching id+result line is fed', async () => { + const { client } = setup(); + const p = client.request('session/new'); + client.handleLine( + JSON.stringify({ jsonrpc: '2.0', id: 1, result: { ok: true } }), + ); + await expect(p).resolves.toEqual({ ok: true }); + }); + + it('rejects a request on a matching id+error line', async () => { + const { client } = setup(); + const p = client.request('boom'); + client.handleLine( + JSON.stringify({ + jsonrpc: '2.0', + id: 1, + error: { code: -1, message: 'no' }, + }), + ); + await expect(p).rejects.toEqual({ code: -1, message: 'no' }); + }); + + it('notify writes a line with no id', () => { + const { parsed, client } = setup(); + client.notify('session/update', { x: 1 }); + expect(parsed()).toEqual([ + { jsonrpc: '2.0', method: 'session/update', params: { x: 1 } }, + ]); + expect('id' in parsed()[0]).toBe(false); + }); + + it('fires a registered notification handler', () => { + const { 
client } = setup(); + const cb = vi.fn(); + client.onNotification('session/update', cb); + client.handleLine( + JSON.stringify({ + jsonrpc: '2.0', + method: 'session/update', + params: { y: 2 }, + }), + ); + expect(cb).toHaveBeenCalledWith({ y: 2 }); + }); + + it('fires a request handler and writes back its result', async () => { + const { parsed, client } = setup(); + client.onRequest('session/request_permission', () => ({ + outcome: 'allow', + })); + client.handleLine( + JSON.stringify({ + jsonrpc: '2.0', + id: 7, + method: 'session/request_permission', + }), + ); + await vi.waitFor(() => + expect(parsed()).toEqual([ + { jsonrpc: '2.0', id: 7, result: { outcome: 'allow' } }, + ]), + ); + }); + + it('writes an error response when a request handler throws', async () => { + const { parsed, client } = setup(); + client.onRequest('fs/read_text_file', () => { + throw new Error('nope'); + }); + client.handleLine( + JSON.stringify({ jsonrpc: '2.0', id: 9, method: 'fs/read_text_file' }), + ); + await vi.waitFor(() => { + expect(parsed()[0].id).toBe(9); + expect(parsed()[0].error.message).toContain('nope'); + }); + }); + + it('replies with method-not-found for an unknown inbound request', () => { + const { parsed, client } = setup(); + client.handleLine( + JSON.stringify({ jsonrpc: '2.0', id: 3, method: 'mystery/method' }), + ); + expect(parsed()).toEqual([ + { + jsonrpc: '2.0', + id: 3, + error: { code: -32601, message: 'Method not found' }, + }, + ]); + }); + + it('ignores unknown notifications and malformed lines without throwing', () => { + const { lines, client } = setup(); + expect(() => client.handleLine('not json {')).not.toThrow(); + expect(() => + client.handleLine( + JSON.stringify({ jsonrpc: '2.0', method: 'unknown/x' }), + ), + ).not.toThrow(); + expect(lines).toEqual([]); + }); +}); diff --git a/packages/harness-grok-build/src/bridge/acp-client.ts b/packages/harness-grok-build/src/bridge/acp-client.ts new file mode 100644 index 000000000000..36b8e71e8738 --- 
/dev/null
+++ b/packages/harness-grok-build/src/bridge/acp-client.ts
@@ -0,0 +1,130 @@
+/**
+ * Minimal JSON-RPC 2.0 peer for newline-delimited ACP traffic. It both issues
+ * requests to the other side and answers the requests it receives.
+ */
+export type AcpClient = {
+  /** Sends a request; resolves with `result` or rejects with `error`. */
+  request: (method: string, params?: unknown) => Promise<unknown>;
+  /** Sends a notification (a message without an `id`; no reply expected). */
+  notify: (method: string, params?: unknown) => void;
+  /** Registers the handler for an inbound notification (one per method). */
+  onNotification: (method: string, handler: (params: unknown) => void) => void;
+  /** Registers the handler for an inbound request (one per method). */
+  onRequest: (
+    method: string,
+    handler: (params: unknown) => unknown | Promise<unknown>,
+  ) => void;
+  /** Feeds one received line; malformed input is ignored. */
+  handleLine: (line: string) => void;
+};
+
+/** Outbound side of the transport: writes one serialized message. */
+export type AcpTransport = {
+  writeLine: (line: string) => void;
+};
+
+// JSON-RPC 2.0 reserved error codes.
+const METHOD_NOT_FOUND = -32601;
+const INTERNAL_ERROR = -32603;
+
+/**
+ * Creates an ACP client on top of `io`.
+ *
+ * Outbound request ids start at 1 and increase by one per request. Inbound
+ * lines are classified by shape: `method` + `id` is a request, `method` alone
+ * is a notification, and `id` alone is a response to one of our requests.
+ */
+export function createAcpClient(io: AcpTransport): AcpClient {
+  let nextId = 1;
+  // Requests awaiting a response, keyed by the id they were sent with.
+  const pending = new Map<
+    number,
+    { resolve: (value: unknown) => void; reject: (reason: unknown) => void }
+  >();
+  const notificationHandlers = new Map<string, (params: unknown) => void>();
+  const requestHandlers = new Map<
+    string,
+    (params: unknown) => unknown | Promise<unknown>
+  >();
+
+  const write = (message: unknown) =>
+    io.writeLine(JSON.stringify({ jsonrpc: '2.0', ...(message as object) }));
+
+  const request = (method: string, params?: unknown): Promise<unknown> => {
+    const id = nextId++;
+    return new Promise((resolve, reject) => {
+      pending.set(id, { resolve, reject });
+      write({ id, method, params });
+    });
+  };
+
+  const notify = (method: string, params?: unknown): void => {
+    write({ method, params });
+  };
+
+  const onNotification = (
+    method: string,
+    handler: (params: unknown) => void,
+  ): void => {
+    notificationHandlers.set(method, handler);
+  };
+
+  const onRequest = (
+    method: string,
+    handler: (params: unknown) => unknown | Promise<unknown>,
+  ): void => {
+    requestHandlers.set(method, handler);
+  };
+
+  const handleResponse = (msg: Record<string, unknown>): void => {
+    const entry = pending.get(msg.id as number);
+    // Unknown or already-settled id: nothing is waiting for it.
+    if (!entry) return;
+    pending.delete(msg.id as number);
+    if ('error' in msg) entry.reject(msg.error);
+    else entry.resolve(msg.result);
+  };
+
+  const handleIncomingRequest = (msg: Record<string, unknown>): void => {
+    const id = msg.id;
+    const handler = requestHandlers.get(msg.method as string);
+    if (!handler) {
+      write({
+        id,
+        error: { code: METHOD_NOT_FOUND, message: 'Method not found' },
+      });
+      return;
+    }
+    // Start from a resolved promise so a synchronous throw in the handler is
+    // reported through the same `.catch` as an asynchronous rejection.
+    Promise.resolve()
+      .then(() => handler(msg.params))
+      // JSON.stringify drops `undefined` members, so a handler that returns
+      // nothing would produce a reply with neither `result` nor `error`.
+      // JSON-RPC requires `result` on success; send `null` in that case.
+      .then(result => write({ id, result: result ?? null }))
+      .catch(err =>
+        write({ id, error: { code: INTERNAL_ERROR, message: String(err) } }),
+      );
+  };
+
+  const handleLine = (line: string): void => {
+    let msg: Record<string, unknown>;
+    try {
+      msg = JSON.parse(line) as Record<string, unknown>;
+    } catch {
+      // Not valid JSON: skip the line.
+      return;
+    }
+    // JSON.parse can also yield null or a primitive.
+    if (!msg || typeof msg !== 'object') return;
+    if ('method' in msg) {
+      if ('id' in msg) handleIncomingRequest(msg);
+      else notificationHandlers.get(msg.method as string)?.(msg.params);
+      return;
+    }
+    if ('id' in msg) handleResponse(msg);
+  };
+
+  return { request, notify, onNotification, onRequest, handleLine };
+}
diff --git a/packages/harness-grok-build/src/bridge/host-tool-mcp.ts b/packages/harness-grok-build/src/bridge/host-tool-mcp.ts
new file mode 100644
index 000000000000..b46631eba5cd
--- /dev/null
+++ b/packages/harness-grok-build/src/bridge/host-tool-mcp.ts
@@ -0,0 +1,154 @@
+#!/usr/bin/env node
+// MCP-stdio tool server spawned by the grok CLI when an `mcpServers` entry is
+// passed to `session/new`. Exposes host-defined tools over MCP-stdio and
+// round-trips each call to the bridge's HTTP relay.
+//
+// Env vars (set by the bridge when starting a turn):
+// TOOL_SCHEMAS — JSON array of { name, description, inputSchema }
+// TOOL_RELAY_URL — http://127.0.0.1:<port> of the bridge relay server
+// TOOL_RELAY_TOKEN — bearer token required by the relay
+
+/*
+ * CONSTRAINT — the third-party imports below are NEVER bundled into the
+ * compiled `bridge/host-tool-mcp.mjs`. They are declared `external` in
+ * tsup.config.ts and resolved at runtime from the node_modules the bridge
+ * installs *inside the sandbox* from `src/bridge/package.json` (and its pinned
+ * `pnpm-lock.yaml`). Keep import, tsup `external`, and bridge package.json in
+ * sync.
+ */ +import * as mcpServerModule from '@modelcontextprotocol/sdk/server/mcp.js'; +import * as mcpStdioModule from '@modelcontextprotocol/sdk/server/stdio.js'; +import { z } from 'zod'; + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const { McpServer } = mcpServerModule as any; +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const { StdioServerTransport } = mcpStdioModule as any; + +type ToolSchema = { + name: string; + description?: string; + inputSchema?: JsonSchemaObject; +}; + +type JsonSchemaObject = { + type?: string | string[]; + description?: string; + properties?: Record; + required?: string[]; + items?: JsonSchemaObject; + enum?: unknown[]; + const?: unknown; + oneOf?: JsonSchemaObject[]; + anyOf?: JsonSchemaObject[]; + additionalProperties?: boolean | JsonSchemaObject; + nullable?: boolean; +}; + +const schemas: ToolSchema[] = JSON.parse(process.env.TOOL_SCHEMAS || '[]'); +const relayUrl = process.env.TOOL_RELAY_URL || ''; +const relayToken = process.env.TOOL_RELAY_TOKEN || ''; + +if (!schemas.length || !relayUrl) { + process.stderr.write( + '[host-tool-mcp] Missing TOOL_SCHEMAS or TOOL_RELAY_URL; exiting\n', + ); + process.exit(0); +} + +const server = new McpServer({ name: 'harness-tools', version: '1.0.0' }); + +for (const schema of schemas) { + const shape = toZodShape(schema.inputSchema); + server.tool( + schema.name, + schema.description ?? '', + shape, + async (input: Record) => { + const requestId = crypto.randomUUID(); + try { + const res = await fetch(relayUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + ...(relayToken ? 
{ Authorization: `Bearer ${relayToken}` } : {}), + }, + body: JSON.stringify({ requestId, toolName: schema.name, input }), + }); + if (!res.ok) { + const body = await res.text(); + throw new Error( + `Tool relay ${schema.name} failed with ${res.status}: ${body.slice(0, 500)}`, + ); + } + const data = (await res.json()) as { result?: unknown }; + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify(data.result ?? null), + }, + ], + }; + } catch (err) { + return { + content: [{ type: 'text' as const, text: `Error: ${String(err)}` }], + isError: true, + }; + } + }, + ); +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function toZodShape(schema: JsonSchemaObject | undefined): Record { + if (!schema?.properties) return {}; + const required = new Set(schema.required ?? []); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const shape: Record = {}; + for (const [key, propSchema] of Object.entries(schema.properties)) { + const propType = toZodType(propSchema); + shape[key] = required.has(key) ? propType : propType.optional(); + } + return shape; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function toZodType(schema: JsonSchemaObject | undefined): any { + if (!schema) return z.any(); + const types = Array.isArray(schema.type) + ? 
schema.type.filter((t): t is string => t !== 'null') + : ([schema.type].filter(Boolean) as string[]); + let zType; + switch (types[0]) { + case 'string': + zType = z.string(); + break; + case 'number': + zType = z.number(); + break; + case 'integer': + zType = z.number().int(); + break; + case 'boolean': + zType = z.boolean(); + break; + case 'array': + zType = z.array(toZodType(schema.items)); + break; + case 'object': + zType = z.object(toZodShape(schema)); + break; + case 'null': + zType = z.null(); + break; + default: + zType = z.any(); + } + if (schema.description) zType = zType.describe(schema.description); + if (schema.nullable) zType = zType.nullable(); + return zType; +} + +const transport = new StdioServerTransport(); +await server.connect(transport); diff --git a/packages/harness-grok-build/src/bridge/index.ts b/packages/harness-grok-build/src/bridge/index.ts index bd76876c7dd0..dddde4f41766 100644 --- a/packages/harness-grok-build/src/bridge/index.ts +++ b/packages/harness-grok-build/src/bridge/index.ts @@ -1,20 +1,32 @@ -// Grok-specific turn driver for the shared @ai-sdk/harness/bridge runtime. -// Spawns a `grok -p ... --output-format streaming-json --always-approve` child -// per turn; tools run inside grok, so there's no host tool dispatch here. +// Grok-build turn driver for the shared @ai-sdk/harness/bridge runtime. +// Drives `grok agent -m stdio` over ACP/JSON-RPC: ONE persistent child + +// ONE ACP session reused across turns (so multi-turn history is preserved). +// First turn spawns + initialize -> session/new; every turn does session/prompt, +// mapping session/update to stream parts and routing session/request_permission +// to host approval against the active turn. 
import { runBridge, type BridgeEvent, type BridgeTurn, } from '@ai-sdk/harness/bridge'; -import { spawn } from 'node:child_process'; +import { spawn, type ChildProcess } from 'node:child_process'; +import { randomUUID } from 'node:crypto'; +import { createServer, type Server } from 'node:http'; import { argv, env as procEnv, stdout } from 'node:process'; import { createInterface } from 'node:readline'; import type { StartMessage } from '../grok-build-bridge-protocol'; -import { createStreamMapState, mapStreamLine } from '../grok-build-stream-map'; +import { + createAcpStreamState, + finishFromResult, + mapAcpUpdate, + type AcpStreamState, +} from '../grok-build-stream-map'; +import { createAcpClient, type AcpClient } from './acp-client'; import { prependGrokBuildBinToPath } from './grok-build-path'; const DEFAULT_GROK_MODEL = 'grok-build-0.1'; +const ACP_PROTOCOL_VERSION = 1; const args = parseArgs(argv.slice(2)); if (!args.workdir) { @@ -27,16 +39,68 @@ const workdir: string = args.workdir; const bridgeStateDir: string = args.bridgeStateDir; const bootstrapDir: string = args.bootstrapDir ?? workdir; -// Make the bootstrap-installed `grok` binary resolve ahead of any system copy -// by prepending its node_modules/.bin to PATH. Spawning the bare `grok` name -// (rather than an absolute path) then picks it up. Mirrors the OpenCode bridge. +// Make the bootstrap-installed `grok` binary resolve ahead of any system copy. prependGrokBuildBinToPath({ bootstrapDir, env: procEnv }); -// The latest grok CLI session id, learned from the terminal `end` event's -// `sessionId`. Returned to the host on detach so a future process could resume -// the grok thread via `-r/--resume`. +// The latest grok ACP session id, captured from session/new. Returned to the +// host on detach so a future process could resume the thread. 
const sessionState: { id: string | undefined } = { id: undefined }; +// Persistent grok child + ACP client, spawned once on the first turn and reused +// across every turn so conversation history is preserved. `undefined` until the +// first turn, and reset to `undefined` if the child dies so the next turn can +// re-spawn. +interface LiveProcess { + child: ChildProcess; + acp: AcpClient; + stdin: NodeJS.WritableStream; + stderrChunks: string[]; + failed?: Error; +} +let live: LiveProcess | undefined; + +// The host-tool relay is started once (first turn, if tools were supplied) and +// closed only on bridge shutdown. It routes through `currentTurn`. +let relay: { port: number; close(): void } | undefined; + +// Mutable handle to the active turn. The persistent session/update notification, +// session/request_permission, and tool-relay handlers are registered once but +// route through this ref so they always target the in-flight turn. +interface CurrentTurn { + state: AcpStreamState; + emit: (event: BridgeEvent) => void; + turn: BridgeTurn; +} +let currentTurn: CurrentTurn | undefined; + +const killChild = (): void => { + if (!live) return; + try { + live.child.kill('SIGTERM'); + } catch {} + live = undefined; +}; + +const shutdown = (): void => { + try { + relay?.close(); + } catch {} + relay = undefined; + killChild(); +}; + +// runBridge exits the process directly after shutdown/detach (no cleanup hook), +// so kill the persistent child on process teardown to avoid orphaned grok procs. +process.on('exit', shutdown); +process.on('SIGTERM', () => { + shutdown(); + process.exit(0); +}); +process.on('SIGINT', () => { + shutdown(); + process.exit(0); +}); + await runBridge({ bridgeType: 'grok-build', bridgeStateDir, @@ -44,41 +108,118 @@ await runBridge({ onDetach: () => (sessionState.id ? 
{ sessionId: sessionState.id } : {}), }); -async function runTurn(start: StartMessage, turn: BridgeTurn): Promise { - const emit = (event: BridgeEvent) => turn.emit(event); +// Spawn the grok child + ACP client and register the persistent handlers ONCE. +// Handlers route through `currentTurn`, so a turn must set `currentTurn` before +// prompting. Returns the live handle; throws if stdio pipes are unavailable. +function spawnLive(start: StartMessage): LiveProcess { + const allowAll = start.permissionMode === 'allow-all'; - const cliArgs = [ - '-p', - start.prompt, - '-m', - start.model ?? DEFAULT_GROK_MODEL, - '--output-format', - 'streaming-json', - // REQUIRED in headless mode: without it the CLI blocks on tool approval. - // Tools therefore execute inside grok; no host dispatch happens here. - '--always-approve', - '--cwd', - workdir, - ]; - // Resume the prior CLI thread in this workdir instead of starting fresh. - if (start.continue) cliArgs.push('-c'); + // `-m ` MUST precede `stdio`. For allow-all, run frictionless via + // `--always-approve` so grok auto-approves and no session/request_permission + // round-trip occurs; otherwise approvals route back to the host. + const cliArgs = ['agent', '-m', start.model ?? DEFAULT_GROK_MODEL]; + if (start.reasoningEffort) { + cliArgs.push('--reasoning-effort', start.reasoningEffort); + } + if (allowAll) { + cliArgs.push('--always-approve'); + } + cliArgs.push('stdio'); const child = spawn('grok', cliArgs, { cwd: workdir, env: procEnv, - stdio: ['ignore', 'pipe', 'pipe'], + stdio: ['pipe', 'pipe', 'pipe'], }); + const childStdin = child.stdin; const childStdout = child.stdout; const childStderr = child.stderr; - if (!childStdout || !childStderr) { - throw new Error('grok child process did not expose stdout/stderr pipes.'); + if (!childStdin || !childStdout || !childStderr) { + throw new Error('grok child process did not expose stdio pipes.'); } - // Wire host abort to killing the child. 
+ const acp = createAcpClient({ + writeLine: line => childStdin.write(line + '\n'), + }); + + // Map session/update notifications to stream parts for the active turn. + acp.onNotification('session/update', (params: unknown) => { + const active = currentTurn; + if (!active) return; + const rec = asRecord(params); + if (rec === null || !('update' in rec)) return; + for (const part of mapAcpUpdate(rec['update'], active.state)) { + active.emit(part as BridgeEvent); + } + }); + + // Host-driven tool approval against the active turn. Skipped under allow-all, + // where grok auto-approves via `--always-approve`. + if (start.permissionMode !== 'allow-all') { + acp.onRequest('session/request_permission', async (params: unknown) => { + const active = currentTurn; + if (!active) return { outcome: { outcome: 'cancelled' } }; + return handlePermissionRequest( + params, + active.turn, + active.emit, + active.state, + ); + }); + } + + const stderrChunks: string[] = []; + childStderr.setEncoding('utf8'); + childStderr.on('data', (chunk: string) => { + stderrChunks.push(chunk); + process.stderr.write(chunk); + }); + + const handle: LiveProcess = { child, acp, stdin: childStdin, stderrChunks }; + + const rl = createInterface({ input: childStdout, crlfDelay: Infinity }); + rl.on('line', line => { + const trimmed = line.trim(); + if (trimmed.length === 0) return; + acp.handleLine(trimmed); + }); + + // The persistent child dying is unexpected mid-turn: surface an error to the + // active turn and drop `live` so the next turn re-spawns a fresh session. 
+ const onDeath = (err: Error): void => { + if (live === handle) live = undefined; + handle.failed = err; + currentTurn?.emit({ type: 'error', error: serialiseError(err) }); + }; + child.on('error', err => onDeath(err)); + child.on('close', code => { + if (handle.failed) return; + if (code === 0 || code === null) { + if (live === handle) live = undefined; + return; + } + const tail = stderrChunks.join('').trim().slice(-2000); + onDeath( + new Error(`grok CLI exited with code ${code}${tail ? `:\n${tail}` : ''}`), + ); + }); + + return handle; +} + +async function runTurn(start: StartMessage, turn: BridgeTurn): Promise { + const emit = (event: BridgeEvent) => turn.emit(event); + const hostToolNames = new Set((start.tools ?? []).map(t => t.name)); + const state = createAcpStreamState(hostToolNames); + + // Route this turn's abort to session/cancel only; the persistent child must + // survive so later turns reuse it. The child is killed only on shutdown. const onAbort = () => { - try { - child.kill('SIGTERM'); - } catch {} + if (live && sessionState.id) { + try { + live.acp.notify('session/cancel', { sessionId: sessionState.id }); + } catch {} + } }; if (turn.abortSignal.aborted) { onAbort(); @@ -86,72 +227,249 @@ async function runTurn(start: StartMessage, turn: BridgeTurn): Promise { turn.abortSignal.addEventListener('abort', onAbort, { once: true }); } - // Per-turn stream-map state: each line of grok's streaming-json stdout maps - // to zero or more HarnessV1StreamPart events. - const state = createStreamMapState(); + // First turn (or after the child died): spawn, initialize, create the session, + // and start the host-tool relay once. 
+ const isFreshSession = live === undefined; + if (isFreshSession) { + live = spawnLive(start); + const acp = live.acp; - const rl = createInterface({ input: childStdout, crlfDelay: Infinity }); - rl.on('line', line => { - const trimmed = line.trim(); - if (trimmed.length === 0) return; - // Capture the grok session id from the terminal `end` event before mapping - // (mapStreamLine does not surface it). - captureSessionId(trimmed); - for (const part of mapStreamLine(trimmed, state)) { - emit(part as BridgeEvent); + // Start the relay once. `start.tools` is only known on the first turn; tools + // are stable across turns in a session (same agent), so build mcpServers + // from the first turn's tools. + const mcpServers: unknown[] = []; + if (start.tools && start.tools.length > 0 && !relay) { + const relayToken = randomUUID(); + relay = await startToolRelay({ + relayToken, + tools: start.tools, + getCurrentTurn: () => currentTurn, + }); + mcpServers.push({ + name: 'harness-tools', + command: 'node', + args: [`${bootstrapDir}/host-tool-mcp.mjs`], + env: [ + { + name: 'TOOL_SCHEMAS', + value: JSON.stringify( + start.tools.map(t => ({ + name: t.name, + description: t.description, + inputSchema: t.inputSchema, + })), + ), + }, + { name: 'TOOL_RELAY_URL', value: `http://127.0.0.1:${relay.port}` }, + { name: 'TOOL_RELAY_TOKEN', value: relayToken }, + ], + }); } - }); - // Forward stderr to this process's stderr so a CLI failure is inspectable - // from the host's bridge-stderr forwarding. - const stderrChunks: string[] = []; - childStderr.setEncoding('utf8'); - childStderr.on('data', (chunk: string) => { - stderrChunks.push(chunk); - process.stderr.write(chunk); - }); + await acp.request('initialize', { + protocolVersion: ACP_PROTOCOL_VERSION, + clientCapabilities: {}, + }); + + // session/load is unverified for grok; always create a fresh session here. + // Same-process multi-turn memory is preserved by reusing this session id. 
+ const newSession = asRecord( + await acp.request('session/new', { cwd: workdir, mcpServers }), + ); + if (newSession && typeof newSession['sessionId'] === 'string') { + sessionState.id = newSession['sessionId']; + } + } + + const activeLive = live; + if (!activeLive) { + throw new Error('grok child process is not available.'); + } - await new Promise((resolve, reject) => { - child.on('error', err => { - emit({ type: 'error', error: serialiseError(err) }); - reject(err); + currentTurn = { state, emit, turn }; + try { + const result = await activeLive.acp.request('session/prompt', { + sessionId: sessionState.id, + prompt: [{ type: 'text', text: start.prompt }], }); - child.on('close', code => { - turn.abortSignal.removeEventListener('abort', onAbort); - // Aborted: treat as a clean wind-down (host already settles the turn). - if (turn.abortSignal.aborted) { - resolve(); + for (const part of finishFromResult( + (asRecord(result) ?? {}) as { stopReason?: string; _meta?: unknown }, + state, + )) { + emit(part as BridgeEvent); + } + } catch (err) { + if (!turn.abortSignal.aborted) throw err; + } finally { + turn.abortSignal.removeEventListener('abort', onAbort); + currentTurn = undefined; + } + + void turn.pendingUserMessages; // accepted but unused. +} + +// Reply to session/request_permission by selecting an allow/reject optionId. +async function handlePermissionRequest( + params: unknown, + turn: BridgeTurn, + emit: (event: BridgeEvent) => void, + state: AcpStreamState, +): Promise { + const rec = asRecord(params); + const options = Array.isArray(rec?.['options']) + ? 
(rec!['options'] as unknown[]) + : []; + const pick = (kinds: string[]): string | undefined => { + for (const kind of kinds) { + const found = options + .map(asRecord) + .find(o => o?.['kind'] === kind && typeof o['optionId'] === 'string'); + if (found) return found['optionId'] as string; + } + return undefined; + }; + const allowId = pick(['allow_once', 'allow_always']); + const rejectId = pick(['reject_once', 'reject_always']); + + const toolCall = asRecord(rec?.['toolCall']); + const toolCallId = + toolCall && typeof toolCall['toolCallId'] === 'string' + ? (toolCall['toolCallId'] as string) + : 'grok-build-approval'; + + const allow = () => + allowId + ? { outcome: { outcome: 'selected', optionId: allowId } } + : { outcome: { outcome: 'cancelled' } }; + + // Only tool-calls surfaced to the host (builtins) can be approved there. + // Host tools are gated by the relay / the host's own tool approval, and + // grok-internal tools (search_tool/use_tool) are never surfaced — auto-allow + // both so grok proceeds without orphaning an approval the host can't match. + if (!state.toolNamesById.has(toolCallId)) { + return allow(); + } + + // Some BridgeTurn versions may lack requestToolApproval; auto-allow then. + if (typeof turn.requestToolApproval !== 'function') { + return allow(); + } + + emit({ type: 'tool-approval-request', approvalId: toolCallId, toolCallId }); + const decision = await turn.requestToolApproval(toolCallId); + + if (turn.abortSignal.aborted) return { outcome: { outcome: 'cancelled' } }; + const chosen = decision.approved ? allowId : rejectId; + return chosen + ? { outcome: { outcome: 'selected', optionId: chosen } } + : { outcome: { outcome: 'cancelled' } }; +} + +// Tool relay — HTTP server on 127.0.0.1:0 with bearer-token auth. Started once; +// each invocation routes through the active turn (via `getCurrentTurn`) so a +// relay started on the first turn still resolves against the current turn's +// `requestToolResult` / `emit`. 
The in-sandbox MCP stdio server POSTs each +// host-tool invocation here; the relay emits a `tool-call`, awaits the matching +// `tool-result`, and responds with `{ result }`. +async function startToolRelay({ + relayToken, + tools, + getCurrentTurn, +}: { + relayToken: string; + tools: ReadonlyArray<{ name: string }>; + getCurrentTurn: () => CurrentTurn | undefined; +}): Promise<{ port: number; close(): void }> { + const toolNames = new Set(tools.map(t => t.name)); + + const server = createServer(async (req, res) => { + try { + if ( + req.method !== 'POST' || + req.url !== '/' || + req.headers.authorization !== `Bearer ${relayToken}` + ) { + res.writeHead(401, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'unauthorized tool relay request' })); + return; + } + const chunks: Buffer[] = []; + for await (const chunk of req) { + chunks.push(chunk as Buffer); + } + const body = Buffer.concat(chunks).toString('utf8'); + const { requestId, toolName, input } = JSON.parse(body) as { + requestId: string; + toolName: string; + input: unknown; + }; + + if (!toolNames.has(toolName)) { + res.writeHead(403, { 'Content-Type': 'application/json' }); + res.end( + JSON.stringify({ error: `Tool "${toolName}" is not available` }), + ); return; } - if (code === 0) { - resolve(); + + const active = getCurrentTurn(); + if (!active) { + res.writeHead(409, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'no active turn for tool relay' })); return; } - const tail = stderrChunks.join('').trim().slice(-2000); - const err = new Error( - `grok CLI exited with code ${code}${tail ? `:\n${tail}` : ''}`, + const emit = active.emit; + + emit({ + type: 'tool-call', + toolCallId: requestId, + toolName, + input: JSON.stringify(input ?? 
{}), + providerExecuted: false, + } as BridgeEvent); + + const { output, isError } = + await active.turn.requestToolResult(requestId); + emit({ + type: 'tool-result', + toolCallId: requestId, + toolName, + result: output ?? null, + isError: !!isError, + } as BridgeEvent); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ result: output })); + } catch (error) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end( + JSON.stringify({ + error: error instanceof Error ? error.message : String(error), + }), ); - emit({ type: 'error', error: serialiseError(err) }); - reject(err); - }); + } }); - void turn.pendingUserMessages; // accepted but unused: each turn is a fresh CLI invocation. + await new Promise(resolve => + server.listen(0, '127.0.0.1', () => resolve()), + ); + const address = server.address(); + if (!address || typeof address === 'string') { + throw new Error('tool relay did not expose a numeric port'); + } + return { port: address.port, close: () => closeServer(server) }; } -function captureSessionId(line: string): void { +function closeServer(server: Server): void { try { - const msg = JSON.parse(line) as Record; - if ( - msg?.type === 'end' && - typeof msg.sessionId === 'string' && - msg.sessionId.length > 0 - ) { - sessionState.id = msg.sessionId; - } - } catch { - // Non-JSON / partial line — ignore. The stream-map handles malformed input. - } + server.close(); + } catch {} +} + +function asRecord(value: unknown): Record | null { + return typeof value === 'object' && value !== null + ? 
(value as Record) + : null; } function parseArgs(rawArgs: string[]): { diff --git a/packages/harness-grok-build/src/bridge/package.json b/packages/harness-grok-build/src/bridge/package.json index d47b6d2397bc..a9bd6b57158e 100644 --- a/packages/harness-grok-build/src/bridge/package.json +++ b/packages/harness-grok-build/src/bridge/package.json @@ -5,6 +5,7 @@ "type": "module", "dependencies": { "@xai-official/grok": "0.2.51", + "@modelcontextprotocol/sdk": "1.29.0", "ws": "8.21.0", "zod": "3.25.76" } diff --git a/packages/harness-grok-build/src/bridge/pnpm-lock.yaml b/packages/harness-grok-build/src/bridge/pnpm-lock.yaml index 7fe70027a50e..7d6c309e0456 100644 --- a/packages/harness-grok-build/src/bridge/pnpm-lock.yaml +++ b/packages/harness-grok-build/src/bridge/pnpm-lock.yaml @@ -8,6 +8,9 @@ importers: .: dependencies: + '@modelcontextprotocol/sdk': + specifier: 1.29.0 + version: 1.29.0(zod@3.25.76) '@xai-official/grok': specifier: 0.2.51 version: 0.2.51 @@ -20,9 +23,25 @@ importers: packages: + '@hono/node-server@1.19.14': + resolution: {integrity: sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==} + engines: {node: '>=18.14.1'} + peerDependencies: + hono: ^4 + '@iarna/toml@3.0.0': resolution: {integrity: sha512-td6ZUkz2oS3VeleBcN+m//Q6HlCFCPrnI0FZhrt/h4XqLEdOyYp2u21nd8MdsR+WJy5r9PTDaHTDDfhf4H4l6Q==} + '@modelcontextprotocol/sdk@1.29.0': + resolution: {integrity: sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==} + engines: {node: '>=18'} + peerDependencies: + '@cfworker/json-schema': ^4.1.1 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + '@cfworker/json-schema': + optional: true + '@xai-official/grok-darwin-arm64@0.2.51': resolution: {integrity: sha512-HKkXN+1ui1P4SqRJNIWgjMZZEP47+1H+utNxD0R/cUPHOZd7oP7si/fNJMk8BVwTxkDtNuOtoIdHxt1TinwgmA==} cpu: [arm64] @@ -60,6 +79,352 @@ packages: os: [darwin, linux, win32] hasBin: true + accepts@2.0.0: + resolution: {integrity: 
sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==} + engines: {node: '>= 0.6'} + + ajv-formats@3.0.1: + resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} + peerDependencies: + ajv: ^8.0.0 + peerDependenciesMeta: + ajv: + optional: true + + ajv@8.20.0: + resolution: {integrity: sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==} + + body-parser@2.3.0: + resolution: {integrity: sha512-2cGmJupaNgg+QUwVLAucDuWuoMZ6EX9iHDRswZ5lsNYEmwPaRknMPCLZz07yTzVq/83p4o/wzbDZbBrTvGGTIw==} + engines: {node: '>=18'} + + bytes@3.1.2: + resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} + engines: {node: '>= 0.8'} + + call-bind-apply-helpers@1.0.2: + resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} + engines: {node: '>= 0.4'} + + call-bound@1.0.4: + resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==} + engines: {node: '>= 0.4'} + + content-disposition@1.1.0: + resolution: {integrity: sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==} + engines: {node: '>=18'} + + content-type@1.0.5: + resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==} + engines: {node: '>= 0.6'} + + content-type@2.0.0: + resolution: {integrity: sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ==} + engines: {node: '>=18'} + + cookie-signature@1.2.2: + resolution: {integrity: sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==} + engines: {node: '>=6.6.0'} + + cookie@0.7.2: + resolution: {integrity: 
sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==} + engines: {node: '>= 0.6'} + + cors@2.8.6: + resolution: {integrity: sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==} + engines: {node: '>= 0.10'} + + cross-spawn@7.0.6: + resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} + engines: {node: '>= 8'} + + debug@4.4.3: + resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} + engines: {node: '>=6.0'} + peerDependencies: + supports-color: '*' + peerDependenciesMeta: + supports-color: + optional: true + + depd@2.0.0: + resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} + engines: {node: '>= 0.8'} + + dunder-proto@1.0.1: + resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} + engines: {node: '>= 0.4'} + + ee-first@1.1.1: + resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} + + encodeurl@2.0.0: + resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==} + engines: {node: '>= 0.8'} + + es-define-property@1.0.1: + resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==} + engines: {node: '>= 0.4'} + + es-errors@1.3.0: + resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} + engines: {node: '>= 0.4'} + + es-object-atoms@1.1.2: + resolution: {integrity: sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==} + engines: {node: '>= 0.4'} + + escape-html@1.0.3: + resolution: {integrity: 
sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==} + + etag@1.8.1: + resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==} + engines: {node: '>= 0.6'} + + eventsource-parser@3.1.0: + resolution: {integrity: sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg==} + engines: {node: '>=18.0.0'} + + eventsource@3.0.7: + resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==} + engines: {node: '>=18.0.0'} + + express-rate-limit@8.5.2: + resolution: {integrity: sha512-5Kb34ipNX694DH48vN9irak1Qx30nb0PLYHXfJgw4YEjiC3ZEmZJhwOp+VfiCYwFzvFTdB9QkArYS5kXa2cx2A==} + engines: {node: '>= 16'} + peerDependencies: + express: '>= 4.11' + + express@5.2.1: + resolution: {integrity: sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==} + engines: {node: '>= 18'} + + fast-deep-equal@3.1.3: + resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} + + fast-uri@3.1.2: + resolution: {integrity: sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==} + + finalhandler@2.1.1: + resolution: {integrity: sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==} + engines: {node: '>= 18.0.0'} + + forwarded@0.2.0: + resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} + engines: {node: '>= 0.6'} + + fresh@2.0.0: + resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==} + engines: {node: '>= 0.8'} + + function-bind@1.1.2: + resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + + get-intrinsic@1.3.0: + 
resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==} + engines: {node: '>= 0.4'} + + get-proto@1.0.1: + resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} + engines: {node: '>= 0.4'} + + gopd@1.2.0: + resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} + engines: {node: '>= 0.4'} + + has-symbols@1.1.0: + resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} + engines: {node: '>= 0.4'} + + hasown@2.0.4: + resolution: {integrity: sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A==} + engines: {node: '>= 0.4'} + + hono@4.12.27: + resolution: {integrity: sha512-1yrb/+w6HWQJrUCLkJ2IF5jNIPvvFkblV5RNOYl6bV+OA6p9GLcMpHFFGTosSvHvcAUibuUukRqhlYI4z32C7Q==} + engines: {node: '>=16.9.0'} + + http-errors@2.0.1: + resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==} + engines: {node: '>= 0.8'} + + iconv-lite@0.7.2: + resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==} + engines: {node: '>=0.10.0'} + + inherits@2.0.4: + resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + + ip-address@10.2.0: + resolution: {integrity: sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==} + engines: {node: '>= 12'} + + ipaddr.js@1.9.1: + resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==} + engines: {node: '>= 0.10'} + + is-promise@4.0.0: + resolution: {integrity: sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==} + + 
isexe@2.0.0: + resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + + jose@6.2.3: + resolution: {integrity: sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==} + + json-schema-traverse@1.0.0: + resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} + + json-schema-typed@8.0.2: + resolution: {integrity: sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==} + + math-intrinsics@1.1.0: + resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} + engines: {node: '>= 0.4'} + + media-typer@1.1.0: + resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==} + engines: {node: '>= 0.8'} + + merge-descriptors@2.0.0: + resolution: {integrity: sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==} + engines: {node: '>=18'} + + mime-db@1.54.0: + resolution: {integrity: sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==} + engines: {node: '>= 0.6'} + + mime-types@3.0.2: + resolution: {integrity: sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==} + engines: {node: '>=18'} + + ms@2.1.3: + resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + + negotiator@1.0.0: + resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==} + engines: {node: '>= 0.6'} + + object-assign@4.1.1: + resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} + engines: {node: '>=0.10.0'} + + object-inspect@1.13.4: + resolution: {integrity: 
sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==} + engines: {node: '>= 0.4'} + + on-finished@2.4.1: + resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==} + engines: {node: '>= 0.8'} + + once@1.4.0: + resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + + parseurl@1.3.3: + resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} + engines: {node: '>= 0.8'} + + path-key@3.1.1: + resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} + engines: {node: '>=8'} + + path-to-regexp@8.4.2: + resolution: {integrity: sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==} + + pkce-challenge@5.0.1: + resolution: {integrity: sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==} + engines: {node: '>=16.20.0'} + + proxy-addr@2.0.7: + resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} + engines: {node: '>= 0.10'} + + qs@6.15.3: + resolution: {integrity: sha512-O9gl3zCl5h5blw1KGUzQKhA5oUXSl8rwUIM5o0S3nCXMliSvy5Dzx7/DJcI+SwgICv+IneSZwhBh1oSyEHA71A==} + engines: {node: '>=0.6'} + + range-parser@1.3.0: + resolution: {integrity: sha512-hek2mFQpPuI4E1BBKrSto+BU3e3x4xuarsbiwr3+lf7p44juvFMV0XFWQAP3xUyqXA4RrXLIoaSUGbSt056ZMw==} + engines: {node: '>= 0.6'} + + raw-body@3.0.2: + resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==} + engines: {node: '>= 0.10'} + + require-from-string@2.0.2: + resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} + engines: {node: '>=0.10.0'} + + router@2.2.0: + 
resolution: {integrity: sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==} + engines: {node: '>= 18'} + + safer-buffer@2.1.2: + resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + + send@1.2.1: + resolution: {integrity: sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==} + engines: {node: '>= 18'} + + serve-static@2.2.1: + resolution: {integrity: sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==} + engines: {node: '>= 18'} + + setprototypeof@1.2.0: + resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==} + + shebang-command@2.0.0: + resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} + engines: {node: '>=8'} + + shebang-regex@3.0.0: + resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} + engines: {node: '>=8'} + + side-channel-list@1.0.1: + resolution: {integrity: sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==} + engines: {node: '>= 0.4'} + + side-channel-map@1.0.1: + resolution: {integrity: sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==} + engines: {node: '>= 0.4'} + + side-channel-weakmap@1.0.2: + resolution: {integrity: sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==} + engines: {node: '>= 0.4'} + + side-channel@1.1.1: + resolution: {integrity: sha512-6x6dK6zJdpTzF4sQeNYxwtvBzf6Eg4GtlesS94HOvTudUeyK2WXAaIfmDgsyslYrRBeFIlsi54AYsFGUuhmvrQ==} + engines: {node: '>= 0.4'} + + statuses@2.0.2: + resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==} + 
engines: {node: '>= 0.8'} + + toidentifier@1.0.1: + resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==} + engines: {node: '>=0.6'} + + type-is@2.1.0: + resolution: {integrity: sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA==} + engines: {node: '>= 18'} + + unpipe@1.0.0: + resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==} + engines: {node: '>= 0.8'} + + vary@1.1.2: + resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==} + engines: {node: '>= 0.8'} + + which@2.0.2: + resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} + engines: {node: '>= 8'} + hasBin: true + + wrappy@1.0.2: + resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + ws@8.21.0: resolution: {integrity: sha512-Vsp28b7DRcimFQvrqu2Wek3z1iYxDCWqHYB8Qsnk/S4RfaCQzPGPyBNuVjJV3cd6UiKtUtp6sNM77gWvzcCH+g==} engines: {node: '>=10.0.0'} @@ -72,13 +437,44 @@ packages: utf-8-validate: optional: true + zod-to-json-schema@3.25.2: + resolution: {integrity: sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==} + peerDependencies: + zod: ^3.25.28 || ^4 + zod@3.25.76: resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} snapshots: + '@hono/node-server@1.19.14(hono@4.12.27)': + dependencies: + hono: 4.12.27 + '@iarna/toml@3.0.0': {} + '@modelcontextprotocol/sdk@1.29.0(zod@3.25.76)': + dependencies: + '@hono/node-server': 1.19.14(hono@4.12.27) + ajv: 8.20.0 + ajv-formats: 3.0.1(ajv@8.20.0) + content-type: 1.0.5 + cors: 2.8.6 + cross-spawn: 7.0.6 + eventsource: 3.0.7 + eventsource-parser: 3.1.0 + express: 5.2.1 + express-rate-limit: 
8.5.2(express@5.2.1) + hono: 4.12.27 + jose: 6.2.3 + json-schema-typed: 8.0.2 + pkce-challenge: 5.0.1 + raw-body: 3.0.2 + zod: 3.25.76 + zod-to-json-schema: 3.25.2(zod@3.25.76) + transitivePeerDependencies: + - supports-color + '@xai-official/grok-darwin-arm64@0.2.51': optional: true @@ -108,6 +504,372 @@ snapshots: '@xai-official/grok-win32-arm64': 0.2.51 '@xai-official/grok-win32-x64': 0.2.51 + accepts@2.0.0: + dependencies: + mime-types: 3.0.2 + negotiator: 1.0.0 + + ajv-formats@3.0.1(ajv@8.20.0): + optionalDependencies: + ajv: 8.20.0 + + ajv@8.20.0: + dependencies: + fast-deep-equal: 3.1.3 + fast-uri: 3.1.2 + json-schema-traverse: 1.0.0 + require-from-string: 2.0.2 + + body-parser@2.3.0: + dependencies: + bytes: 3.1.2 + content-type: 2.0.0 + debug: 4.4.3 + http-errors: 2.0.1 + iconv-lite: 0.7.2 + on-finished: 2.4.1 + qs: 6.15.3 + raw-body: 3.0.2 + type-is: 2.1.0 + transitivePeerDependencies: + - supports-color + + bytes@3.1.2: {} + + call-bind-apply-helpers@1.0.2: + dependencies: + es-errors: 1.3.0 + function-bind: 1.1.2 + + call-bound@1.0.4: + dependencies: + call-bind-apply-helpers: 1.0.2 + get-intrinsic: 1.3.0 + + content-disposition@1.1.0: {} + + content-type@1.0.5: {} + + content-type@2.0.0: {} + + cookie-signature@1.2.2: {} + + cookie@0.7.2: {} + + cors@2.8.6: + dependencies: + object-assign: 4.1.1 + vary: 1.1.2 + + cross-spawn@7.0.6: + dependencies: + path-key: 3.1.1 + shebang-command: 2.0.0 + which: 2.0.2 + + debug@4.4.3: + dependencies: + ms: 2.1.3 + + depd@2.0.0: {} + + dunder-proto@1.0.1: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-errors: 1.3.0 + gopd: 1.2.0 + + ee-first@1.1.1: {} + + encodeurl@2.0.0: {} + + es-define-property@1.0.1: {} + + es-errors@1.3.0: {} + + es-object-atoms@1.1.2: + dependencies: + es-errors: 1.3.0 + + escape-html@1.0.3: {} + + etag@1.8.1: {} + + eventsource-parser@3.1.0: {} + + eventsource@3.0.7: + dependencies: + eventsource-parser: 3.1.0 + + express-rate-limit@8.5.2(express@5.2.1): + dependencies: + express: 5.2.1 
+ ip-address: 10.2.0 + + express@5.2.1: + dependencies: + accepts: 2.0.0 + body-parser: 2.3.0 + content-disposition: 1.1.0 + content-type: 1.0.5 + cookie: 0.7.2 + cookie-signature: 1.2.2 + debug: 4.4.3 + depd: 2.0.0 + encodeurl: 2.0.0 + escape-html: 1.0.3 + etag: 1.8.1 + finalhandler: 2.1.1 + fresh: 2.0.0 + http-errors: 2.0.1 + merge-descriptors: 2.0.0 + mime-types: 3.0.2 + on-finished: 2.4.1 + once: 1.4.0 + parseurl: 1.3.3 + proxy-addr: 2.0.7 + qs: 6.15.3 + range-parser: 1.3.0 + router: 2.2.0 + send: 1.2.1 + serve-static: 2.2.1 + statuses: 2.0.2 + type-is: 2.1.0 + vary: 1.1.2 + transitivePeerDependencies: + - supports-color + + fast-deep-equal@3.1.3: {} + + fast-uri@3.1.2: {} + + finalhandler@2.1.1: + dependencies: + debug: 4.4.3 + encodeurl: 2.0.0 + escape-html: 1.0.3 + on-finished: 2.4.1 + parseurl: 1.3.3 + statuses: 2.0.2 + transitivePeerDependencies: + - supports-color + + forwarded@0.2.0: {} + + fresh@2.0.0: {} + + function-bind@1.1.2: {} + + get-intrinsic@1.3.0: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-define-property: 1.0.1 + es-errors: 1.3.0 + es-object-atoms: 1.1.2 + function-bind: 1.1.2 + get-proto: 1.0.1 + gopd: 1.2.0 + has-symbols: 1.1.0 + hasown: 2.0.4 + math-intrinsics: 1.1.0 + + get-proto@1.0.1: + dependencies: + dunder-proto: 1.0.1 + es-object-atoms: 1.1.2 + + gopd@1.2.0: {} + + has-symbols@1.1.0: {} + + hasown@2.0.4: + dependencies: + function-bind: 1.1.2 + + hono@4.12.27: {} + + http-errors@2.0.1: + dependencies: + depd: 2.0.0 + inherits: 2.0.4 + setprototypeof: 1.2.0 + statuses: 2.0.2 + toidentifier: 1.0.1 + + iconv-lite@0.7.2: + dependencies: + safer-buffer: 2.1.2 + + inherits@2.0.4: {} + + ip-address@10.2.0: {} + + ipaddr.js@1.9.1: {} + + is-promise@4.0.0: {} + + isexe@2.0.0: {} + + jose@6.2.3: {} + + json-schema-traverse@1.0.0: {} + + json-schema-typed@8.0.2: {} + + math-intrinsics@1.1.0: {} + + media-typer@1.1.0: {} + + merge-descriptors@2.0.0: {} + + mime-db@1.54.0: {} + + mime-types@3.0.2: + dependencies: + mime-db: 1.54.0 + + 
ms@2.1.3: {} + + negotiator@1.0.0: {} + + object-assign@4.1.1: {} + + object-inspect@1.13.4: {} + + on-finished@2.4.1: + dependencies: + ee-first: 1.1.1 + + once@1.4.0: + dependencies: + wrappy: 1.0.2 + + parseurl@1.3.3: {} + + path-key@3.1.1: {} + + path-to-regexp@8.4.2: {} + + pkce-challenge@5.0.1: {} + + proxy-addr@2.0.7: + dependencies: + forwarded: 0.2.0 + ipaddr.js: 1.9.1 + + qs@6.15.3: + dependencies: + es-define-property: 1.0.1 + side-channel: 1.1.1 + + range-parser@1.3.0: {} + + raw-body@3.0.2: + dependencies: + bytes: 3.1.2 + http-errors: 2.0.1 + iconv-lite: 0.7.2 + unpipe: 1.0.0 + + require-from-string@2.0.2: {} + + router@2.2.0: + dependencies: + debug: 4.4.3 + depd: 2.0.0 + is-promise: 4.0.0 + parseurl: 1.3.3 + path-to-regexp: 8.4.2 + transitivePeerDependencies: + - supports-color + + safer-buffer@2.1.2: {} + + send@1.2.1: + dependencies: + debug: 4.4.3 + encodeurl: 2.0.0 + escape-html: 1.0.3 + etag: 1.8.1 + fresh: 2.0.0 + http-errors: 2.0.1 + mime-types: 3.0.2 + ms: 2.1.3 + on-finished: 2.4.1 + range-parser: 1.3.0 + statuses: 2.0.2 + transitivePeerDependencies: + - supports-color + + serve-static@2.2.1: + dependencies: + encodeurl: 2.0.0 + escape-html: 1.0.3 + parseurl: 1.3.3 + send: 1.2.1 + transitivePeerDependencies: + - supports-color + + setprototypeof@1.2.0: {} + + shebang-command@2.0.0: + dependencies: + shebang-regex: 3.0.0 + + shebang-regex@3.0.0: {} + + side-channel-list@1.0.1: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + + side-channel-map@1.0.1: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + + side-channel-weakmap@1.0.2: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + side-channel-map: 1.0.1 + + side-channel@1.1.1: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + side-channel-list: 1.0.1 + side-channel-map: 1.0.1 + side-channel-weakmap: 1.0.2 + + statuses@2.0.2: {} + + toidentifier@1.0.1: {} + + 
type-is@2.1.0: + dependencies: + content-type: 2.0.0 + media-typer: 1.1.0 + mime-types: 3.0.2 + + unpipe@1.0.0: {} + + vary@1.1.2: {} + + which@2.0.2: + dependencies: + isexe: 2.0.0 + + wrappy@1.0.2: {} + ws@8.21.0: {} + zod-to-json-schema@3.25.2(zod@3.25.76): + dependencies: + zod: 3.25.76 + zod@3.25.76: {} diff --git a/packages/harness-grok-build/src/grok-build-bridge-protocol.test.ts b/packages/harness-grok-build/src/grok-build-bridge-protocol.test.ts index ccc235b5b4da..d76dc384acad 100644 --- a/packages/harness-grok-build/src/grok-build-bridge-protocol.test.ts +++ b/packages/harness-grok-build/src/grok-build-bridge-protocol.test.ts @@ -22,6 +22,35 @@ describe('grok-build bridge protocol', () => { expect(parsed.planMode).toBe(true); }); + it('carries host-defined tools on the start message', () => { + const parsed = startMessageSchema.parse({ + type: 'start', + prompt: 'hi', + tools: [ + { + name: 'get_weather', + description: 'Look up the weather', + inputSchema: { + type: 'object', + properties: { city: { type: 'string' } }, + required: ['city'], + }, + }, + ], + }); + expect(parsed.tools).toHaveLength(1); + expect(parsed.tools?.[0].name).toBe('get_weather'); + }); + + it('round-trips permissionMode on the start message', () => { + const parsed = startMessageSchema.parse({ + type: 'start', + prompt: 'hi', + permissionMode: 'allow-all', + }); + expect(parsed.permissionMode).toBe('allow-all'); + }); + it('discriminates start within the inbound union', () => { const parsed = inboundMessageSchema.parse({ type: 'start', prompt: 'hi' }); expect(parsed.type).toBe('start'); diff --git a/packages/harness-grok-build/src/grok-build-bridge-protocol.ts b/packages/harness-grok-build/src/grok-build-bridge-protocol.ts index 5a4c3151cc54..6a0abd887c71 100644 --- a/packages/harness-grok-build/src/grok-build-bridge-protocol.ts +++ b/packages/harness-grok-build/src/grok-build-bridge-protocol.ts @@ -13,6 +13,7 @@ export type OutboundMessage = z.infer; export const 
startMessageSchema = harnessV1BridgeStartBaseSchema.extend({ model: z.string().optional(), planMode: z.boolean().optional(), + reasoningEffort: z.enum(['low', 'medium', 'high']).optional(), // Resume the prior CLI thread instead of a fresh session. continue: z.boolean().optional(), }); diff --git a/packages/harness-grok-build/src/grok-build-harness.test.ts b/packages/harness-grok-build/src/grok-build-harness.test.ts index f1e76163bb7c..4e0054d61a21 100644 --- a/packages/harness-grok-build/src/grok-build-harness.test.ts +++ b/packages/harness-grok-build/src/grok-build-harness.test.ts @@ -41,6 +41,7 @@ vi.mock('node:fs/promises', async importOriginal => { readFile: vi.fn(async (input: unknown, ...rest: unknown[]) => { const p = String(input); if (p.endsWith('/bridge/index.mjs')) return '// mock bridge\n'; + if (p.endsWith('/bridge/host-tool-mcp.mjs')) return '// mock mcp\n'; if (p.endsWith('/bridge/package.json')) return '{"name":"mock"}'; if (p.endsWith('/bridge/pnpm-lock.yaml')) return 'lockfileVersion: 9\n'; // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -118,7 +119,7 @@ describe('grok-build builtin tools', () => { }); it('maps a native name to its common name', () => { - expect(toCommonName('Read')).toBe('read'); + expect(toCommonName('read_file')).toBe('read'); }); it('passes through unknown native names unchanged', () => { @@ -136,6 +137,7 @@ describe('grok-build bootstrap', () => { expect(paths).toContain('/tmp/harness/grok-build/package.json'); expect(paths).toContain('/tmp/harness/grok-build/pnpm-lock.yaml'); expect(paths).toContain('/tmp/harness/grok-build/bridge.mjs'); + expect(paths).toContain('/tmp/harness/grok-build/host-tool-mcp.mjs'); expect(bootstrap.commands.some(c => c.command.includes('pnpm'))).toBe(true); }); }); @@ -149,18 +151,6 @@ describe('grok-build doStart', () => { vi.restoreAllMocks(); }); - it('rejects built-in permission modes other than allow-all', async () => { - const harness = createGrokBuild(); - await expect( - 
harness.doStart({ - sessionId: 's1', - sandboxSession: {} as HarnessV1NetworkSandboxSession, - sessionWorkDir: '/vercel/sandbox/grok-s1', - permissionMode: 'allow-edits', - }), - ).rejects.toBeInstanceOf(HarnessCapabilityUnsupportedError); - }); - it('throws when the network sandbox exposes no ports', async () => { const harness = createGrokBuild(); const sandboxSession = { @@ -254,6 +244,29 @@ describe('grok-build doStart', () => { }); }); + it('carries reasoningEffort on the start message when configured', async () => { + const harness = createGrokBuild({ + auth: { xai: { apiKey: 'sk' } }, + reasoningEffort: 'high', + }); + const spawnCalls: Array<{ + command: string; + env: Record; + }> = []; + const runs: string[] = []; + const session = await harness.doStart({ + sessionId: 's1', + sandboxSession: fakeSandbox({ spawnCalls, runs }), + sessionWorkDir: '/vercel/sandbox/grok-s1', + permissionMode: 'allow-all', + }); + + await session.doPromptTurn({ prompt: 'hello', emit: () => {} }); + expect(sentMessages.find(m => m.type === 'start')).toMatchObject({ + reasoningEffort: 'high', + }); + }); + it('sends a continuation start message with continue:true on doContinueTurn', async () => { const harness = createGrokBuild({ auth: { xai: { apiKey: 'sk' } } }); const spawnCalls: Array<{ diff --git a/packages/harness-grok-build/src/grok-build-harness.ts b/packages/harness-grok-build/src/grok-build-harness.ts index 4c1ef5b75f72..edbbb65450c8 100644 --- a/packages/harness-grok-build/src/grok-build-harness.ts +++ b/packages/harness-grok-build/src/grok-build-harness.ts @@ -37,17 +37,17 @@ import { type GrokBuildChannel = SandboxChannel; -// Native tool name → common name. Placeholder names; reconcile with real CLI output. +// Real grok native (snake_case) tool name → HarnessV1 builtin common name. 
export const NATIVE_TO_COMMON: Readonly< Record > = { - Read: 'read', - Write: 'write', - Edit: 'edit', - Bash: 'bash', - Glob: 'glob', - Grep: 'grep', - WebSearch: 'webSearch', + read_file: 'read', + write: 'write', + search_replace: 'edit', + run_terminal_command: 'bash', + grep: 'grep', + list_dir: 'glob', + web_search: 'webSearch', }; export function toCommonName( @@ -56,10 +56,22 @@ export function toCommonName( return NATIVE_TO_COMMON[nativeName] ?? nativeName; } -// Builtin tools, keyed by common name. Placeholder names/schemas; reconcile with real CLI output. +// Common name for a grok native tool name, or null when it has no builtin equivalent. +export function commonNameOrNull( + nativeName: string, +): HarnessV1BuiltinToolName | null { + return NATIVE_TO_COMMON[nativeName] ?? null; +} + +// Set of grok native names that map to a HarnessV1 builtin. +export const GROK_BUILD_BUILTIN_NATIVE_NAMES: ReadonlySet = new Set( + Object.keys(NATIVE_TO_COMMON), +); + +// Builtin tools, keyed by common name. Schemas are placeholders pending CLI confirmation. 
export const GROK_BUILD_BUILTIN_TOOLS = { read: commonTool('read', { - nativeName: 'Read', + nativeName: 'read_file', toolUseKind: 'readonly', description: 'Read file contents (text, image, PDF, notebook)', inputSchema: z.object({ @@ -70,7 +82,7 @@ export const GROK_BUILD_BUILTIN_TOOLS = { }), }), write: commonTool('write', { - nativeName: 'Write', + nativeName: 'write', toolUseKind: 'edit', description: 'Overwrite or create a file at an absolute path', inputSchema: z.object({ @@ -79,7 +91,7 @@ export const GROK_BUILD_BUILTIN_TOOLS = { }), }), edit: commonTool('edit', { - nativeName: 'Edit', + nativeName: 'search_replace', toolUseKind: 'edit', description: 'Edit a file by exact string replacement', inputSchema: z.object({ @@ -90,7 +102,7 @@ export const GROK_BUILD_BUILTIN_TOOLS = { }), }), bash: commonTool('bash', { - nativeName: 'Bash', + nativeName: 'run_terminal_command', toolUseKind: 'bash', description: 'Execute a shell command', inputSchema: z.object({ @@ -101,7 +113,7 @@ export const GROK_BUILD_BUILTIN_TOOLS = { }), }), glob: commonTool('glob', { - nativeName: 'Glob', + nativeName: 'list_dir', toolUseKind: 'readonly', description: 'Fast file-pattern search using glob syntax', inputSchema: z.object({ @@ -110,7 +122,7 @@ export const GROK_BUILD_BUILTIN_TOOLS = { }), }), grep: commonTool('grep', { - nativeName: 'Grep', + nativeName: 'grep', toolUseKind: 'readonly', description: 'Regex search over file contents', inputSchema: z.object({ @@ -119,7 +131,7 @@ export const GROK_BUILD_BUILTIN_TOOLS = { }), }), webSearch: commonTool('webSearch', { - nativeName: 'WebSearch', + nativeName: 'web_search', toolUseKind: 'readonly', description: 'Issue web search queries', inputSchema: z.object({ @@ -139,6 +151,7 @@ const DEFAULT_GROK_MODEL_GATEWAY = 'xai/grok-build-0.1'; export type GrokBuildHarnessSettings = { readonly model?: string; readonly planMode?: boolean; + readonly reasoningEffort?: 'low' | 'medium' | 'high'; readonly auth?: GrokBuildAuthOptions; readonly port?: 
number; /** Maximum milliseconds to wait for the bridge to advertise its port. Defaults to 120000. */ @@ -190,14 +203,15 @@ export function createGrokBuild( specificationVersion: 'harness-v1', harnessId: 'grok-build', builtinTools: GROK_BUILD_BUILTIN_TOOLS, - supportsBuiltinToolApprovals: false, + supportsBuiltinToolApprovals: true, lifecycleStateSchema, getBootstrap: async () => { if (cachedBootstrap != null) return cachedBootstrap; - const [pkg, lock, bridge] = await Promise.all([ + const [pkg, lock, bridge, hostToolMcp] = await Promise.all([ readBridgeAsset('package.json'), readBridgeAsset('pnpm-lock.yaml'), readBridgeAsset('index.mjs'), + readBridgeAsset('host-tool-mcp.mjs'), ]); cachedBootstrap = { harnessId: 'grok-build', @@ -206,6 +220,7 @@ export function createGrokBuild( { path: `${BOOTSTRAP_DIR}/package.json`, content: pkg }, { path: `${BOOTSTRAP_DIR}/pnpm-lock.yaml`, content: lock }, { path: `${BOOTSTRAP_DIR}/bridge.mjs`, content: bridge }, + { path: `${BOOTSTRAP_DIR}/host-tool-mcp.mjs`, content: hostToolMcp }, ], commands: [ { command: `mkdir -p ${BOOTSTRAP_DIR}` }, @@ -220,17 +235,6 @@ export function createGrokBuild( return cachedBootstrap; }, doStart: async startOpts => { - if ( - startOpts.permissionMode != null && - startOpts.permissionMode !== 'allow-all' - ) { - throw new HarnessCapabilityUnsupportedError({ - message: - "Harness 'grok-build' does not support built-in tool approval requests; use permissionMode: 'allow-all'. 
The grok CLI runs with --always-approve and executes tools itself.", - harnessId: 'grok-build', - }); - } - const sandboxSession = startOpts.sandboxSession; const session = sandboxSession.restricted(); const sandboxId = sandboxSession.id; @@ -294,6 +298,7 @@ export function createGrokBuild( channel: attachChannel, proc: undefined, // live bridge owned by another process model, + reasoningEffort: settings.reasoningEffort, isResume: true, bridgePort: coords.port, bridgeToken: coords.token, @@ -391,6 +396,7 @@ export function createGrokBuild( channel, proc, model, + reasoningEffort: settings.reasoningEffort, isResume, bridgePort: boundPort, bridgeToken: token, @@ -491,6 +497,7 @@ function createSession({ channel, proc, model, + reasoningEffort, isResume, bridgePort, bridgeToken, @@ -504,6 +511,7 @@ function createSession({ /** Undefined on `attach` — the live bridge was spawned by another process. */ proc: Experimental_SandboxProcess | undefined; model: string | undefined; + reasoningEffort: 'low' | 'medium' | 'high' | undefined; isResume: boolean; bridgePort: number; bridgeToken: string; @@ -513,7 +521,6 @@ function createSession({ resumeGrokSessionId: string | undefined; }): HarnessV1Session { void debug; - void permissionMode; let stopped = false; let stopPromise: Promise | undefined; @@ -624,20 +631,30 @@ function createSession({ return { done }; }; - // grok self-executes tools (`--always-approve`); these are unsupported no-ops. - const unsupportedToolControl = { - submitToolResult: async () => { - throw new HarnessCapabilityUnsupportedError({ - harnessId: 'grok-build', - message: - 'The grok-build harness executes tools inside the CLI (--always-approve); host tool results are not accepted.', + // Forward host tool results/approvals to the bridge over the ACP channel. 
+ const toolControl = { + submitToolResult: async (input: { + toolCallId: string; + output: unknown; + isError?: boolean; + }) => { + channel.send({ + type: 'tool-result', + toolCallId: input.toolCallId, + output: input.output, + isError: input.isError, }); }, - submitToolApproval: async () => { - throw new HarnessCapabilityUnsupportedError({ - harnessId: 'grok-build', - message: - 'The grok-build harness executes tools inside the CLI (--always-approve); host tool approvals are not accepted.', + submitToolApproval: async (input: { + approvalId: string; + approved: boolean; + reason?: string; + }) => { + channel.send({ + type: 'tool-approval-response', + approvalId: input.approvalId, + approved: input.approved, + reason: input.reason, }); }, }; @@ -662,10 +679,17 @@ function createSession({ channel.send({ type: 'start', prompt, + tools: (promptOpts.tools ?? []).map(t => ({ + name: t.name, + description: t.description, + inputSchema: t.inputSchema, + })), ...(model ? { model } : {}), + ...(reasoningEffort ? { reasoningEffort } : {}), + ...(permissionMode ? { permissionMode } : {}), ...(shouldContinue ? { continue: true } : {}), }); - return { ...unsupportedToolControl, done }; + return { ...toolControl, done }; }, doContinueTurn: async continueOpts => { const { done } = wireTurn({ @@ -676,10 +700,17 @@ function createSession({ channel.send({ type: 'start', prompt: 'Continue.', + tools: (continueOpts.tools ?? []).map(t => ({ + name: t.name, + description: t.description, + inputSchema: t.inputSchema, + })), ...(model ? { model } : {}), + ...(reasoningEffort ? { reasoningEffort } : {}), + ...(permissionMode ? 
{ permissionMode } : {}), continue: true, }); - return { ...unsupportedToolControl, done }; + return { ...toolControl, done }; }, doCompact: async () => { throw new HarnessCapabilityUnsupportedError({ diff --git a/packages/harness-grok-build/src/grok-build-stream-map.test.ts b/packages/harness-grok-build/src/grok-build-stream-map.test.ts index 80f055b9502d..d6325e144d23 100644 --- a/packages/harness-grok-build/src/grok-build-stream-map.test.ts +++ b/packages/harness-grok-build/src/grok-build-stream-map.test.ts @@ -1,55 +1,284 @@ -import { readFileSync } from 'node:fs'; -import { join } from 'node:path'; import { describe, expect, it } from 'vitest'; -import { createStreamMapState, mapStreamLine } from './grok-build-stream-map'; - -const lines = readFileSync( - join(__dirname, '__fixtures__/streaming-json-basic.jsonl'), - 'utf8', -) - .split('\n') - .filter(Boolean); - -const mapAll = () => { - const s = createStreamMapState(); - return lines.flatMap(l => mapStreamLine(l, s)); -}; - -describe('mapStreamLine (grok streaming-json)', () => { - it('emits exactly one stream-start', () => { - expect(mapAll().filter(p => p.type === 'stream-start')).toHaveLength(1); - }); - it('maps thought chunks to reasoning start/delta/end', () => { - const t = mapAll().map(p => p.type); - expect(t).toContain('reasoning-start'); - expect(t).toContain('reasoning-delta'); - expect(t).toContain('reasoning-end'); - }); - it('maps text chunks to text start/delta/end', () => { - const t = mapAll().map(p => p.type); - expect(t).toContain('text-start'); - expect(t).toContain('text-delta'); - expect(t).toContain('text-end'); - }); - it('reasoning ends before text starts (single ordered transition)', () => { - const types = mapAll().map(p => p.type); - const firstText = types.indexOf('text-start'); - const reasoningEnd = types.indexOf('reasoning-end'); - expect(reasoningEnd).toBeGreaterThanOrEqual(0); - expect(firstText).toBeGreaterThan(reasoningEnd); - }); - it('concatenated text deltas 
reconstruct the final answer', () => { - const text = mapAll() - .filter(p => p.type === 'text-delta') - .map((p: any) => p.delta) - .join(''); - expect(text).toContain('hello.txt'); - }); - it('emits a finish for the end event', () => { - const f = mapAll().find(p => p.type === 'finish'); - expect(f).toBeDefined(); - }); - it('never throws on malformed input', () => { - expect(mapStreamLine('not json', createStreamMapState())).toEqual([]); +import { + createAcpStreamState, + finishFromResult, + mapAcpUpdate, +} from './grok-build-stream-map'; + +function run(updates: unknown[]) { + const s = createAcpStreamState(); + return updates.flatMap(u => mapAcpUpdate(u, s)); +} + +describe('mapAcpUpdate', () => { + it('maps thought chunks then message chunks into reasoning then text blocks', () => { + const parts = run([ + { + sessionUpdate: 'agent_thought_chunk', + content: { type: 'text', text: 'think a' }, + }, + { + sessionUpdate: 'agent_thought_chunk', + content: { type: 'text', text: 'think b' }, + }, + { + sessionUpdate: 'agent_message_chunk', + content: { type: 'text', text: 'hello ' }, + }, + { + sessionUpdate: 'agent_message_chunk', + content: { type: 'text', text: 'world' }, + }, + ]); + + expect(parts.filter(p => p.type === 'stream-start')).toHaveLength(1); + + expect(parts.map(p => p.type)).toEqual([ + 'stream-start', + 'reasoning-start', + 'reasoning-delta', + 'reasoning-delta', + 'reasoning-end', + 'text-start', + 'text-delta', + 'text-delta', + ]); + }); + + it('closes the open text block on finish', () => { + const s = createAcpStreamState(); + mapAcpUpdate( + { + sessionUpdate: 'agent_message_chunk', + content: { type: 'text', text: 'hi' }, + }, + s, + ); + const parts = finishFromResult({ stopReason: 'end_turn' }, s); + expect(parts.map(p => p.type)).toEqual([ + 'text-end', + 'finish-step', + 'finish', + ]); + }); + + it('emits exactly one stream-start across many updates', () => { + const parts = run([ + { + sessionUpdate: 'agent_message_chunk', + 
content: { type: 'text', text: 'a' }, + }, + { + sessionUpdate: 'agent_message_chunk', + content: { type: 'text', text: 'b' }, + }, + ]); + expect(parts.filter(p => p.type === 'stream-start')).toHaveLength(1); + }); + + it('maps tool_call to a tool-call part with toolName, nativeName, input', () => { + const parts = run([ + { + sessionUpdate: 'tool_call', + toolCallId: 'tc1', + title: 'read_file', + status: 'pending', + rawInput: { file_path: '/a.txt' }, + }, + ]); + expect(parts.find(p => p.type === 'tool-call')).toMatchObject({ + type: 'tool-call', + toolCallId: 'tc1', + toolName: 'read', + nativeName: 'read_file', + input: JSON.stringify({ file_path: '/a.txt' }), + }); + }); + + it('emits grok-internal tools (search_tool/use_tool) as raw, not tool-call', () => { + const s = createAcpStreamState(); + const searchParts = mapAcpUpdate( + { + sessionUpdate: 'tool_call', + toolCallId: 'g1', + title: 'search_tool', + status: 'pending', + rawInput: { query: 'weather' }, + }, + s, + ); + expect(searchParts.find(p => p.type === 'tool-call')).toBeUndefined(); + expect(searchParts.find(p => p.type === 'raw')).toBeDefined(); + + const updateParts = mapAcpUpdate( + { + sessionUpdate: 'tool_call_update', + toolCallId: 'g1', + status: 'completed', + rawOutput: 'found', + }, + s, + ); + expect(updateParts.find(p => p.type === 'tool-result')).toBeUndefined(); + expect(updateParts.find(p => p.type === 'raw')).toBeDefined(); + }); + + it('emits a file-change for a tool_call carrying a diff content entry', () => { + const parts = run([ + { + sessionUpdate: 'tool_call', + toolCallId: 'tc2', + title: 'search_replace', + kind: 'edit', + status: 'pending', + rawInput: { file_path: '/b.txt' }, + content: [{ type: 'diff', diff: { path: '/b.txt' } }], + }, + ]); + expect(parts.find(p => p.type === 'tool-call')).toBeDefined(); + expect(parts.find(p => p.type === 'file-change')).toMatchObject({ + type: 'file-change', + event: 'modify', + path: '/b.txt', + }); + }); + + it('suppresses 
tool_call and its update for a host tool (relay owns it)', () => { + const s = createAcpStreamState(new Set(['get_weather'])); + const callParts = mapAcpUpdate( + { + sessionUpdate: 'tool_call', + toolCallId: 'h1', + title: 'get_weather', + status: 'pending', + rawInput: { city: 'sf' }, + }, + s, + ); + expect(callParts.find(p => p.type === 'tool-call')).toBeUndefined(); + const updateParts = mapAcpUpdate( + { + sessionUpdate: 'tool_call_update', + toolCallId: 'h1', + status: 'completed', + rawOutput: 'sunny', + }, + s, + ); + expect(updateParts.find(p => p.type === 'tool-result')).toBeUndefined(); + }); + + it('suppresses host tools when grok prefixes the server name onto the title', () => { + const s = createAcpStreamState(new Set(['get_weather'])); + const callParts = mapAcpUpdate( + { + sessionUpdate: 'tool_call', + toolCallId: 'h2', + title: 'harness-tools__get_weather', + status: 'pending', + rawInput: {}, + }, + s, + ); + expect(callParts.find(p => p.type === 'tool-call')).toBeUndefined(); + }); + + it('maps a completed tool_call_update to a tool-result', () => { + const parts = run([ + { + sessionUpdate: 'tool_call_update', + toolCallId: 'tc1', + status: 'completed', + rawOutput: 'done', + }, + ]); + expect(parts.find(p => p.type === 'tool-result')).toMatchObject({ + type: 'tool-result', + toolCallId: 'tc1', + result: 'done', + isError: false, + }); + }); + + it('returns no parts for in_progress tool_call_update', () => { + const s = createAcpStreamState(); + s.streamStarted = true; + const parts = mapAcpUpdate( + { + sessionUpdate: 'tool_call_update', + toolCallId: 'tc1', + status: 'in_progress', + }, + s, + ); + expect(parts).toEqual([]); + }); + + it('maps a failed tool_call_update to an error tool-result', () => { + const parts = run([ + { + sessionUpdate: 'tool_call_update', + toolCallId: 'tc1', + status: 'failed', + content: 'boom', + }, + ]); + expect(parts.find(p => p.type === 'tool-result')).toMatchObject({ + isError: true, + result: 'boom', + }); 
+ }); + + it('maps unknown sessionUpdate to raw', () => { + const parts = run([{ sessionUpdate: 'something_new', foo: 1 }]); + expect(parts).toEqual([ + { type: 'stream-start' }, + { type: 'raw', rawValue: { sessionUpdate: 'something_new', foo: 1 } }, + ]); + }); + + it('returns [] for malformed input', () => { + expect(mapAcpUpdate(null, createAcpStreamState())).toEqual([]); + expect(mapAcpUpdate('nope', createAcpStreamState())).toEqual([]); + expect( + mapAcpUpdate({ noSessionUpdate: true }, createAcpStreamState()), + ).toEqual([]); + }); +}); + +describe('finishFromResult', () => { + it('maps end_turn to stop and populates usage from _meta', () => { + const parts = finishFromResult({ + stopReason: 'end_turn', + _meta: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, + }); + const finish = parts.find(p => p.type === 'finish'); + expect(finish).toMatchObject({ + type: 'finish', + finishReason: { unified: 'stop', raw: 'end_turn' }, + }); + expect( + finish && finish.type === 'finish' && finish.totalUsage, + ).toMatchObject({ + inputTokens: { total: 10 }, + outputTokens: { total: 5 }, + }); + }); + + it('maps max_tokens to length with undefined usage when _meta absent', () => { + const parts = finishFromResult({ stopReason: 'max_tokens' }); + const finish = parts.find(p => p.type === 'finish'); + expect(finish).toMatchObject({ + finishReason: { unified: 'length', raw: 'max_tokens' }, + }); + expect(finish && finish.type === 'finish' && finish.totalUsage).toEqual({ + inputTokens: { + total: undefined, + noCache: undefined, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: undefined, text: undefined, reasoning: undefined }, + }); }); }); diff --git a/packages/harness-grok-build/src/grok-build-stream-map.ts b/packages/harness-grok-build/src/grok-build-stream-map.ts index 9a6613918141..938e7bb5d043 100644 --- a/packages/harness-grok-build/src/grok-build-stream-map.ts +++ b/packages/harness-grok-build/src/grok-build-stream-map.ts @@ 
-1,32 +1,94 @@ import type { HarnessV1StreamPart } from '@ai-sdk/harness'; +import { + GROK_BUILD_BUILTIN_NATIVE_NAMES, + toCommonName, +} from './grok-build-harness'; // V4 types via the finish part shape (@ai-sdk/provider isn't a dependency). type FinishPart = Extract; type LanguageModelV4FinishReason = FinishPart['finishReason']; type LanguageModelV4Usage = FinishPart['totalUsage']; -export type StreamMapState = { +export type AcpStreamState = { streamStarted: boolean; openTextId: string | null; openReasoningId: string | null; nextId: number; + // Host-tool names whose ACP tool_call/tool_call_update events are suppressed: + // the HTTP relay owns the tool-call/tool-result lifecycle for these. + hostToolNames: ReadonlySet; + // Grok native names that map to a HarnessV1 builtin; only these (and host + // tools) may surface as tool-call parts. + builtinNativeNames: ReadonlySet; + // ACP toolCallIds the stream map has classified as host tools (to also drop + // their later tool_call_update without re-deriving the name). + suppressedToolCallIds: Set; + // ACP toolCallIds for grok-internal tools (e.g. search_tool/use_tool) whose + // updates are emitted as raw rather than tool-result. + rawToolCallIds: Set; + // Common tool name per ACP toolCallId, so tool_call_update results can be + // paired with the originating tool-call's name. + toolNamesById: Map; }; -export function createStreamMapState(): StreamMapState { +export function createAcpStreamState( + hostToolNames: ReadonlySet = new Set(), + builtinNativeNames: ReadonlySet = GROK_BUILD_BUILTIN_NATIVE_NAMES, +): AcpStreamState { return { streamStarted: false, openTextId: null, openReasoningId: null, nextId: 0, + hostToolNames, + builtinNativeNames, + suppressedToolCallIds: new Set(), + rawToolCallIds: new Set(), + toolNamesById: new Map(), }; } -function mintId(state: StreamMapState, prefix: string): string { +// Grok prefixes MCP tool titles with the server name (e.g. 
`harness-tools__foo` +// or `harness-tools:foo`). Strip a leading `harness-tools` segment so the bare +// host-tool name can be matched against the registered set. +function stripHostServerPrefix(title: string): string { + const match = title.match(/^harness-tools[:_]+(.+)$/); + return match ? match[1] : title; +} + +function mintId(state: AcpStreamState, prefix: string): string { return `${prefix}_${state.nextId++}`; } -// streaming-json reports no token counts; `undefined` (not 0) signals "not reported". -function unknownUsage(): LanguageModelV4Usage { +function asRecord(value: unknown): Record | null { + return typeof value === 'object' && value !== null + ? (value as Record) + : null; +} + +// ACP content `{type:'text', text}` → the text string, else ''. +function textFromContent(content: unknown): string { + const rec = asRecord(content); + if (rec === null) return ''; + return typeof rec['text'] === 'string' ? rec['text'] : ''; +} + +// Find a `{type:'diff', diff:{path}}` entry in tool_call content; returns its path. +function diffPathFromContent(content: unknown): string | null { + if (!Array.isArray(content)) return null; + for (const entry of content) { + const rec = asRecord(entry); + if (rec?.['type'] === 'diff') { + const diff = asRecord(rec['diff']); + if (diff !== null && typeof diff['path'] === 'string') { + return diff['path']; + } + } + } + return null; +} + +function emptyUsage(): LanguageModelV4Usage { return { inputTokens: { total: undefined, @@ -38,43 +100,53 @@ function unknownUsage(): LanguageModelV4Usage { }; } +function usageFromMeta(meta: unknown): LanguageModelV4Usage { + const rec = asRecord(meta); + if (rec === null) return emptyUsage(); + const num = (key: string): number | undefined => + typeof rec[key] === 'number' ? 
(rec[key] as number) : undefined; + return { + inputTokens: { + total: num('inputTokens'), + noCache: undefined, + cacheRead: num('cachedReadTokens'), + cacheWrite: undefined, + }, + outputTokens: { + total: num('outputTokens'), + text: undefined, + reasoning: num('reasoningTokens'), + }, + raw: { totalTokens: num('totalTokens') ?? null }, + }; +} + function mapStopReason(raw: string | undefined): LanguageModelV4FinishReason { switch (raw) { - case 'EndTurn': + case 'end_turn': return { unified: 'stop', raw }; - case 'MaxTokens': + case 'max_tokens': + case 'max_turn_requests': return { unified: 'length', raw }; - case 'ToolUse': - return { unified: 'tool-calls', raw }; - case 'ContentFilter': + case 'refusal': return { unified: 'content-filter', raw }; - case 'Error': - return { unified: 'error', raw }; + case 'cancelled': + return { unified: 'stop', raw }; default: return { unified: 'other', raw }; } } -// --------------------------------------------------------------------------- -// Core mapping -// --------------------------------------------------------------------------- - -// Map one streaming-json line (`thought`/`text`/`end`) to stream parts. Pure, never throws. -export function mapStreamLine( - rawLine: string, - state: StreamMapState, +// Map one ACP `session/update` notification's `update` to stream parts. Pure, never throws. +export function mapAcpUpdate( + update: unknown, + state: AcpStreamState, ): HarnessV1StreamPart[] { - // JSON.parse, not async safeParseJSON, since this runs per line synchronously. 
- let msg: unknown; - try { - msg = JSON.parse(rawLine); - } catch { - return []; - } - if (typeof msg !== 'object' || msg === null) return []; + const rec = asRecord(update); + if (rec === null) return []; - const anyMsg = msg as Record; - const eventType = anyMsg['type'] as string | undefined; + const sessionUpdate = rec['sessionUpdate']; + if (typeof sessionUpdate !== 'string') return []; const parts: HarnessV1StreamPart[] = []; @@ -101,63 +173,141 @@ export function mapStreamLine( ensureStreamStart(); - switch (eventType) { - case 'thought': { - const data = typeof anyMsg['data'] === 'string' ? anyMsg['data'] : ''; - + switch (sessionUpdate) { + case 'agent_thought_chunk': { + const delta = textFromContent(rec['content']); closeTextBlock(); if (state.openReasoningId === null) { const id = mintId(state, 'reasoning'); state.openReasoningId = id; parts.push({ type: 'reasoning-start', id }); } - parts.push({ type: 'reasoning-delta', id: state.openReasoningId, - delta: data, + delta, }); break; } - case 'text': { - const data = typeof anyMsg['data'] === 'string' ? anyMsg['data'] : ''; - + case 'agent_message_chunk': { + const delta = textFromContent(rec['content']); closeReasoningBlock(); if (state.openTextId === null) { const id = mintId(state, 'text'); state.openTextId = id; parts.push({ type: 'text-start', id }); } + parts.push({ type: 'text-delta', id: state.openTextId, delta }); + break; + } + case 'tool_call': { + const toolCallId = + typeof rec['toolCallId'] === 'string' ? rec['toolCallId'] : ''; + const title = typeof rec['title'] === 'string' ? rec['title'] : ''; + const kind = typeof rec['kind'] === 'string' ? rec['kind'] : undefined; + // Host tools are mediated by the relay, which emits their tool-call / + // tool-result. Suppress grok's own ACP events to avoid duplicates. + if (state.hostToolNames.has(stripHostServerPrefix(title))) { + state.suppressedToolCallIds.add(toolCallId); + return []; + } + // Grok-internal tools (search_tool/use_tool/etc.) 
have no builtin mapping; + // emitting them as tool-call would trip AI_NoSuchToolError. Surface raw. + if (!state.builtinNativeNames.has(title)) { + state.rawToolCallIds.add(toolCallId); + return [{ type: 'raw', rawValue: update }]; + } + const commonName = toCommonName(title); + state.toolNamesById.set(toolCallId, commonName); parts.push({ - type: 'text-delta', - id: state.openTextId, - delta: data, + type: 'tool-call', + toolCallId, + toolName: commonName, + nativeName: title, + input: JSON.stringify(rec['rawInput'] ?? {}), + providerExecuted: true, }); + const diffPath = diffPathFromContent(rec['content']); + if (diffPath !== null) { + parts.push({ type: 'file-change', event: 'modify', path: diffPath }); + } else if (kind === 'edit') { + const single = asRecord( + Array.isArray(rec['content']) ? rec['content'][0] : rec['content'], + ); + const diff = asRecord(single?.['diff']); + if (diff !== null && typeof diff['path'] === 'string') { + parts.push({ + type: 'file-change', + event: 'modify', + path: diff['path'], + }); + } + } break; } - case 'end': { - // Close any open blocks. - closeReasoningBlock(); - closeTextBlock(); - - const stopReason = anyMsg['stopReason'] as string | undefined; - + case 'tool_call_update': { + const updateToolCallId = + typeof rec['toolCallId'] === 'string' ? rec['toolCallId'] : ''; + if (state.rawToolCallIds.has(updateToolCallId)) { + return [{ type: 'raw', rawValue: update }]; + } + const status = typeof rec['status'] === 'string' ? rec['status'] : ''; + if (status !== 'completed' && status !== 'failed') return []; + const toolCallId = updateToolCallId; + if (state.suppressedToolCallIds.has(toolCallId)) return []; + const output = rec['rawOutput'] ?? rec['content'] ?? null; parts.push({ - type: 'finish', - finishReason: mapStopReason(stopReason), - totalUsage: unknownUsage(), + type: 'tool-result', + toolCallId, + toolName: state.toolNamesById.get(toolCallId) ?? 
'', + result: output as never, + isError: status === 'failed', }); break; } + case 'user_message_chunk': + case 'available_commands_update': + case 'current_mode_update': + case 'plan': { + parts.push({ type: 'raw', rawValue: update }); + break; + } + default: { - parts.push({ type: 'raw', rawValue: msg }); + parts.push({ type: 'raw', rawValue: update }); break; } } return parts; } + +// Build the turn-final `finish` part from a `session/prompt` result. Closes open blocks if state given. +export function finishFromResult( + result: { stopReason?: string; _meta?: unknown }, + state?: AcpStreamState, +): HarnessV1StreamPart[] { + const parts: HarnessV1StreamPart[] = []; + if (state) { + if (state.openReasoningId !== null) { + parts.push({ type: 'reasoning-end', id: state.openReasoningId }); + state.openReasoningId = null; + } + if (state.openTextId !== null) { + parts.push({ type: 'text-end', id: state.openTextId }); + state.openTextId = null; + } + } + const finishReason = mapStopReason(result?.stopReason); + const usage = + result?._meta === undefined ? emptyUsage() : usageFromMeta(result._meta); + // finish-step carries the finishReason + usage the agent uses for the result; + // finish only drives end-of-turn telemetry. 
+ parts.push({ type: 'finish-step', finishReason, usage }); + parts.push({ type: 'finish', finishReason, totalUsage: usage }); + return parts; +} diff --git a/packages/harness-grok-build/tsup.config.ts b/packages/harness-grok-build/tsup.config.ts index 46d75096eb81..cf290d13b899 100644 --- a/packages/harness-grok-build/tsup.config.ts +++ b/packages/harness-grok-build/tsup.config.ts @@ -8,14 +8,17 @@ export default defineConfig([ sourcemap: true, }, { - entry: { 'bridge/index': 'src/bridge/index.ts' }, + entry: { + 'bridge/index': 'src/bridge/index.ts', + 'bridge/host-tool-mcp': 'src/bridge/host-tool-mcp.ts', + }, format: ['esm'], target: 'es2022', outExtension: () => ({ js: '.mjs' }), dts: false, sourcemap: true, platform: 'node', - noExternal: ['@ai-sdk/harness'], - external: ['@xai-official/grok', 'ws', 'zod'], + noExternal: ['@ai-sdk/harness', '@ai-sdk/provider-utils'], + external: ['@xai-official/grok', '@modelcontextprotocol/sdk', 'ws', 'zod'], }, ]); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c69c9c14a307..e98700f83bb0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -2621,6 +2621,9 @@ importers: specifier: 3.25.76 version: 3.25.76 devDependencies: + '@modelcontextprotocol/sdk': + specifier: 1.29.0 + version: 1.29.0(@cfworker/json-schema@4.1.1)(zod@3.25.76) '@types/node': specifier: 22.19.19 version: 22.19.19 @@ -6464,12 +6467,6 @@ packages: peerDependencies: hono: ^4 - '@hono/node-server@1.19.14': - resolution: {integrity: sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==} - engines: {node: '>=18.14.1'} - peerDependencies: - hono: ^4 - '@hono/node-ws@1.3.1': resolution: {integrity: sha512-vo/MwCnpJAVHBkGzWjCJ28wF45fYHAfbPZcH2rodZODHtch2GHA94KtMfusmVycTUtsLAsaNsHhtY6P8X3RQsA==} engines: {node: '>=18.14.1'} @@ -24413,10 +24410,6 @@ snapshots: dependencies: hono: 4.12.25 - '@hono/node-server@1.19.14(hono@4.12.25)': - dependencies: - hono: 4.12.25 - 
'@hono/node-ws@1.3.1(@hono/node-server@1.19.13(hono@4.12.25))(hono@4.12.25)': dependencies: '@hono/node-server': 1.19.13(hono@4.12.25) @@ -26059,7 +26052,7 @@ snapshots: '@modelcontextprotocol/sdk@1.29.0(@cfworker/json-schema@4.1.1)(zod@3.25.76)': dependencies: - '@hono/node-server': 1.19.14(hono@4.12.25) + '@hono/node-server': 1.19.13(hono@4.12.25) ajv: 8.20.0 ajv-formats: 3.0.1(ajv@8.20.0) content-type: 1.0.5