From 94244c7d1ffe509a65826fafca9c365f4232d03f Mon Sep 17 00:00:00 2001 From: Kees van Dorp Date: Tue, 23 Jun 2026 11:10:49 +0200 Subject: [PATCH 1/4] feat(tools): add defineClientTool for client-resolved (HITL) tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Authored tools previously had to provide an `execute` (the compiler's `normalizeToolDefinition` and the runtime's `resolveToolDefinition` both called `expectFunction(execute)`). That made it impossible to author a human-in-the-loop tool the way the built-in `ask_question` works — no executor, the call parks for input and resolves out-of-band. Overriding `ask_question` to widen its input schema forced an `execute`, whose auto-result collided with the input response: two `tool_result` blocks for one `tool_use` id, which the provider rejects on resume ("each tool_use must have a single result"). Add `defineClientTool({ description, inputSchema, outputSchema? })`, which stamps `clientResolved: true` and carries no `execute`: - normalize-tool / schema-backed: allow omitting `execute` when `clientResolved`; every other tool still requires it. - resolve-tool: skip reattaching a live `execute` for client-resolved tools. - The runtime already surfaces executeless tools as client-side (buildToolSet / wrapToolExecute return undefined), so no harness change is needed; the resolved definition's `execute` is already Optional. `defineTool` is unchanged and still requires `execute`. Passing `execute` to `defineClientTool` throws. 
Fixes #203 Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kees van Dorp --- .changeset/client-resolved-tools.md | 21 +++++ .../authored-definition/schema-backed.test.ts | 26 +++++++ .../authored-definition/schema-backed.ts | 17 ++++- .../definitions/define-client-tool.test.ts | 31 ++++++++ packages/eve/src/public/definitions/tool.ts | 76 +++++++++++++++++++ packages/eve/src/public/tools/index.ts | 2 + packages/eve/src/runtime/resolve-tool.ts | 18 +++-- 7 files changed, 184 insertions(+), 7 deletions(-) create mode 100644 .changeset/client-resolved-tools.md create mode 100644 packages/eve/src/public/definitions/define-client-tool.test.ts diff --git a/.changeset/client-resolved-tools.md b/.changeset/client-resolved-tools.md new file mode 100644 index 000000000..3d7e7f88e --- /dev/null +++ b/.changeset/client-resolved-tools.md @@ -0,0 +1,21 @@ +--- +"eve": minor +--- + +Add `defineClientTool` for authoring client-resolved (human-in-the-loop) tools. + +A client-resolved tool has **no `execute`**: eve surfaces it to the model, parks +the turn when the model calls it, and resolves the call from the client/HITL +channel (e.g. an `inputResponses` answer) rather than running server code — the +same mechanism the built-in `ask_question` uses, now available to authored +tools. + +This unblocks widening/overriding `ask_question` with a richer, typed input +schema (typed HITL pickers) without the duplicate `tool_result` that a +`defineTool` override produced — authoring previously forced an `execute`, so a +parked call yielded two `tool_result` blocks for one `tool_use` id and the +provider rejected the resumed turn with "each tool_use must have a single +result". See #203. + +`defineTool` is unchanged and still requires `execute`; only `defineClientTool` +may omit it. 
diff --git a/packages/eve/src/internal/authored-definition/schema-backed.test.ts b/packages/eve/src/internal/authored-definition/schema-backed.test.ts index 64dc436a4..4d65a6450 100644 --- a/packages/eve/src/internal/authored-definition/schema-backed.test.ts +++ b/packages/eve/src/internal/authored-definition/schema-backed.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { z } from "#compiled/zod/index.js"; import { + defineClientTool, defineTool, defineDynamic, disableTool, @@ -109,6 +110,31 @@ describe("normalizeToolDefinition", () => { }); }); + it("normalizes a client-resolved tool without an execute function", () => { + const tool = defineClientTool({ + description: "Ask the user to pick a template.", + inputSchema: z.object({ prompt: z.string() }), + }); + + const entry = normalizeToolDefinition(tool, FAILURE_MESSAGE); + + expect(entry.kind).toBe("tool"); + if (entry.kind !== "tool") { + throw new Error("expected tool kind"); + } + expect(entry.definition.execute).toBeUndefined(); + expect(entry.definition.description).toBe("Ask the user to pick a template."); + }); + + it("still requires execute on tools that are not client-resolved", () => { + expect(() => + normalizeToolDefinition( + { description: "Echo.", inputSchema: { type: "object" } }, + FAILURE_MESSAGE, + ), + ).toThrow(FAILURE_MESSAGE); + }); + it("types approval context input from the tool input schema", () => { const tool = defineTool({ description: "Requires city-scoped approval.", diff --git a/packages/eve/src/internal/authored-definition/schema-backed.ts b/packages/eve/src/internal/authored-definition/schema-backed.ts index c82d7eb08..944470258 100644 --- a/packages/eve/src/internal/authored-definition/schema-backed.ts +++ b/packages/eve/src/internal/authored-definition/schema-backed.ts @@ -6,6 +6,7 @@ import { expectString, } from "#internal/authored-module.js"; import type { InternalToolDefinitionWithExecuteFn } from "#shared/tool-definition.js"; +import type { 
Optional } from "#shared/optional.js"; import { normalizeJsonSchemaDefinition } from "#internal/json-schema.js"; import { isDynamicSentinel, type DynamicToolEventName } from "#shared/dynamic-tool-definition.js"; @@ -15,7 +16,9 @@ import { isDynamicSentinel, type DynamicToolEventName } from "#shared/dynamic-to * Identity is path-derived — the compiler stamps the filename slug onto * the compiled entry. This shape never carries an authored `name`. */ -type NormalizedAuthoredTool = Readonly>; +type NormalizedAuthoredTool = Readonly< + Omit, "name"> +>; type MutableNormalizedAuthoredTool = { -readonly [K in keyof NormalizedAuthoredTool]: NormalizedAuthoredTool[K]; }; @@ -62,6 +65,7 @@ export function normalizeToolDefinition(value: unknown, message: string): Normal record, [ "auth", + "clientResolved", "description", "execute", "inputSchema", @@ -77,11 +81,20 @@ export function normalizeToolDefinition(value: unknown, message: string): Normal record.outputSchema === undefined ? undefined : normalizeJsonSchemaDefinition(record.outputSchema, "output"); + /* + * Client-resolved tools (`defineClientTool`, e.g. an `ask_question` override) + * have no executor: the model emits the call, the turn parks for input, and + * the result is supplied out-of-band. They are the one authored shape allowed + * to omit `execute`; every other tool must provide one. 
+ */ + const clientResolved = record.clientResolved === true; const definition: MutableNormalizedAuthoredTool = { description: expectString(record.description, message), - execute: expectFunction(record.execute, message), inputSchema, }; + if (!clientResolved) { + definition.execute = expectFunction(record.execute, message); + } if (outputSchema !== undefined) { definition.outputSchema = outputSchema; } diff --git a/packages/eve/src/public/definitions/define-client-tool.test.ts b/packages/eve/src/public/definitions/define-client-tool.test.ts new file mode 100644 index 000000000..d522473c1 --- /dev/null +++ b/packages/eve/src/public/definitions/define-client-tool.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it } from "vitest"; +import { z } from "#compiled/zod/index.js"; + +import { defineClientTool } from "#public/definitions/tool.js"; +import { isBrandedToolEntry } from "#shared/dynamic-tool-definition.js"; + +describe("defineClientTool", () => { + it("brands the definition and marks it client-resolved with no execute", () => { + const tool = defineClientTool({ + description: "Ask the user a question.", + inputSchema: z.object({ prompt: z.string() }), + }); + + expect(isBrandedToolEntry(tool)).toBe(true); + expect((tool as { clientResolved?: unknown }).clientResolved).toBe(true); + expect((tool as { execute?: unknown }).execute).toBeUndefined(); + expect(tool.description).toBe("Ask the user a question."); + }); + + it("throws when an execute function is supplied", () => { + // `execute` is intentionally absent from the client-resolved overloads; + // cast past the type so the runtime guard itself is exercised. 
+ const withExecute = { + description: "Not allowed.", + inputSchema: z.object({}), + execute: () => ({ status: "ignored" as const }), + } as unknown as Parameters[0]; + + expect(() => defineClientTool(withExecute)).toThrow(/must not define "execute"/); + }); +}); diff --git a/packages/eve/src/public/definitions/tool.ts b/packages/eve/src/public/definitions/tool.ts index 4806cf715..f21a51559 100644 --- a/packages/eve/src/public/definitions/tool.ts +++ b/packages/eve/src/public/definitions/tool.ts @@ -227,6 +227,82 @@ export function defineTool( return definition; } +/** + * Public client-resolved tool definition authored in `agent/tools/*.ts`. + * + * Unlike {@link ToolDefinition}, a client-resolved tool has **no `execute`**. + * eve never runs it; the model emits the call, the turn parks for input, and + * the result is supplied out-of-band (the human-in-the-loop input response, or + * the client tool channel). This is the same shape the built-in `ask_question` + * uses, exposed for authoring so apps can widen its input schema or build + * their own typed HITL pickers. + * + * The `clientResolved: true` marker is stamped by {@link defineClientTool} and + * is how the compiler/runtime know to skip the otherwise-required `execute`. + */ +export type ClientToolDefinition = PublicToolDefinition< + TInput, + TOutput +> & { + readonly clientResolved: true; +}; + +/** + * Defines a client-resolved (human-in-the-loop) tool — a tool with **no + * executor**. eve surfaces it to the model, parks the turn when the model + * calls it, and resolves the call from the client/HITL channel (e.g. an + * `inputResponses` answer) rather than running server code. Its single + * `tool_result` is the user's response. + * + * Author it as the default export of a file in `agent/tools/`. 
Naming the file + * `ask_question.ts` overrides the built-in question tool with a wider, typed + * input schema while keeping native pause/resume: + * + * ```ts + * import { defineClientTool } from "eve/tools"; + * import { z } from "zod"; + * + * export default defineClientTool({ + * description: "Ask the user to pick a template.", + * inputSchema: z.object({ + * prompt: z.string(), + * ui: z.object({ kind: z.literal("template_picker") }).passthrough(), + * }), + * }); + * ``` + * + * Unlike {@link defineTool}, no `execute` is permitted: passing one is a type + * error and also throws at runtime; a client-resolved call is never run by eve. + */ +export function defineClientTool< + TInputSchema extends StandardJSONSchemaV1, +>(definition: { + description: ToolDefinition["description"]; + inputSchema: TInputSchema; + outputSchema?: JsonObject; +}): ClientToolDefinition, unknown>; +export function defineClientTool(definition: { + description: ToolDefinition["description"]; + inputSchema: JsonObject; + outputSchema?: JsonObject; +}): ClientToolDefinition, unknown>; +export function defineClientTool(definition: { + description: ToolDefinition["description"]; + inputSchema: unknown; + outputSchema?: JsonObject; +}): ClientToolDefinition { + if ((definition as { readonly execute?: unknown }).execute !== undefined) { + throw new Error( + `defineClientTool: client-resolved tools must not define "execute". ` + + `The call is resolved by the client/HITL channel, not the server. ` + + `Use defineTool for tools that execute.`, + ); + } + Object.assign(definition, { [TOOL_BRAND]: true, clientResolved: true }); + stampDefinitionKey(definition, `tool:${definition.description}`); + return definition as unknown as ClientToolDefinition; +} + /** * Defines a dynamic resolver evaluated at runtime from stream-event * handlers.
It is shared across three slots, and the directory it is diff --git a/packages/eve/src/public/tools/index.ts b/packages/eve/src/public/tools/index.ts index 7c9a964b1..291f7830c 100644 --- a/packages/eve/src/public/tools/index.ts +++ b/packages/eve/src/public/tools/index.ts @@ -3,8 +3,10 @@ */ export { + type ClientToolDefinition, type DisabledToolSentinel, type EnableWorkflowToolSentinel, + defineClientTool, defineDynamic, defineTool, disableTool, diff --git a/packages/eve/src/runtime/resolve-tool.ts b/packages/eve/src/runtime/resolve-tool.ts index c25e5e698..d76362c05 100644 --- a/packages/eve/src/runtime/resolve-tool.ts +++ b/packages/eve/src/runtime/resolve-tool.ts @@ -46,14 +46,22 @@ export async function resolveToolDefinition( registerDefinitionSource(sourceKey, sourceEntry); registerDefinitionSource(`tool:${resolvedRecord.description}`, sourceEntry); - const execute = expectFunction( - resolvedRecord.execute, - describe(definition, "to provide an execute function"), - ) as ResolvedToolDefinition["execute"]; + /* + * Client-resolved tools (`defineClientTool`) carry no executor — eve never + * runs them; the call parks for input and resolves out-of-band. Every other + * authored tool must reattach a live `execute` from its module export. + */ + const execute = + resolvedRecord.clientResolved === true + ? undefined + : (expectFunction( + resolvedRecord.execute, + describe(definition, "to provide an execute function"), + ) as ResolvedToolDefinition["execute"]); return { description: definition.description, - execute, + ...(execute !== undefined ? 
{ execute } : {}), exportName: definition.exportName, inputSchema: definition.inputSchema, logicalPath: definition.logicalPath, From 1d2077c23605f3a6eee88ec6979e47076019519c Mon Sep 17 00:00:00 2001 From: Kees van Dorp Date: Tue, 23 Jun 2026 13:39:04 +0200 Subject: [PATCH 2/4] test(tools): reject mixed client-resolved+execute shape; add HITL regressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback on the defineClientTool contract: - Reject mixed shapes at BOTH the compiler (normalize-tool) and runtime (resolve-tool): a client-resolved tool that also defines `execute` now throws instead of silently dropping the executor. (A non-client tool that omits `execute` was already rejected.) - e2e HITL fixture regressions: - client-resolved-question: an authored, widened `ask_question` (defineClientTool + typed `ui`) parks, resumes from a structured answer, and continues into a downstream `note` tool — exactly one tool_result for the parked call id. Before the fix this resume 400'd ("each tool_use must have a single result"); a green resume + downstream call proves the single result. - approval-vs-client-resolved: proves executable-with-approval and client-resolved input are separate paths — approval runs the executor; client input supplies the result. Verified: eve typecheck + unit tests (incl. the mixed-shape rejection) + oxlint; the HITL fixture typechecks (tsc) and `eve build`s with the override. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kees van Dorp --- .changeset/client-resolved-tools.md | 4 +- .../agent/tools/ask_question.ts | 25 +++++++ .../agent-tools-hitl/agent/tools/note.ts | 19 +++++ .../hitl/approval-vs-client-resolved.eval.ts | 69 +++++++++++++++++++ .../hitl/client-resolved-question.eval.ts | 49 +++++++++++++ .../authored-definition/schema-backed.test.ts | 14 ++++ .../authored-definition/schema-backed.ts | 11 ++- packages/eve/src/runtime/resolve-tool.ts | 24 ++++--- 8 files changed, 205 insertions(+), 10 deletions(-) create mode 100644 e2e/fixtures/agent-tools-hitl/agent/tools/ask_question.ts create mode 100644 e2e/fixtures/agent-tools-hitl/agent/tools/note.ts create mode 100644 e2e/fixtures/agent-tools-hitl/evals/hitl/approval-vs-client-resolved.eval.ts create mode 100644 e2e/fixtures/agent-tools-hitl/evals/hitl/client-resolved-question.eval.ts diff --git a/.changeset/client-resolved-tools.md b/.changeset/client-resolved-tools.md index 3d7e7f88e..916dcaf85 100644 --- a/.changeset/client-resolved-tools.md +++ b/.changeset/client-resolved-tools.md @@ -18,4 +18,6 @@ provider rejected the resumed turn with "each tool_use must have a single result". See #203. `defineTool` is unchanged and still requires `execute`; only `defineClientTool` -may omit it. +may omit it. The two shapes are mutually exclusive: the compiler and runtime +reject a client-resolved tool that also defines `execute` (and a non-client tool +that omits it). diff --git a/e2e/fixtures/agent-tools-hitl/agent/tools/ask_question.ts b/e2e/fixtures/agent-tools-hitl/agent/tools/ask_question.ts new file mode 100644 index 000000000..d14a84c97 --- /dev/null +++ b/e2e/fixtures/agent-tools-hitl/agent/tools/ask_question.ts @@ -0,0 +1,25 @@ +import { defineClientTool } from "eve/tools"; +import { z } from "zod"; + +/** + * Authored override of the built-in `ask_question`, widened with a typed `ui` + * payload. 
This is the exact shape that regressed in #203: as a plain + * `defineTool` it was forced to carry an `execute`, so a parked call produced a + * second `tool_result` for its `tool_use` id and the resumed turn was rejected + * ("each tool_use must have a single result"). As a `defineClientTool` it has no + * executor — the call parks for input and resolves to exactly one result. + * + * The schema is a strict superset of the built-in (prompt / options / + * allowFreeform), so the existing `ask-question-select` eval keeps passing while + * exercising the authored path. + */ +export default defineClientTool({ + description: + "Ask the user a question and wait for their answer. Plain choice: prompt (+ options, allowFreeform). Rich input: set a typed `ui` payload to render a picker.", + inputSchema: z.object({ + prompt: z.string(), + options: z.array(z.object({ id: z.string(), label: z.string() })).optional(), + allowFreeform: z.boolean().optional(), + ui: z.object({ kind: z.string() }).passthrough().optional(), + }), +}); diff --git a/e2e/fixtures/agent-tools-hitl/agent/tools/note.ts b/e2e/fixtures/agent-tools-hitl/agent/tools/note.ts new file mode 100644 index 000000000..2b03dd71e --- /dev/null +++ b/e2e/fixtures/agent-tools-hitl/agent/tools/note.ts @@ -0,0 +1,19 @@ +import { defineTool } from "eve/tools"; +import { z } from "zod"; + +/** + * Trivial downstream executable tool. Used by the client-resolved regression to + * continue the turn AFTER a parked `ask_question` resumes — exercising the + * reconstructed provider history (a duplicate result for the parked call would + * reject that follow-up model call). + */ +export default defineTool({ + description: + "Record a short note. 
Call this once, after the user answers a question, with their answer as `text`.", + inputSchema: z.object({ + text: z.string().describe("The text to record."), + }), + async execute(input) { + return { recorded: input.text }; + }, +}); diff --git a/e2e/fixtures/agent-tools-hitl/evals/hitl/approval-vs-client-resolved.eval.ts b/e2e/fixtures/agent-tools-hitl/evals/hitl/approval-vs-client-resolved.eval.ts new file mode 100644 index 000000000..7c11c3d4f --- /dev/null +++ b/e2e/fixtures/agent-tools-hitl/evals/hitl/approval-vs-client-resolved.eval.ts @@ -0,0 +1,69 @@ +import { defineEval } from "eve/evals"; + +import { GUARDED_ECHO_TOKEN, guardedEchoResults } from "./shared.js"; + +/** + * Separation regression: executable-with-approval and client-resolved are two + * distinct paths. + * + * - Path A — `guarded-echo` is an approval-gated *executable* tool: it parks for + * APPROVAL (confirmation), then its `execute` runs and the tool's own output + * (the executor token) is the result. + * - Path B — `ask_question` is client-resolved: it parks for INPUT (not + * approval) and the user's answer IS the result; no executor runs. + * + * Same parking machinery, opposite result sources — this asserts they don't + * collapse into one another. + */ +export default defineEval({ + description: + "Approval-gated execution and client-resolved input are separate paths: approval runs the executor; client input supplies the result.", + async test(t) { + // Path A — approval gate on an executable tool. 
+ await t.send('Call the guarded-echo tool with note "sep".'); + const [approvalRequest] = t.expectInputRequests({ toolName: "guarded-echo" }); + if (approvalRequest === undefined) { + throw new Error("Expected a guarded-echo approval request."); + } + if (approvalRequest.display !== undefined && approvalRequest.display !== "confirmation") { + throw new Error( + `Approval must present as a confirmation, got ${String(approvalRequest.display)}.`, + ); + } + const approved = await t.respondAll("approve"); + approved.expectOk(); + const [echoed] = guardedEchoResults(t.events); + if (echoed === undefined || !echoed.includes(GUARDED_ECHO_TOKEN)) { + throw new Error("Approved executable tool did not run its executor."); + } + + // Path B — client-resolved input on the same session. + await t.send( + [ + "Now use the `ask_question` tool exactly once to ask me which color I prefer.", + "Set prompt to: 'Pick a color.'", + 'Provide exactly two options: - id "red", label "Red" - id "blue", label "Blue"', + "Do not answer the question yourself, wait for my response.", + ].join("\n"), + ); + const [inputRequest] = t.expectInputRequests({ toolName: "ask_question" }); + if (inputRequest === undefined) { + throw new Error("Expected an ask_question input request."); + } + if (inputRequest.display === "confirmation") { + throw new Error("Client-resolved input must not present as an approval confirmation."); + } + const answered = await t.respondAll("blue"); + answered.expectOk(); + + // The client-resolved tool produced no executor result of its own — only the + // earlier approved guarded-echo did. 
+ if (guardedEchoResults(t.events).length !== 1) { + throw new Error("Unexpected extra executor result from the client-resolved path."); + } + + t.didNotFail(); + t.completed(); + t.messageIncludes(/\bblue\b/i); + }, +}); diff --git a/e2e/fixtures/agent-tools-hitl/evals/hitl/client-resolved-question.eval.ts b/e2e/fixtures/agent-tools-hitl/evals/hitl/client-resolved-question.eval.ts new file mode 100644 index 000000000..5c7c7392d --- /dev/null +++ b/e2e/fixtures/agent-tools-hitl/evals/hitl/client-resolved-question.eval.ts @@ -0,0 +1,49 @@ +import { defineEval } from "eve/evals"; + +/** + * Regression for #203 — the one that matters. + * + * An AUTHORED, widened `ask_question` (`defineClientTool` with a typed `ui` + * payload) must: park the turn on call, resume from a structured answer, and let + * the turn continue into a downstream executable tool — producing exactly ONE + * `tool_result` for the parked `tool_use` id. + * + * The single-result invariant is asserted operationally: before the fix the + * authored override carried an `execute`, so the resumed turn reconstructed two + * `tool_result` blocks for one id and the provider rejected it with a 400. So a + * green resume (`expectOk`) that continues into `note` and `completed()` can only + * happen if the reconstructed history held a single result for the call. + */ +export default defineEval({ + description: + "Client-resolved ask_question override parks, resumes from a structured answer, and continues to a downstream tool — one result, no duplicate-result failure on resume.", + async test(t) { + await t.send( + [ + "Use the `ask_question` tool exactly once to ask me to pick a template.", + "Set prompt to: 'Pick a template.' 
and set ui to { kind: 'template' }.", + "After I answer, call the `note` tool exactly once with text set to my answer.", + "Do not answer the question yourself, wait for my response.", + ].join("\n"), + ); + + const [request] = t.expectInputRequests({ toolName: "ask_question" }); + if (request === undefined) { + throw new Error("Expected a pending ask_question input request."); + } + + // Resume with a structured (JSON) answer, the way a typed picker resolves. + const resumed = await t.respond({ + requestId: request.requestId, + text: '{"picked_ids":["tpl_blue"]}', + }); + resumed.expectOk(); + + // The resumed turn flowed into a downstream executable tool, so the + // reconstructed provider history (with the parked call's single result) was + // accepted by the model call. + t.calledTool("note"); + t.didNotFail(); + t.completed(); + }, +}); diff --git a/packages/eve/src/internal/authored-definition/schema-backed.test.ts b/packages/eve/src/internal/authored-definition/schema-backed.test.ts index 4d65a6450..a475316d1 100644 --- a/packages/eve/src/internal/authored-definition/schema-backed.test.ts +++ b/packages/eve/src/internal/authored-definition/schema-backed.test.ts @@ -135,6 +135,20 @@ describe("normalizeToolDefinition", () => { ).toThrow(FAILURE_MESSAGE); }); + it("rejects a client-resolved tool that also defines execute", () => { + expect(() => + normalizeToolDefinition( + { + description: "Mixed shape.", + inputSchema: { type: "object" }, + clientResolved: true, + execute: () => ({ status: "ignored" as const }), + }, + FAILURE_MESSAGE, + ), + ).toThrow(/must not define an "execute"/); + }); + it("types approval context input from the tool input schema", () => { const tool = defineTool({ description: "Requires city-scoped approval.", diff --git a/packages/eve/src/internal/authored-definition/schema-backed.ts b/packages/eve/src/internal/authored-definition/schema-backed.ts index 944470258..e2577aa5f 100644 --- 
a/packages/eve/src/internal/authored-definition/schema-backed.ts +++ b/packages/eve/src/internal/authored-definition/schema-backed.ts @@ -85,9 +85,18 @@ export function normalizeToolDefinition(value: unknown, message: string): Normal * Client-resolved tools (`defineClientTool`, e.g. an `ask_question` override) * have no executor: the model emits the call, the turn parks for input, and * the result is supplied out-of-band. They are the one authored shape allowed - * to omit `execute`; every other tool must provide one. + * to omit `execute`; every other tool must provide one. The two shapes are + * mutually exclusive — a client-resolved tool that also carried an `execute` + * would yield a second result for its call id, so reject that mix outright + * rather than silently dropping the executor. */ const clientResolved = record.clientResolved === true; + if (clientResolved && record.execute !== undefined) { + throw new Error( + `A client-resolved tool must not define an "execute" function — its result ` + + `comes from the client/HITL input channel, not a server executor. ${message}`, + ); + } const definition: MutableNormalizedAuthoredTool = { description: expectString(record.description, message), inputSchema, diff --git a/packages/eve/src/runtime/resolve-tool.ts b/packages/eve/src/runtime/resolve-tool.ts index d76362c05..0d7e0ce0b 100644 --- a/packages/eve/src/runtime/resolve-tool.ts +++ b/packages/eve/src/runtime/resolve-tool.ts @@ -49,15 +49,23 @@ export async function resolveToolDefinition( /* * Client-resolved tools (`defineClientTool`) carry no executor — eve never * runs them; the call parks for input and resolves out-of-band. Every other - * authored tool must reattach a live `execute` from its module export. + * authored tool must reattach a live `execute` from its module export. The + * shapes are mutually exclusive: reject a client-resolved export that also + * carries an `execute` (defense in depth — the compiler rejects it too). 
*/ - const execute = - resolvedRecord.clientResolved === true - ? undefined - : (expectFunction( - resolvedRecord.execute, - describe(definition, "to provide an execute function"), - ) as ResolvedToolDefinition["execute"]); + const clientResolved = resolvedRecord.clientResolved === true; + if (clientResolved && resolvedRecord.execute !== undefined) { + throw new ResolveAgentError( + describe(definition, 'not to define an "execute" function when it is client-resolved'), + { logicalPath: definition.logicalPath, sourceId: definition.sourceId }, + ); + } + const execute = clientResolved + ? undefined + : (expectFunction( + resolvedRecord.execute, + describe(definition, "to provide an execute function"), + ) as ResolvedToolDefinition["execute"]); return { description: definition.description, From b8f08de3ea31a74cb731c0672054f7211e083687 Mon Sep 17 00:00:00 2001 From: Kees van Dorp Date: Tue, 23 Jun 2026 17:02:48 +0200 Subject: [PATCH 3/4] ci: add gateway-free client-resolved-hitl-construction merge gate Splits the construction contract from the gateway-backed evals (per review on #204). New merge-blocking job proves, with no model gateway: - the client-resolved omit-execute + mixed-shape rejection unit guards, and - that the authored HITL fixture (ask_question override + approval-vs- client-resolved fixture) typechecks and builds against the authored surface. The gateway-backed `eve eval` (e2e-local / e2e-vercel) stays as runtime release evidence (single result on resume; approval-gated execution kept separate from client-resolved input). Keeps CI fast while guaranteeing no executor-less path ships without a fixture compiled against it. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kees van Dorp --- .github/workflows/ci.yml | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3caf05fcf..1d5671ca9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,6 +91,51 @@ jobs: - name: Run unit tests run: pnpm test:unit + client-resolved-hitl-construction: + # Gateway-free, merge-blocking proof of the client-resolved / HITL + # CONSTRUCTION contract (vercel/eve#203): a client-resolved tool may omit + # `execute`, the mixed `clientResolved + execute` shape is rejected at both + # compile and runtime, and the authored HITL fixture (the `ask_question` + # override + the approval-vs-client-resolved fixture) typechecks and builds + # against the authored surface. The gateway-backed `eve eval` + # (e2e-local / e2e-vercel) stays as the RUNTIME evidence — that the resumed + # history yields exactly one result and keeps approval-gated execution + # separate from client-resolved input. This job guarantees no executor-less + # path ships without a fixture compiled against it. + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + + - name: Setup pnpm + uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6 + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6 + with: + node-version-file: .nvmrc + cache: "pnpm" + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build eve + run: pnpm --filter eve build + + - name: Construction guards (omit-execute + mixed-shape rejection) + # Also covered by the full suite in test-unit; pinned here so this job is + # a self-contained statement of the construction contract. 
+ run: >- + pnpm --filter eve exec vitest run --config vitest.unit.config.ts + src/public/definitions/define-client-tool.test.ts + src/internal/authored-definition/schema-backed.test.ts + + - name: Build + typecheck the authored HITL fixture + run: pnpm --filter agent-tools-hitl run typecheck + test-integration: name: test-integration (${{ matrix.os }}) runs-on: ${{ matrix.os }} From 6b54d8f1a748efebc1ae94f26208f3f697897a38 Mon Sep 17 00:00:00 2001 From: Kees van Dorp Date: Tue, 23 Jun 2026 17:15:03 +0200 Subject: [PATCH 4/4] docs(tools): document defineClientTool / client-resolved tools Public API added in this PR needs docs (per CONTRIBUTING). Add a "Custom client-resolved tools" section to the human-in-the-loop page covering defineClientTool: no execute, the ask_question override for typed pickers, the parked-input contract, and that defineTool/defineClientTool are mutually exclusive (exactly one result). Cross-link from the tools overview. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kees van Dorp --- docs/tools/human-in-the-loop.md | 21 +++++++++++++++++++++ docs/tools/overview.mdx | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/tools/human-in-the-loop.md b/docs/tools/human-in-the-loop.md index a9b0af5b6..3fe5e9308 100644 --- a/docs/tools/human-in-the-loop.md +++ b/docs/tools/human-in-the-loop.md @@ -56,6 +56,27 @@ The built-in `ask_question` tool lets the model pause and ask the user, rather t `ask_question` is part of the [default harness](/docs/concepts/default-harness), so it is available without you defining anything. It produces the same `input.requested` pause as an approval, and resumes the same way. +### Custom client-resolved tools + +`ask_question` is a _client-resolved_ tool: it has no `execute`, so eve never runs it — the model emits the call, the turn parks, and the user's answer becomes its single result. 
Author your own with `defineClientTool` when you want that exact pause-and-resume but with a **richer, typed input schema** — a "pick a template", "choose media", or multi-field form that a channel or your frontend renders as a dedicated widget: + +```ts title="agent/tools/ask_question.ts" +import { defineClientTool } from "eve/tools"; +import { z } from "zod"; + +export default defineClientTool({ + description: "Ask the user to pick a template.", + inputSchema: z.object({ + prompt: z.string(), + ui: z.object({ kind: z.literal("template_picker") }).passthrough(), + }), +}); +``` + +Naming the file `ask_question.ts` overrides the built-in question tool with your wider schema while keeping the same parking behavior (parking is keyed on the `ask_question` name). The parked `input.requested` carries your full typed input, so the client renders the picker from it and resumes the turn with the user's structured choice. + +A `defineClientTool` tool must **not** declare an `execute` — that is the whole point, and passing one is rejected at compile time and at runtime. [`defineTool`](/docs/tools) is the opposite: it always requires an `execute` and runs on the server. The two shapes are mutually exclusive, so a parked call always resolves to exactly one result. ## How pause and resume works Approvals and questions share one protocol: diff --git a/docs/tools/overview.mdx b/docs/tools/overview.mdx index c43637a02..923f01f86 100644 --- a/docs/tools/overview.mdx +++ b/docs/tools/overview.mdx @@ -63,7 +63,7 @@ export default defineTool({ }); ``` -Approval is one half of eve's [human-in-the-loop](./human-in-the-loop) model — the page covers the `always/once/never` helpers, input-dependent predicates, and how a gated call pauses and resumes durably. +Approval is one half of eve's [human-in-the-loop](./human-in-the-loop) model — the page covers the `always/once/never` helpers, input-dependent predicates, and how a gated call pauses and resumes durably.
For the other half — a tool with **no** `execute` that parks for a typed user answer (`defineClientTool`, e.g. a custom picker) — see [client-resolved tools](./human-in-the-loop#custom-client-resolved-tools). ## Shape what the model sees with `toModelOutput`