From 62773adaf3bb2334f624674dac0a489269a8bddf Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Thu, 18 Jun 2026 11:10:28 +0200 Subject: [PATCH] feat(mcp): include schema-token proxy in live registration telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit savings_report's registration_cost reported name+description tokens only, under-counting the real per-session injection cost. Add a schema_tokens proxy (param keys + public .describe() strings) and total_tokens. A full zod->JSON-schema conversion is deliberately avoided (version-coupled; emits an empty schema across mismatched zod instances) — the byte-exact schema-inclusive budget stays in tool-budget.test.ts (lean <=4200, full <=15000). --- .changeset/schema-token-telemetry.md | 5 ++++ apps/mcp-server/src/server.ts | 22 +++++++++++++- apps/mcp-server/src/tools/savings.ts | 5 +++- apps/mcp-server/src/tools/tool-profile.ts | 30 ++++++++++++++----- apps/mcp-server/test/server.test.ts | 10 +++++++ .../.openspec.yaml | 2 ++ .../notes.md | 16 ++++++++++ 7 files changed, 81 insertions(+), 9 deletions(-) create mode 100644 .changeset/schema-token-telemetry.md create mode 100644 openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/.openspec.yaml create mode 100644 openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/notes.md diff --git a/.changeset/schema-token-telemetry.md b/.changeset/schema-token-telemetry.md new file mode 100644 index 00000000..19f62dc2 --- /dev/null +++ b/.changeset/schema-token-telemetry.md @@ -0,0 +1,5 @@ +--- +"@colony/mcp-server": patch +--- + +mcp: live registration telemetry now reports a `schema_tokens` proxy + `total_tokens` in `savings_report`. 
Previously `registration_cost` counted name+description only, under-reporting the true per-session injection cost; the schema portion (param keys + describe strings) is now included so live receipts track closer to the byte-exact budget enforced by `tool-budget.test.ts`. diff --git a/apps/mcp-server/src/server.ts b/apps/mcp-server/src/server.ts index 28d8703e..1aa95170 100644 --- a/apps/mcp-server/src/server.ts +++ b/apps/mcp-server/src/server.ts @@ -51,6 +51,24 @@ export { buildBridgeStatusPayload } from './tools/bridge.js'; export type { BridgeStatus, BridgeStatusOptions } from './tools/bridge.js'; export { LEAN_TOOLS, resolveToolProfile } from './tools/tool-profile.js'; +/** + * Token proxy for a tool's input schema: param keys plus their public + * `.describe()` strings — the agent-facing bulk of the rendered JSON schema. + * Deliberately NOT a full zod→JSON-schema conversion (version-coupled; the + * byte-exact schema-inclusive budget lives in tool-budget.test.ts). Telemetry + * must never break registration, so a non-shape arg simply counts 0. + */ +function countSchemaTokens(schema: unknown): number { + if (!schema || typeof schema !== 'object' || Array.isArray(schema)) return 0; + const parts: string[] = []; + for (const [key, value] of Object.entries(schema as Record<string, unknown>)) { + parts.push(key); + const desc = (value as { description?: unknown } | null)?.description; + if (typeof desc === 'string') parts.push(desc); + } + return parts.length === 0 ? 
0 : countTokens(parts.join(' ')); +} + /** * MCP stdio server exposing progressive-disclosure tools: * - search: compact hits with BM25 + optional semantic re-rank @@ -81,10 +99,12 @@ export function buildServer( profile: toolProfile, tool_count: 0, name_description_tokens: 0, + schema_tokens: 0, }; - const recordRegistration = (name: string, description: string): void => { + const recordRegistration = (name: string, description: string, schema?: unknown): void => { registrationStats.tool_count += 1; registrationStats.name_description_tokens += countTokens(`${name} ${description}`); + registrationStats.schema_tokens += countSchemaTokens(schema); }; const registrar = gateToolRegistration( server, diff --git a/apps/mcp-server/src/tools/savings.ts b/apps/mcp-server/src/tools/savings.ts index 61d5a783..07f85dca 100644 --- a/apps/mcp-server/src/tools/savings.ts +++ b/apps/mcp-server/src/tools/savings.ts @@ -135,7 +135,10 @@ export function register(server: McpServer, ctx: ToolContext): void { profile: ctx.registrationStats.profile, tool_count: ctx.registrationStats.tool_count, name_description_tokens: ctx.registrationStats.name_description_tokens, - note: 'Per-session schema-injection cost basis: name+description tokens only. Schema-inclusive budgets are enforced by apps/mcp-server/test/tool-budget.test.ts (lean <=4200, full <=15000).', + schema_tokens: ctx.registrationStats.schema_tokens, + total_tokens: + ctx.registrationStats.name_description_tokens + ctx.registrationStats.schema_tokens, + note: 'Per-session schema-injection cost: name+description tokens plus a schema_tokens proxy (param keys + describe strings). 
The byte-exact schema-inclusive budget (SDK JSON.stringify(inputSchema)) is enforced by apps/mcp-server/test/tool-budget.test.ts (lean <=4200, full <=15000).', } : null; if (honest === true) { diff --git a/apps/mcp-server/src/tools/tool-profile.ts b/apps/mcp-server/src/tools/tool-profile.ts index 2bb15b41..3e6f8f53 100644 --- a/apps/mcp-server/src/tools/tool-profile.ts +++ b/apps/mcp-server/src/tools/tool-profile.ts @@ -59,7 +59,7 @@ export function resolveToolProfile( export function gateToolRegistration( server: McpServer, allow: (name: string) => boolean, - onRegister?: (name: string, description: string) => void, + onRegister?: (name: string, description: string, schema?: unknown) => void, ): McpServer { return new Proxy(server, { get(target, prop, _receiver) { @@ -68,7 +68,16 @@ export function gateToolRegistration( const name = args[0]; if (typeof name === 'string' && !allow(name)) return undefined; if (typeof name === 'string' && onRegister) { - onRegister(name, typeof args[1] === 'string' ? args[1] : ''); + // server.tool(name, description?, paramsShape?, annotations?, handler): + // the param shape sits after the description when present, else right + // after name. An annotations object can also land in that slot; it has no + // `.describe()` strings, so only its key names are counted — a small overcount. + const hasDescription = typeof args[1] === 'string'; + onRegister( + name, + hasDescription ? (args[1] as string) : '', + hasDescription ? args[2] : args[1], + ); } return (target.tool as (...a: unknown[]) => unknown).apply(target, args); }; @@ -82,15 +91,22 @@ export function gateToolRegistration( } /** - * Registration-cost telemetry captured while tools register. Token figure - * covers name + description only — input schemas are zod shapes here and only - * become countable JSON schema at listTools time; the schema-inclusive budget - * lives in apps/mcp-server/test/tool-budget.test.ts. 
Tools registered via the - * SDK's schema-first overload (no description string) count name-only — a + * Registration-cost telemetry captured while tools register. + * `name_description_tokens` covers name + description. `schema_tokens` is a + * robust proxy for the input-schema injection cost — param keys plus their + * public `.describe()` strings, the agent-facing bulk of the rendered JSON + * schema. It is intentionally NOT a full zod→JSON-schema conversion: that is + * version-coupled (zod-to-json-schema is pinned to a specific zod build and + * silently emits an empty schema across instances) and belongs to the + * authoritative byte-exact budget gate in + * apps/mcp-server/test/tool-budget.test.ts, which counts the SDK's real + * JSON.stringify(inputSchema). Tools registered via the SDK's schema-first + * overload (no description string) count name-only for the prose figure — a * known undercount, acceptable for trend telemetry. */ export interface ToolRegistrationStats { profile: McpToolProfile; tool_count: number; name_description_tokens: number; + schema_tokens: number; } diff --git a/apps/mcp-server/test/server.test.ts b/apps/mcp-server/test/server.test.ts index 0d1ed604..6dd13317 100644 --- a/apps/mcp-server/test/server.test.ts +++ b/apps/mcp-server/test/server.test.ts @@ -285,11 +285,21 @@ describe('MCP server', () => { profile: string; tool_count: number; name_description_tokens: number; + schema_tokens: number; + total_tokens: number; } | null; }; expect(withRegistration.registration_cost).toMatchObject({ profile: 'full' }); expect(withRegistration.registration_cost?.tool_count).toBeGreaterThan(70); expect(withRegistration.registration_cost?.name_description_tokens).toBeGreaterThan(1000); + // > 200, not > 0: 70+ tools' param keys alone clear this, so a silent + // regression in the `.describe()` path (schema_tokens collapsing toward + // key-only counts) still trips the floor. 
+ expect(withRegistration.registration_cost?.schema_tokens).toBeGreaterThan(200); + expect(withRegistration.registration_cost?.total_tokens).toBe( + (withRegistration.registration_cost?.name_description_tokens ?? 0) + + (withRegistration.registration_cost?.schema_tokens ?? 0), + ); expect(payload.live.cost_basis.configured).toBe(true); expect(payload.live.totals.total_cost_usd).toBeCloseTo(0.005, 12); expect(payload.live.totals.avg_cost_usd).toBeCloseTo(0.005, 12); diff --git a/openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/.openspec.yaml b/openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/.openspec.yaml new file mode 100644 index 00000000..95ae5a2c --- /dev/null +++ b/openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-18 diff --git a/openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/notes.md b/openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/notes.md new file mode 100644 index 00000000..edc285eb --- /dev/null +++ b/openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/notes.md @@ -0,0 +1,16 @@ +# agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54 (minimal / T1) + +Branch: `agent//` + +Describe the change in a sentence or two. Commit message is the spec of record. + +## Handoff + +- Handoff: change=`agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54`; branch=`agent//`; scope=`TODO`; action=`continue this sandbox or finish cleanup after a usage-limit/manual takeover`. +- Copy prompt: Continue `agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54` on branch `agent//`. 
Work inside the existing sandbox, review `openspec/changes/agent-claude-mcp-count-schema-tokens-in-registration-2026-06-18-10-54/notes.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent// --base dev --via-pr --wait-for-merge --cleanup`. + +## Cleanup + +- [ ] Run: `gx branch finish --branch agent// --base dev --via-pr --wait-for-merge --cleanup` +- [ ] Record PR URL + `MERGED` state in the completion handoff. +- [ ] Confirm sandbox worktree is gone (`git worktree list`, `git branch -a`).