From 52068ba7479df6349892f015167524c6adf0b50b Mon Sep 17 00:00:00 2001 From: Kristiyan Ivanov Date: Mon, 8 Jun 2026 12:51:39 +0300 Subject: [PATCH] fix(agent-cache): include tool definitions in cache key for Vercel and LlamaIndex adapters The Vercel AI SDK and LlamaIndex adapters were not including tool definitions in the cache key, so requests with identical messages but different tools could return the same cached response. This also adds seed, stopSequences, responseFormat, and toolChoice to the Vercel adapter key. LlamaIndex prepareParams/prepare_params now accepts a tools option. The call closure is never serialized; only metadata (name, description, parameters) is used. Omitting tools falls back to messages-only keying (prior behavior). Tool-using requests will produce different keys than before, causing a one-time cache miss on upgrade. This is intended. Includes divergence/stability tests for all three fixed paths (Vercel TS, LlamaIndex TS, LlamaIndex Python) and updated changelogs and docs. --- docs/packages/agent-cache.md | 24 ++ packages/agent-cache-py/CHANGELOG.md | 15 + .../adapters/llamaindex.py | 44 +++ .../test_llamaindex_key_divergence.py | 105 +++++++ packages/agent-cache/CHANGELOG.md | 16 + .../__tests__/adapter-key-divergence.test.ts | 296 ++++++++++++++++++ packages/agent-cache/src/adapters/ai.ts | 50 ++- .../agent-cache/src/adapters/llamaindex.ts | 36 +++ 8 files changed, 584 insertions(+), 2 deletions(-) create mode 100644 packages/agent-cache-py/tests/adapters/test_llamaindex_key_divergence.py create mode 100644 packages/agent-cache/src/__tests__/adapter-key-divergence.test.ts diff --git a/docs/packages/agent-cache.md b/docs/packages/agent-cache.md index df82cac7..d1c3fe04 100644 --- a/docs/packages/agent-cache.md +++ b/docs/packages/agent-cache.md @@ -271,6 +271,8 @@ const model = new ChatOpenAI({ The adapter implements LangChain's `BaseCache` interface. 
+**Limitation:** LangChain's `BaseCache` interface exposes only `(prompt, llm_string)` to the cache layer. Tool definitions bound to the model are not passed through this interface, so tool-schema changes are not reflected in the cache key. If a tool's schema changes without a corresponding change to the model identity (model name, temperature, etc.), the cache may serve a stale response computed against the old schema. If this matters for your use case, incorporate a tool version string into your model configuration or use a separate cache namespace per tool revision. + ### Vercel AI SDK Import from `@betterdb/agent-cache/ai`. Requires `ai` ^6.0.135 as a peer dependency. @@ -288,6 +290,28 @@ const model = wrapLanguageModel({ The middleware intercepts non-streaming `doGenerate` calls. On a cache hit, the model is not called and the response includes `providerMetadata: { agentCache: { hit: true } }` so consumers can distinguish cached responses from real zero-token calls. Responses containing tool-call parts are not cached to avoid breaking tool-calling workflows. +Tool definitions, seed, stop sequences, response format, and tool choice are all included in the cache key automatically. Requests with identical messages but different tools (or different generation parameters) will not collide. + +### LlamaIndex + +Import from `@betterdb/agent-cache/llamaindex`. Requires `@llamaindex/core` >= 0.6.0 as a peer dependency. + +```typescript +import { prepareParams } from '@betterdb/agent-cache/llamaindex'; + +const params = await prepareParams(messages, { + model: 'gpt-4o', + temperature: 0, + tools: myTools, // BaseTool[] from LlamaIndex +}); + +const result = await cache.llm.check(params); +``` + +Tool definitions are included in the cache key when passed via the `tools` option. Only `tool.metadata` (name, description, parameters) is serialized; the `call` closure is never included. + +Callers must pass `tools` into `prepareParams` for tool-schema drift safety. 
Omitting `tools` falls back to messages-only keying (the prior behavior), meaning requests with identical messages but different tool sets will collide in the cache. + ### LangGraph Import from `@betterdb/agent-cache/langgraph`. Requires `@langchain/langgraph-checkpoint` >= 0.1.0 as a peer dependency. diff --git a/packages/agent-cache-py/CHANGELOG.md b/packages/agent-cache-py/CHANGELOG.md index bbab874f..37d56031 100644 --- a/packages/agent-cache-py/CHANGELOG.md +++ b/packages/agent-cache-py/CHANGELOG.md @@ -1,3 +1,18 @@ +## [0.7.0] - 2026-06-08 + +### Fixed + +- **LlamaIndex adapter: tool definitions now included in cache key.** When `tools` is passed to `prepare_params()`, tool metadata (name, description, parameters) is extracted and included in the cache key. Only serializable metadata is used; callable closures are never serialized. + +### Changed + +- **Cache keys changed for tool-using requests on the LlamaIndex adapter.** Existing cached entries for those requests will be a one-time miss after upgrade. This is intended: the prior entries were keyed without tool information and are not safe to reuse across differing tool sets. +- **`prepare_params()` now accepts a `tools` keyword argument (and `LlamaIndexPrepareOptions.tools` field).** Callers must pass `tools` to get tool-schema safety. Omitting it falls back to messages-only keying (prior behavior). + +### Known limitations + +- **LangChain adapter: tool-schema drift is not reflected in the cache key.** The framework's `BaseCache` interface exposes only `(prompt, llm_string)` to the cache layer, so tool definitions are structurally unreachable. Unchanged in this release; documented as a known limitation. 
+ ## [0.6.0] - 2026-05-04 ### Added diff --git a/packages/agent-cache-py/betterdb_agent_cache/adapters/llamaindex.py b/packages/agent-cache-py/betterdb_agent_cache/adapters/llamaindex.py index 45beef5a..041b6b25 100644 --- a/packages/agent-cache-py/betterdb_agent_cache/adapters/llamaindex.py +++ b/packages/agent-cache-py/betterdb_agent_cache/adapters/llamaindex.py @@ -27,6 +27,16 @@ class LlamaIndexPrepareOptions: temperature: float | None = None top_p: float | None = None max_tokens: int | None = None + tools: list[Any] | None = None + """Tool definitions to include in the cache key. + + Pass the same tools list you provide to the LLM call. Each tool must + expose a ``metadata`` attribute (or dict key) with at least ``name``, + and optionally ``description`` and ``parameters`` (or a ``get_parameters_dict()`` method, as on LlamaIndex ``ToolMetadata``). Only metadata is + serialized; callable closures are never included. + + Omitting this field falls back to messages-only keying (prior behavior). + """ def _parse_input(value: Any) -> Any: @@ -87,6 +97,32 @@ async def _normalize_detail( return None +def _extract_tool_metadata(tool: Any) -> dict[str, Any]: + """Extract serializable metadata (name, description, parameters) from a LlamaIndex tool, reading the schema from ``get_parameters_dict()`` when no ``parameters`` attribute exists.""" + if hasattr(tool, "metadata"): + meta = tool.metadata + elif isinstance(tool, dict) and "metadata" in tool: + meta = tool["metadata"] + else: + meta = tool # Already a metadata-like dict + + if hasattr(meta, "name"): + name = meta.name + description = getattr(meta, "description", None) + parameters = declared if (declared := getattr(meta, "parameters", None)) is not None else getattr(meta, "get_parameters_dict", lambda: None)() + else: + name = meta.get("name", "") + description = meta.get("description") + parameters = meta.get("parameters") + + fn: dict[str, Any] = {"name": name} + if description is not None: + fn["description"] = description + if parameters is not None: + fn["parameters"] = parameters + return {"type": "function", "function": fn} + + async def prepare_params( messages: list[dict[str, Any]], opts: LlamaIndexPrepareOptions | None = None, @@ -96,6 +132,7 @@ async def prepare_params( temperature: float
| None = None, top_p: float | None = None, max_tokens: int | None = None, + tools: list[Any] | None = None, ) -> LlmCacheParams: """Normalise a LlamaIndex message list to ``LlmCacheParams``. @@ -103,6 +140,10 @@ async def prepare_params( arguments directly:: params = await prepare_params(msgs, model="gpt-4o", temperature=0.7) + + To include tool definitions in the cache key (recommended when using tools):: + + params = await prepare_params(msgs, model="gpt-4o", tools=my_tools) """ if opts is None: opts = LlamaIndexPrepareOptions( @@ -111,6 +152,7 @@ async def prepare_params( temperature=temperature, top_p=top_p, max_tokens=max_tokens, + tools=tools, ) norm = opts.normalizer @@ -162,5 +204,7 @@ async def prepare_params( result["top_p"] = opts.top_p if opts.max_tokens is not None: result["max_tokens"] = opts.max_tokens + if opts.tools is not None and len(opts.tools) > 0: + result["tools"] = [_extract_tool_metadata(t) for t in opts.tools] return result diff --git a/packages/agent-cache-py/tests/adapters/test_llamaindex_key_divergence.py b/packages/agent-cache-py/tests/adapters/test_llamaindex_key_divergence.py new file mode 100644 index 00000000..cbe1b30d --- /dev/null +++ b/packages/agent-cache-py/tests/adapters/test_llamaindex_key_divergence.py @@ -0,0 +1,105 @@ +"""Key divergence tests for the LlamaIndex adapter. + +Proves that tool definitions, tool order, and non-serializable closures are +handled correctly in cache key computation. 
+""" +from __future__ import annotations + +import pytest +from betterdb_agent_cache.adapters.llamaindex import prepare_params +from betterdb_agent_cache.utils import llm_cache_hash + + +MSGS = [{"role": "user", "content": "Hello"}] + + +class _ToolMetadata: + """Mimics LlamaIndex ToolMetadata (attribute-based, not a dict).""" + + def __init__(self, name: str, description: str, parameters: dict | None = None): + self.name = name + self.description = description + self.parameters = parameters + + +class _FakeTool: + """Mimics LlamaIndex BaseTool with metadata + a non-serializable call.""" + + def __init__(self, meta: _ToolMetadata): + self.metadata = meta + + def call(self, _input): # noqa: ANN001, ANN201 — intentionally untyped + raise RuntimeError("should never be serialized") + + +TOOL_A_META = _ToolMetadata("get_weather", "Get weather", {"type": "object", "properties": {"city": {"type": "string"}}}) +TOOL_B_META = _ToolMetadata("search", "Search web", {"type": "object", "properties": {"q": {"type": "string"}}}) +TOOL_A_ALT_META = _ToolMetadata("get_weather", "Get weather", {"type": "object", "properties": {"location": {"type": "string"}}}) + +TOOL_A = _FakeTool(TOOL_A_META) +TOOL_B = _FakeTool(TOOL_B_META) +TOOL_A_ALT = _FakeTool(TOOL_A_ALT_META) + + +# ─── Case 1: Tool sensitivity ──────────────────────────────────────────────── + +@pytest.mark.asyncio +async def test_different_tool_names_produce_different_keys(): + p1 = await prepare_params(MSGS, model="gpt-4o", tools=[TOOL_A]) + p2 = await prepare_params(MSGS, model="gpt-4o", tools=[TOOL_B]) + assert llm_cache_hash(p1) != llm_cache_hash(p2) + + +@pytest.mark.asyncio +async def test_same_name_different_params_produce_different_keys(): + p1 = await prepare_params(MSGS, model="gpt-4o", tools=[TOOL_A]) + p2 = await prepare_params(MSGS, model="gpt-4o", tools=[TOOL_A_ALT]) + assert llm_cache_hash(p1) != llm_cache_hash(p2) + + +# ─── Case 2: Tool stability (order invariance) ─────────────────────────────── + 
+@pytest.mark.asyncio +async def test_same_tools_different_order_produce_same_key(): + p1 = await prepare_params(MSGS, model="gpt-4o", tools=[TOOL_A, TOOL_B]) + p2 = await prepare_params(MSGS, model="gpt-4o", tools=[TOOL_B, TOOL_A]) + assert llm_cache_hash(p1) == llm_cache_hash(p2) + + +# ─── Case 3: Tools-absent baseline ─────────────────────────────────────────── + +@pytest.mark.asyncio +async def test_no_tools_vs_with_tools_produce_different_keys(): + p_no = await prepare_params(MSGS, model="gpt-4o") + p_yes = await prepare_params(MSGS, model="gpt-4o", tools=[TOOL_A]) + assert llm_cache_hash(p_no) != llm_cache_hash(p_yes) + + +@pytest.mark.asyncio +async def test_no_tools_both_calls_produce_same_key(): + p1 = await prepare_params(MSGS, model="gpt-4o") + p2 = await prepare_params(MSGS, model="gpt-4o") + assert llm_cache_hash(p1) == llm_cache_hash(p2) + + +# ─── Case 6: Closure safety ────────────────────────────────────────────────── + +@pytest.mark.asyncio +async def test_tool_with_closure_produces_same_key_as_plain_metadata(): + """A tool carrying a non-serializable call closure must not throw and + must produce a key derived only from its metadata.""" + tool_with_closure = _FakeTool(TOOL_A_META) + tool_plain = {"metadata": {"name": "get_weather", "description": "Get weather", + "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}}} + + p1 = await prepare_params(MSGS, model="gpt-4o", tools=[tool_with_closure]) + p2 = await prepare_params(MSGS, model="gpt-4o", tools=[tool_plain]) + assert llm_cache_hash(p1) == llm_cache_hash(p2) + + +@pytest.mark.asyncio +async def test_closure_key_is_deterministic(): + tool = _FakeTool(TOOL_A_META) + p1 = await prepare_params(MSGS, model="gpt-4o", tools=[tool]) + p2 = await prepare_params(MSGS, model="gpt-4o", tools=[tool]) + assert llm_cache_hash(p1) == llm_cache_hash(p2) diff --git a/packages/agent-cache/CHANGELOG.md b/packages/agent-cache/CHANGELOG.md index 6d03cb45..22bffd1a 100644 --- 
a/packages/agent-cache/CHANGELOG.md +++ b/packages/agent-cache/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.0] - 2026-06-08 + +### Fixed + +- **Vercel AI SDK adapter: tool definitions now included in cache key.** Previously, the adapter only keyed on model, messages, temperature, topP, and maxTokens. Requests with identical messages but different tools could return the same cached response. `seed`, `stopSequences`, `responseFormat`, and `toolChoice` are now also part of the key. +- **LlamaIndex adapter: tool definitions now included in cache key.** When `tools` is passed to `prepareParams()`, tool metadata (name, description, parameters) is extracted and included in the cache key. Only serializable metadata is used; the `call` closure is never serialized. + +### Changed + +- **Cache keys changed for tool-using requests on Vercel and LlamaIndex adapters.** Existing cached entries for those requests will be a one-time miss after upgrade. This is intended: the prior entries were keyed without tool information and are not safe to reuse across differing tool sets. +- **LlamaIndex `prepareParams()` now accepts a `tools` option.** Callers must pass `tools` to get tool-schema safety. Omitting it falls back to messages-only keying (prior behavior). + +### Known limitations + +- **LangChain adapter: tool-schema drift is not reflected in the cache key.** The framework's `BaseCache` interface exposes only `(prompt, llm_string)` to the cache layer, so tool definitions are structurally unreachable. Unchanged in this release; documented as a known limitation. 
+ ## [0.6.0] - 2026-05-04 ### Added diff --git a/packages/agent-cache/src/__tests__/adapter-key-divergence.test.ts b/packages/agent-cache/src/__tests__/adapter-key-divergence.test.ts new file mode 100644 index 00000000..7c709042 --- /dev/null +++ b/packages/agent-cache/src/__tests__/adapter-key-divergence.test.ts @@ -0,0 +1,296 @@ +import { describe, it, expect, vi, beforeAll } from 'vitest'; +import type { AgentCache } from '../AgentCache'; +import type { LlmCacheParams, LlmCacheResult } from '../types'; +import { llmCacheHash } from '../utils'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function createMockAgentCache(): AgentCache & { capturedParams: LlmCacheParams | null } { + const mock = { + capturedParams: null as LlmCacheParams | null, + llm: { + check: vi.fn(async (params: LlmCacheParams) => { + mock.capturedParams = params; + return { hit: true, response: 'cached', tier: 'llm' } as LlmCacheResult; + }), + store: vi.fn(), + }, + tool: { check: vi.fn(), store: vi.fn() }, + session: { + get: vi.fn(), set: vi.fn(), getAll: vi.fn(), + scanFieldsByPrefix: vi.fn(), delete: vi.fn(), + destroyThread: vi.fn(), touch: vi.fn(), + }, + stats: vi.fn(), + toolEffectiveness: vi.fn(), + flush: vi.fn(), + } as unknown as AgentCache & { capturedParams: LlmCacheParams | null }; + return mock; +} + +const BASE_PROMPT = [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }]; +const BASE_MESSAGES = [{ role: 'user' as const, content: 'Hello' }]; + +/** Call the Vercel middleware and return the LlmCacheParams it computed. 
*/ +async function vercelParams( + middleware: { wrapGenerate?: Function }, + params: Record<string, unknown>, + mockCache: AgentCache & { capturedParams: LlmCacheParams | null }, +): Promise<LlmCacheParams> { + mockCache.capturedParams = null; + await middleware.wrapGenerate!({ + doGenerate: vi.fn().mockResolvedValue({ + content: [{ type: 'text', text: 'ok' }], + finishReason: 'stop', + usage: { inputTokens: { total: 0 }, outputTokens: { total: 0 } }, + }), + params: { + model: { modelId: 'gpt-4o', provider: 'openai' }, + prompt: BASE_PROMPT, + ...params, + }, + }); + return mockCache.capturedParams!; +} + +// =========================================================================== +// Vercel AI SDK adapter — cache key divergence +// =========================================================================== + +describe('Vercel AI SDK adapter — cache key divergence', () => { + let createAgentCacheMiddleware: typeof import('../adapters/ai').createAgentCacheMiddleware; + + const toolA = { type: 'function', name: 'get_weather', description: 'Get weather', inputSchema: { type: 'object', properties: { city: { type: 'string' } } } }; + const toolB = { type: 'function', name: 'search', description: 'Search web', inputSchema: { type: 'object', properties: { q: { type: 'string' } } } }; + const toolA_altParams = { type: 'function', name: 'get_weather', description: 'Get weather', inputSchema: { type: 'object', properties: { location: { type: 'string' } } } }; + + beforeAll(async () => { + const m = await import('../adapters/ai'); + createAgentCacheMiddleware = m.createAgentCacheMiddleware; + }); + + // --- Case 1: Tool sensitivity --- + + it('different tool names produce different keys', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { tools: [toolA] }, mock); + const p2 = await vercelParams(mw, { tools: [toolB] }, mock); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + it('same tool
name but different parameter schemas produce different keys', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { tools: [toolA] }, mock); + const p2 = await vercelParams(mw, { tools: [toolA_altParams] }, mock); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + // --- Case 2: Tool stability (order invariance) --- + + it('same tools in different array order produce the same key', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { tools: [toolA, toolB] }, mock); + const p2 = await vercelParams(mw, { tools: [toolB, toolA] }, mock); + + expect(llmCacheHash(p1)).toBe(llmCacheHash(p2)); + }); + + // --- Case 3: Tools-absent baseline --- + + it('no tools and with tools produce different keys', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const pNoTools = await vercelParams(mw, {}, mock); + const pWithTools = await vercelParams(mw, { tools: [toolA] }, mock); + + expect(llmCacheHash(pNoTools)).not.toBe(llmCacheHash(pWithTools)); + }); + + it('no tools on both calls produces the same key', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, {}, mock); + const p2 = await vercelParams(mw, {}, mock); + + expect(llmCacheHash(p1)).toBe(llmCacheHash(p2)); + }); + + // --- Case 4: Param sensitivity --- + + it('changing seed changes the key', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { seed: 42 }, mock); + const p2 = await vercelParams(mw, { seed: 99 }, mock); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + it('same seed produces the same key', async () => { + const mock = 
createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { seed: 42 }, mock); + const p2 = await vercelParams(mw, { seed: 42 }, mock); + + expect(llmCacheHash(p1)).toBe(llmCacheHash(p2)); + }); + + it('changing stopSequences changes the key', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { stopSequences: ['END'] }, mock); + const p2 = await vercelParams(mw, { stopSequences: ['STOP'] }, mock); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + it('changing responseFormat changes the key', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { responseFormat: { type: 'text' } }, mock); + const p2 = await vercelParams(mw, { responseFormat: { type: 'json' } }, mock); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + it('changing toolChoice changes the key', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const p1 = await vercelParams(mw, { tools: [toolA], toolChoice: { type: 'auto' } }, mock); + const p2 = await vercelParams(mw, { tools: [toolA], toolChoice: { type: 'none' } }, mock); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + // --- Case 5: Canonical-shape parity --- + + it('Vercel flat tool shape produces same key as hand-written canonical shape', async () => { + const mock = createMockAgentCache(); + const mw = createAgentCacheMiddleware({ cache: mock }); + + const vercel = await vercelParams(mw, { tools: [toolA] }, mock); + + const handwritten: LlmCacheParams = { + model: 'gpt-4o', + messages: vercel.messages, + temperature: vercel.temperature, + top_p: vercel.top_p, + max_tokens: vercel.max_tokens, + tools: [{ + type: 'function', + function: { + name: 'get_weather', + description: 'Get 
weather', + parameters: { type: 'object', properties: { city: { type: 'string' } } }, + }, + }], + }; + + expect(llmCacheHash(vercel)).toBe(llmCacheHash(handwritten)); + }); +}); + +// =========================================================================== +// LlamaIndex TS adapter — cache key divergence +// =========================================================================== + +describe('LlamaIndex TS adapter — cache key divergence', () => { + let prepareParams: typeof import('../adapters/llamaindex').prepareParams; + + const msgs = [{ role: 'user', content: 'Hello' }] as import('@llamaindex/core/llms').ChatMessage[]; + + const toolA = { + metadata: { name: 'get_weather', description: 'Get weather', parameters: { type: 'object', properties: { city: { type: 'string' } } } }, + }; + const toolB = { + metadata: { name: 'search', description: 'Search web', parameters: { type: 'object', properties: { q: { type: 'string' } } } }, + }; + const toolA_altParams = { + metadata: { name: 'get_weather', description: 'Get weather', parameters: { type: 'object', properties: { location: { type: 'string' } } } }, + }; + + beforeAll(async () => { + const m = await import('../adapters/llamaindex'); + prepareParams = m.prepareParams; + }); + + // --- Case 1: Tool sensitivity --- + + it('different tool names produce different keys', async () => { + const p1 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolA] }); + const p2 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolB] }); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + it('same tool name but different parameter schemas produce different keys', async () => { + const p1 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolA] }); + const p2 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolA_altParams] }); + + expect(llmCacheHash(p1)).not.toBe(llmCacheHash(p2)); + }); + + // --- Case 2: Tool stability (order invariance) --- + + it('same tools in different array order 
produce the same key', async () => { + const p1 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolA, toolB] }); + const p2 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolB, toolA] }); + + expect(llmCacheHash(p1)).toBe(llmCacheHash(p2)); + }); + + // --- Case 3: Tools-absent baseline --- + + it('no tools and with tools produce different keys', async () => { + const pNoTools = await prepareParams(msgs, { model: 'gpt-4o' }); + const pWithTools = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolA] }); + + expect(llmCacheHash(pNoTools)).not.toBe(llmCacheHash(pWithTools)); + }); + + it('no tools on both calls produces the same key', async () => { + const p1 = await prepareParams(msgs, { model: 'gpt-4o' }); + const p2 = await prepareParams(msgs, { model: 'gpt-4o' }); + + expect(llmCacheHash(p1)).toBe(llmCacheHash(p2)); + }); + + // --- Case 6: Closure safety --- + + it('tool with a non-serializable call closure produces stable key from metadata only', async () => { + const toolWithClosure = { + metadata: { name: 'get_weather', description: 'Get weather', parameters: { type: 'object' } }, + call: (input: unknown) => Promise.resolve({ temp: 20 }), + }; + const toolPlain = { + metadata: { name: 'get_weather', description: 'Get weather', parameters: { type: 'object' } }, + }; + + const p1 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolWithClosure] }); + const p2 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolPlain] }); + + // Both produce the same key (closure is ignored) + expect(llmCacheHash(p1)).toBe(llmCacheHash(p2)); + + // Key is deterministic across repeated calls + const p3 = await prepareParams(msgs, { model: 'gpt-4o', tools: [toolWithClosure] }); + expect(llmCacheHash(p1)).toBe(llmCacheHash(p3)); + }); +}); diff --git a/packages/agent-cache/src/adapters/ai.ts b/packages/agent-cache/src/adapters/ai.ts index 97948505..2ff98ba4 100644 --- a/packages/agent-cache/src/adapters/ai.ts +++ 
b/packages/agent-cache/src/adapters/ai.ts @@ -22,12 +22,27 @@ interface AiSdkModelV1 { provider?: string; } +interface AiSdkTool { + type: string; + name: string; + description?: string; + inputSchema?: unknown; + id?: string; + args?: unknown; +} + interface AiSdkParams { prompt?: AiSdkMessage[]; model?: AiSdkModelV1; temperature?: number; topP?: number; maxTokens?: number; + maxOutputTokens?: number; + tools?: AiSdkTool[]; + toolChoice?: unknown; + seed?: number; + stopSequences?: string[]; + responseFormat?: unknown; } function defaultExtractModel(params: unknown, model?: unknown): string { @@ -39,6 +54,30 @@ function defaultExtractModel(params: unknown, model?: unknown): string { return p.model?.modelId ?? 'unknown'; } +function convertTools(tools: AiSdkTool[]): LlmCacheParams['tools'] { + return tools.map((t) => { + if (t.type === 'function') { + return { + type: 'function', + function: { + name: t.name, + ...(t.description != null ? { description: t.description } : {}), + ...(t.inputSchema != null ? { parameters: t.inputSchema } : {}), + }, + }; + } + // Provider-defined tools (e.g. web_search) + return { + type: t.type, + function: { + name: t.name, + ...(t.id != null ? { id: t.id } : {}), + ...(t.args != null ? { args: t.args } : {}), + }, + }; + }); +} + function extractLlmParams(params: unknown, extractModel: (params: unknown, model?: unknown) => string, model?: unknown): LlmCacheParams { const p = params as AiSdkParams; @@ -49,13 +88,20 @@ function extractLlmParams(params: unknown, extractModel: (params: unknown, model } } - return { + const result: LlmCacheParams = { model: extractModel(params, model), messages, temperature: p.temperature, top_p: p.topP, - max_tokens: p.maxTokens, + max_tokens: p.maxTokens ?? 
p.maxOutputTokens, }; + if (p.tools != null && p.tools.length > 0) result.tools = convertTools(p.tools); + if (p.toolChoice != null) result.toolChoice = p.toolChoice; + if (p.seed != null) result.seed = p.seed; + if (p.stopSequences != null) result.stop = p.stopSequences; + if (p.responseFormat != null) result.responseFormat = p.responseFormat; + + return result; } interface ContentPart { diff --git a/packages/agent-cache/src/adapters/llamaindex.ts b/packages/agent-cache/src/adapters/llamaindex.ts index 1c948730..6ad2c5da 100644 --- a/packages/agent-cache/src/adapters/llamaindex.ts +++ b/packages/agent-cache/src/adapters/llamaindex.ts @@ -3,12 +3,38 @@ import type { ContentBlock, LlmCacheParams, TextBlock, BinaryBlock, ToolCallBloc import type { BinaryNormalizer, BinaryRef } from "../normalizer"; import { defaultNormalizer } from "../normalizer"; +/** + * Minimal interface matching the serializable portion of LlamaIndex's BaseTool.metadata. + * Only `name` is required; `description` and `parameters` are optional. + */ +export interface ToolMetadataLike { + name: string; + description?: string; + parameters?: Record<string, unknown>; +} + +/** + * Minimal interface for a LlamaIndex tool. Only `metadata` is used for cache keying. + */ +export interface BaseToolLike { + metadata: ToolMetadataLike; + [key: string]: unknown; +} + export interface LlamaIndexPrepareOptions { model: string; normalizer?: BinaryNormalizer; temperature?: number; topP?: number; maxTokens?: number; + /** + * Tool definitions to include in the cache key. Pass the same tools array + * you provide to the LLM call. Only `tool.metadata` (name, description, + * parameters) is used; the `call` closure is never serialized. + * + * Omitting this field falls back to messages-only keying (prior behavior).
+ */ + tools?: BaseToolLike[]; } type AnyDetail = { type: string; text?: string; image_url?: { url: string }; data?: string; mimeType?: string }; @@ -119,6 +145,16 @@ export async function prepareParams( if (opts.temperature != null) result.temperature = opts.temperature; if (opts.topP != null) result.top_p = opts.topP; if (opts.maxTokens != null) result.max_tokens = opts.maxTokens; + if (opts.tools != null && opts.tools.length > 0) { + result.tools = opts.tools.map((t) => ({ + type: "function", + function: { + name: t.metadata.name, + ...(t.metadata.description != null ? { description: t.metadata.description } : {}), + ...(t.metadata.parameters != null ? { parameters: t.metadata.parameters } : {}), + }, + })); + } return result; }