/**
 * Convert a single Anthropic-style content block (or a nested list of
 * blocks) into plain text.
 *
 * - strings are returned unchanged;
 * - arrays are flattened recursively and concatenated;
 * - objects contribute their non-empty string `text`, otherwise whatever
 *   text can be extracted from their `content` (which may itself be a
 *   string, a block, or an array of blocks, e.g. a `tool_result` block);
 * - anything else (numbers, booleans, null, blocks that carry neither
 *   `text` nor `content` such as `tool_use`) contributes nothing.
 *
 * @param {*} block - String, content block, or array of content blocks
 * @returns {string} Extracted text, "" when the block carries none
 */
function blockToText(block) {
  if (typeof block === "string") return block;
  if (block === null || typeof block !== "object") return "";
  if (Array.isArray(block)) return block.map(blockToText).join("");
  if (typeof block.text === "string" && block.text !== "") return block.text;
  // Recurse instead of returning `content` as-is: a nested array would
  // otherwise be coerced to "[object Object]" when the pieces are joined.
  return blockToText(block.content);
}

/**
 * Flatten an Anthropic-style content value into a plain string for the
 * Bedrock Converse API.
 *
 * Prompt-cache injection (injectPromptCaching) rewrites string `system`
 * fields and message `content` into arrays of `{ type, text, cache_control }`
 * blocks. Flattening keeps only the text, so `cache_control` markers are
 * discarded and the result can be placed directly under a Converse
 * `{ text: "" }` block.
 *
 * @param {string|Array|Object|undefined|null} value - String, single content
 *   block, or array of content blocks
 * @returns {string} Concatenated plain text ("" for null/undefined)
 */
function flattenContentToText(value) {
  if (value == null) return "";
  if (typeof value === "string") return value;
  if (typeof value === "object") return blockToText(value);
  // Top-level primitives (numbers, booleans) keep their string form.
  return String(value);
}

/**
 * Normalize a request body for the Bedrock Converse API.
 *
 * Flattens array-shaped `system` / message `content` (left behind by
 * prompt-cache injection) back into plain strings, which also removes any
 * `cache_control` markers. Returns a shallow copy with a new `messages`
 * array; neither the original body nor its message objects are mutated.
 * Entries of `messages` that are not objects are passed through untouched
 * rather than throwing.
 *
 * invokeBedrock spreads the result into its request body
 * (`{ ...normalizeBodyForConverse(body), tools: toolsToSend }`) before the
 * model-family conversion step, and the function is exported from this
 * module so it can be tested directly.
 *
 * @param {Object} body - Anthropic-format request body
 * @returns {Object} Body safe for Converse request construction
 */
function normalizeBodyForConverse(body) {
  const normalized = { ...body };

  if (normalized.system !== undefined) {
    normalized.system = flattenContentToText(normalized.system);
  }

  if (Array.isArray(normalized.messages)) {
    normalized.messages = normalized.messages.map((msg) => {
      if (msg === null || typeof msg !== "object") return msg;
      return { ...msg, content: flattenContentToText(msg.content) };
    });
  }

  return normalized;
}
// Model families that do NOT support cache_control breakpoints. cache_control
// is an Anthropic construct; on aggregating providers (Bedrock, OpenRouter) it
// only applies to models that natively understand it. Injecting markers onto
// these families produces request shapes the upstream model rejects or
// silently ignores.
//
// Most families are anchored to the start of the id or to a `.`, `/` or `-`
// delimiter so short names ("nova", "j2", "gpt") do not match inside
// unrelated words. "llama" is matched anywhere so that ids such as
// `codellama/codellama-70b-instruct` are recognised as well.
const NON_CACHE_MODEL_PATTERNS = Object.freeze([
  /(^|[./-])titan/i,
  /(^|[./-])nova/i,
  /llama/i,
  /(^|[./-])mistral/i,
  /(^|[./-])mixtral/i,
  /(^|[./-])cohere/i,
  /(^|[./-])command/i, // cohere command-*
  /(^|[./-])j2/i, // ai21 jurassic
  /(^|[./-])jamba/i,
  /(^|[./-])deepseek/i,
  /(^|[./-])qwen/i,
  /(^|[./-])gpt/i,
  /(^|[./-])openai/i,
]);

// Providers for which the capability check is skipped entirely.
// NOTE(review): 'databricks' is treated as Anthropic-only here, as in the
// original gate — confirm no non-Claude serving endpoint is routed through it.
const ANTHROPIC_ONLY_PROVIDERS = new Set(['azure-anthropic', 'databricks']);

/**
 * Determine whether the model targeted by this request supports cache_control.
 *
 * Some providers in EXPLICIT_CACHE_PROVIDERS (notably bedrock and openrouter)
 * route to many model families, only some of which understand cache_control.
 * This guard inspects the model id and blocks injection for families listed
 * in NON_CACHE_MODEL_PATTERNS. When the id is absent, not a string, blank, or
 * unrecognized, injection is allowed (fail-open).
 *
 * The id is taken from, in order: `resolvedModelId`, `body._tierModel`,
 * `body.model`. `resolvedModelId` exists because a provider may pick its model
 * from configuration rather than from the body (invokeBedrock falls back to
 * `config.bedrock.modelId`), in which case the body alone cannot tell which
 * model will be called.
 *
 * @param {Object} body - Request body (may carry the model id)
 * @param {string} provider - Provider name
 * @param {string} [resolvedModelId] - Model id the caller has already
 *   resolved; takes precedence over the ids on `body` when a non-blank string
 * @returns {boolean} false only when the id matches a known non-caching family
 */
function modelSupportsCacheControl(body, provider, resolvedModelId) {
  if (ANTHROPIC_ONLY_PROVIDERS.has(provider)) return true;

  const explicitId =
    typeof resolvedModelId === 'string' && resolvedModelId.trim() !== ''
      ? resolvedModelId
      : undefined;
  const candidate = explicitId || (body && (body._tierModel || body.model));
  if (typeof candidate !== 'string') return true; // unknown → fail open

  // Trim so stray whitespace (e.g. from env/config values) cannot defeat the
  // start-of-string anchors.
  const modelId = candidate.trim();
  if (modelId === '') return true;

  return !NON_CACHE_MODEL_PATTERNS.some((pattern) => pattern.test(modelId));
}

/**
 * Inject provider-side prompt caching into the request body.
 * Call this before sending to the provider.
 *
 * Injection is skipped (returning 0) when the provider does not use explicit
 * cache markers, or when the targeted model is known not to support
 * cache_control. A provider may support cache_control in general while the
 * specific routed model does not.
 *
 * @param {Object} body - Request body; rewritten in place when injecting
 * @param {string} provider - Provider name
 * @param {string} [resolvedModelId] - Model id already resolved by the
 *   caller, forwarded to modelSupportsCacheControl
 * @returns {number} 0 when skipped, otherwise the value returned by
 *   injectAnthropicCacheBreakpoints (the existing tests treat it as the
 *   number of breakpoints added)
 */
function injectPromptCaching(body, provider, resolvedModelId) {
  if (!needsCacheInjection(provider)) return 0;
  if (!modelSupportsCacheControl(body, provider, resolvedModelId)) return 0;
  return injectAnthropicCacheBreakpoints(body);
}
/**
 * Tests for the Bedrock Converse request normalization and for the
 * model-capability gate in prompt-cache injection.
 *
 * invokeModel runs injectPromptCaching() before dispatching to a provider,
 * which rewrites string `system` / message `content` into Anthropic
 * cache_control blocks. The Bedrock Converse API has no cache_control
 * concept and requires plain-string system and message content, so the
 * Bedrock path must flatten those shapes back before building the request.
 */

const assert = require("assert");
const { describe, it, beforeEach, afterEach } = require("node:test");

const {
  injectPromptCaching,
  modelSupportsCacheControl,
} = require("../src/clients/prompt-cache-injection");

/**
 * Restore process.env to a previously captured snapshot, in place.
 *
 * process.env is a special object provided by Node; reassigning it swaps
 * that object for a plain copy. Mutating it instead keeps its normal
 * behaviour for the rest of the test run.
 *
 * @param {Object<string, string>} snapshot - Copy taken with `{ ...process.env }`
 */
function restoreEnv(snapshot) {
  for (const key of Object.keys(process.env)) {
    if (!Object.hasOwn(snapshot, key)) delete process.env[key];
  }
  for (const [key, value] of Object.entries(snapshot)) {
    process.env[key] = value;
  }
}

// ── test/databricks-bedrock-converse.test.js ────────────────────────

describe("Bedrock Converse normalization", () => {
  let originalEnv;
  let normalizeBodyForConverse;

  beforeEach(() => {
    originalEnv = { ...process.env };
    // databricks.js loads ../config at require time; give it valid creds.
    process.env.MODEL_PROVIDER = "databricks";
    process.env.DATABRICKS_API_KEY = "test-key";
    process.env.DATABRICKS_API_BASE = "http://test.com";

    // Drop cached copies so the modules are re-evaluated with the env above.
    delete require.cache[require.resolve("../src/config")];
    delete require.cache[require.resolve("../src/clients/databricks")];
    ({ normalizeBodyForConverse } = require("../src/clients/databricks"));
  });

  afterEach(() => {
    restoreEnv(originalEnv);
  });

  it("flattens a cache_control-injected system prompt back to a string", () => {
    const body = {
      system: "You are a helpful assistant",
      messages: [{ role: "user", content: "Hello" }],
    };

    // Simulate the injection that invokeModel performs before dispatch.
    injectPromptCaching(body, "bedrock");
    assert.ok(Array.isArray(body.system), "precondition: injection made system an array");

    const normalized = normalizeBodyForConverse(body);

    assert.strictEqual(typeof normalized.system, "string");
    assert.strictEqual(normalized.system, "You are a helpful assistant");
  });

  it("flattens injected message content back to plain strings", () => {
    const body = {
      system: "sys",
      messages: [
        { role: "user", content: "first" },
        { role: "assistant", content: "second" },
        { role: "user", content: "third" },
      ],
    };

    injectPromptCaching(body, "bedrock");

    const normalized = normalizeBodyForConverse(body);

    for (const msg of normalized.messages) {
      assert.strictEqual(typeof msg.content, "string");
    }
    assert.strictEqual(normalized.messages[0].content, "first");
    assert.strictEqual(normalized.messages[1].content, "second");
    assert.strictEqual(normalized.messages[2].content, "third");
  });

  it("produces a Converse-valid system shape ([{text:string}])", () => {
    const body = {
      system: "cached system prompt",
      messages: [{ role: "user", content: "hi" }],
    };

    injectPromptCaching(body, "bedrock");
    const normalized = normalizeBodyForConverse(body);

    // Mirror how invokeBedrock builds the Converse system field.
    const converseSystem = [{ text: normalized.system }];
    assert.strictEqual(converseSystem.length, 1);
    assert.strictEqual(typeof converseSystem[0].text, "string");
    assert.strictEqual(converseSystem[0].text, "cached system prompt");
  });

  it("produces Converse-valid content blocks ({text:string})", () => {
    const body = {
      system: "sys",
      messages: [{ role: "user", content: "question" }],
    };

    injectPromptCaching(body, "bedrock");
    const normalized = normalizeBodyForConverse(body);

    // Mirror how invokeBedrock maps message content for Converse.
    const { content } = normalized.messages[0];
    const blocks = Array.isArray(content)
      ? content.map((c) => ({ text: c.text || c.content || "" }))
      : [{ text: content }];

    assert.strictEqual(blocks.length, 1);
    assert.strictEqual(typeof blocks[0].text, "string");
    assert.strictEqual(blocks[0].text, "question");
  });

  it("does not strip cache_control from the caller's original body", () => {
    const body = {
      system: "sys",
      messages: [{ role: "user", content: "hi" }],
    };

    injectPromptCaching(body, "bedrock");
    normalizeBodyForConverse(body);

    // Normalization works on a copy, leaving the injected body untouched.
    assert.ok(Array.isArray(body.system));
    assert.deepStrictEqual(body.system[0].cache_control, { type: "ephemeral" });
  });

  it("handles array system blocks without cache_control", () => {
    const body = {
      system: [
        { type: "text", text: "Part A" },
        { type: "text", text: "Part B" },
      ],
      messages: [],
    };

    const normalized = normalizeBodyForConverse(body);
    assert.strictEqual(normalized.system, "Part APart B");
  });
});

// ── test/prompt-cache-injection.test.js: model-capability gate ──────

describe("modelSupportsCacheControl", () => {
  it("always supports for Anthropic-only providers", () => {
    assert.strictEqual(modelSupportsCacheControl({}, "azure-anthropic"), true);
    assert.strictEqual(modelSupportsCacheControl({ _tierModel: "whatever" }, "databricks"), true);
  });

  it("fails open when no model id is present", () => {
    assert.strictEqual(modelSupportsCacheControl({}, "bedrock"), true);
    assert.strictEqual(modelSupportsCacheControl({}, "openrouter"), true);
  });

  it("supports Claude model ids on bedrock", () => {
    assert.strictEqual(
      modelSupportsCacheControl(
        { _tierModel: "us.anthropic.claude-sonnet-4-5-20250929-v1:0" },
        "bedrock"
      ),
      true
    );
  });

  it("blocks non-Claude bedrock families", () => {
    const blockedIds = [
      "meta.llama3-70b-instruct-v1:0",
      "amazon.titan-text-express-v1",
      "mistral.mistral-7b-instruct-v0:2",
      "cohere.command-text-v14",
    ];
    for (const id of blockedIds) {
      assert.strictEqual(modelSupportsCacheControl({ _tierModel: id }, "bedrock"), false, id);
    }
  });

  it("blocks non-supporting openrouter models", () => {
    assert.strictEqual(modelSupportsCacheControl({ model: "meta-llama/llama-3-70b" }, "openrouter"), false);
    assert.strictEqual(modelSupportsCacheControl({ model: "openai/gpt-4o" }, "openrouter"), false);
  });
});

describe("injectPromptCaching capability gate", () => {
  it("still injects for bedrock when the model id is unknown (backward compatible)", () => {
    const body = {
      system: "test",
      messages: [{ role: "user", content: "hi" }],
    };
    const count = injectPromptCaching(body, "bedrock");
    assert.strictEqual(count, 2);
  });

  it("injects for a Claude model on bedrock", () => {
    const body = {
      _tierModel: "anthropic.claude-3-5-sonnet-20241022-v2:0",
      system: "test",
      messages: [{ role: "user", content: "hi" }],
    };
    const count = injectPromptCaching(body, "bedrock");
    assert.strictEqual(count, 2);
  });

  it("skips injection for a non-Claude bedrock model and leaves body untouched", () => {
    const body = {
      _tierModel: "meta.llama3-70b-instruct-v1:0",
      system: "test",
      messages: [{ role: "user", content: "hi" }],
    };
    const count = injectPromptCaching(body, "bedrock");
    assert.strictEqual(count, 0);
    assert.strictEqual(body.system, "test"); // unchanged string, no array conversion
    assert.strictEqual(body.messages[0].content, "hi");
  });

  it("skips injection for a GPT model routed via openrouter", () => {
    const body = {
      model: "openai/gpt-4o",
      system: "test",
      messages: [{ role: "user", content: "hi" }],
    };
    const count = injectPromptCaching(body, "openrouter");
    assert.strictEqual(count, 0);
  });
});