Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 63 additions & 1 deletion src/clients/databricks.js
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,64 @@ async function invokeLMStudio(body) {
return performJsonRequest(endpoint, { headers, body: lmstudioBody }, "LM Studio");
}

/**
 * Flatten an Anthropic-style content value into a plain string for the
 * Bedrock Converse API.
 *
 * Prompt-cache injection (injectPromptCaching) rewrites string `system`
 * fields and message `content` into arrays of `{ type, text, cache_control }`
 * blocks. The Converse API has no `cache_control` concept and expects
 * `system: [{ text: "<string>" }]` and message content blocks shaped as
 * `{ text: "<string>" }`, so array-shaped values are collapsed back to text.
 *
 * Flattening rules:
 *  - `null` / `undefined` -> `""`
 *  - string -> returned unchanged
 *  - array -> every element is flattened and the pieces are concatenated
 *    with no separator: string elements are used as-is, object elements
 *    contribute their `text` (or, when that is falsy, their `content`), and
 *    any other element contributes nothing
 *  - any other value -> `String(value)`
 *
 * A block's `text` / `content` payload is flattened with the same rules, so
 * a block that nests an array of blocks under `content` (e.g. an Anthropic
 * `tool_result`) yields its inner text rather than "[object Object]".
 *
 * @param {string|Array|undefined} value - String or array of content blocks
 * @returns {string} Concatenated plain text
 */
function flattenContentToText(value) {
  if (value == null) return "";
  if (typeof value === "string") return value;
  if (Array.isArray(value)) {
    return value
      .map(block => {
        if (typeof block === "string") return block;
        if (block && typeof block === "object") {
          // Recurse: the payload may be a string, a nested array of blocks,
          // or missing entirely (handled by the null check above).
          return flattenContentToText(block.text || block.content);
        }
        return "";
      })
      .join("");
  }
  return String(value);
}

/**
 * Prepare an Anthropic-format request body for Bedrock Converse conversion.
 *
 * Prompt-cache injection can leave `system` and each message's `content` as
 * arrays of blocks carrying `cache_control`. This collapses both back to
 * plain strings via flattenContentToText, which discards those markers.
 *
 * The input is never modified: the result is a shallow copy of `body` whose
 * `messages` array (when present) is a new array of shallow message copies.
 * All other properties are carried over by reference.
 *
 * @param {Object} body - Anthropic-format request body
 * @returns {Object} Body safe for Converse request construction
 */
function normalizeBodyForConverse(body) {
  const result = { ...body };
  const { system, messages } = result;

  // Only touch `messages` when it is actually a list; anything else is
  // passed through as-is.
  if (Array.isArray(messages)) {
    result.messages = messages.map(message => {
      const content = flattenContentToText(message.content);
      return { ...message, content };
    });
  }

  // An absent `system` stays absent; any defined value becomes a string.
  if (system !== undefined) {
    result.system = flattenContentToText(system);
  }

  return result;
}

async function invokeBedrock(body) {
// 1. Validate Bearer token
if (!config.bedrock?.apiKey) {
Expand All @@ -1130,7 +1188,10 @@ async function invokeBedrock(body) {
}, "=== INJECTING STANDARD TOOLS (Bedrock) ===");
}

const bedrockBody = { ...body, tools: toolsToSend };
// Normalize away cache_control / array shapes that prompt-cache injection
// may have applied: the Converse API expects plain-string system and
// message content, not Anthropic cache_control blocks.
const bedrockBody = { ...normalizeBodyForConverse(body), tools: toolsToSend };

// 4. Detect model family and convert format
const modelId = body._tierModel || config.bedrock.modelId;
Expand Down Expand Up @@ -2579,4 +2640,5 @@ function destroyHttpAgents() {
// Public surface of this module. `normalizeBodyForConverse` is exported so
// the Bedrock Converse normalization can be exercised directly by unit tests
// (test/databricks-bedrock-converse.test.js requires it from here).
module.exports = {
invokeModel,
destroyHttpAgents,
normalizeBodyForConverse,
};
49 changes: 49 additions & 0 deletions src/clients/prompt-cache-injection.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,51 @@ function needsCacheInjection(provider) {
return EXPLICIT_CACHE_PROVIDERS.has(provider);
}

// Model families that do NOT support cache_control breakpoints. cache_control
// is an Anthropic construct; on aggregating providers (Bedrock, OpenRouter) it
// only applies to models that natively understand it (Claude, and Gemini via
// proxy). Injecting markers onto these families produces request shapes the
// upstream model rejects or silently ignores.
//
// Each family name becomes a case-insensitive pattern that matches the name
// at the very start of a model id or immediately after a `.`, `/` or `-`
// separator (e.g. `meta.llama3-…`, `openai/gpt-4o`).
const NON_CACHE_MODEL_PATTERNS = [
  'titan',
  'nova',
  'llama',
  'mistral',
  'mixtral',
  'cohere',
  'command', // cohere command-*
  'j2', // ai21 jurassic
  'jamba',
  'deepseek',
  'qwen',
  'gpt',
  'openai',
].map(family => new RegExp(`(^|[./-])${family}`, 'i'));

/**
 * Decide whether cache_control markers may be injected for this request's
 * target model.
 *
 * Providers such as bedrock and openrouter front many model families, and
 * only some of those understand cache_control. The resolved model id
 * (`body._tierModel`, falling back to `body.model`) is checked against
 * NON_CACHE_MODEL_PATTERNS; a match means injection must be skipped.
 *
 * The check fails open: it returns true when the provider is one of the
 * Anthropic-only providers, when no usable (non-empty string) model id is
 * available, or when the id matches none of the known non-supporting
 * families.
 *
 * @param {Object} body - Request body (may carry the resolved model id)
 * @param {string} provider - Provider name
 * @returns {boolean} false only when the model id matches a known
 *   non-supporting family
 */
function modelSupportsCacheControl(body, provider) {
  // These providers only ever route to Anthropic models.
  const anthropicOnly = provider === 'azure-anthropic' || provider === 'databricks';
  if (anthropicOnly) return true;

  // Tier-resolved id takes precedence over the raw request model.
  const modelId = body ? body._tierModel || body.model : undefined;
  if (typeof modelId !== 'string' || modelId === '') return true; // unknown → fail open

  for (const pattern of NON_CACHE_MODEL_PATTERNS) {
    if (pattern.test(modelId)) return false;
  }
  return true;
}

/**
* Inject provider-side prompt caching into the request body.
* Call this before sending to the provider.
Expand All @@ -129,6 +174,9 @@ function needsCacheInjection(provider) {
*/
function injectPromptCaching(body, provider) {
  // Two gates, checked in order: the provider must use explicit cache
  // breakpoints at all, and the specific routed model must understand
  // cache_control (a provider may support it in general while the routed
  // model does not). The model check is skipped when the provider check fails.
  const shouldInject =
    needsCacheInjection(provider) && modelSupportsCacheControl(body, provider);

  // When either gate fails, 0 is returned without calling the injector;
  // otherwise the injector's own return value is passed through.
  return shouldInject ? injectAnthropicCacheBreakpoints(body) : 0;
}

Expand All @@ -137,4 +185,5 @@ module.exports = {
injectAnthropicCacheBreakpoints,
injectGeminiCacheBreakpoints,
needsCacheInjection,
modelSupportsCacheControl,
};
134 changes: 134 additions & 0 deletions test/databricks-bedrock-converse.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* Tests for the Bedrock Converse request normalization.
*
* invokeModel runs injectPromptCaching() before dispatching to a provider,
* which rewrites string `system` / message `content` into Anthropic
* cache_control blocks. The Bedrock Converse API has no cache_control
* concept and requires plain-string system and message content, so the
* Bedrock path must flatten those shapes back before building the request.
*/

const assert = require("assert");
const { describe, it, beforeEach, afterEach } = require("node:test");

const { injectPromptCaching } = require("../src/clients/prompt-cache-injection");

// Suite covering normalizeBodyForConverse: bodies are first run through
// injectPromptCaching (as invokeModel does before dispatch) and must come out
// of normalization with plain-string `system` / message `content`.
describe("Bedrock Converse normalization", () => {
  let originalEnv;
  let normalizeBodyForConverse;

  beforeEach(() => {
    originalEnv = { ...process.env };
    // databricks.js loads ../config at require time; give it valid creds.
    process.env.MODEL_PROVIDER = "databricks";
    process.env.DATABRICKS_API_KEY = "test-key";
    process.env.DATABRICKS_API_BASE = "http://test.com";

    // Drop cached copies so both modules are re-evaluated against the env
    // variables set above.
    delete require.cache[require.resolve("../src/config")];
    delete require.cache[require.resolve("../src/clients/databricks")];
    ({ normalizeBodyForConverse } = require("../src/clients/databricks"));
  });

  afterEach(() => {
    // Restore the environment in place. Reassigning `process.env` would
    // replace Node's special env object with a plain-object copy for the
    // rest of the process.
    for (const key of Object.keys(process.env)) {
      if (!Object.hasOwn(originalEnv, key)) {
        delete process.env[key];
      }
    }
    Object.assign(process.env, originalEnv);
  });

  it("flattens a cache_control-injected system prompt back to a string", () => {
    const body = {
      system: "You are a helpful assistant",
      messages: [{ role: "user", content: "Hello" }],
    };

    // Simulate the injection that invokeModel performs before dispatch.
    injectPromptCaching(body, "bedrock");
    assert.ok(Array.isArray(body.system), "precondition: injection made system an array");

    const normalized = normalizeBodyForConverse(body);

    assert.strictEqual(typeof normalized.system, "string");
    assert.strictEqual(normalized.system, "You are a helpful assistant");
  });

  it("flattens injected message content back to plain strings", () => {
    const body = {
      system: "sys",
      messages: [
        { role: "user", content: "first" },
        { role: "assistant", content: "second" },
        { role: "user", content: "third" },
      ],
    };

    injectPromptCaching(body, "bedrock");

    const normalized = normalizeBodyForConverse(body);

    for (const msg of normalized.messages) {
      assert.strictEqual(typeof msg.content, "string");
    }
    assert.strictEqual(normalized.messages[0].content, "first");
    assert.strictEqual(normalized.messages[2].content, "third");
  });

  it("produces a Converse-valid system shape ([{text:string}])", () => {
    const body = {
      system: "cached system prompt",
      messages: [{ role: "user", content: "hi" }],
    };

    injectPromptCaching(body, "bedrock");
    const normalized = normalizeBodyForConverse(body);

    // Mirror how invokeBedrock builds the Converse system field.
    const converseSystem = [{ text: normalized.system }];
    assert.strictEqual(converseSystem.length, 1);
    assert.strictEqual(typeof converseSystem[0].text, "string");
    assert.strictEqual(converseSystem[0].text, "cached system prompt");
  });

  it("produces Converse-valid content blocks ({text:string})", () => {
    const body = {
      system: "sys",
      messages: [{ role: "user", content: "question" }],
    };

    injectPromptCaching(body, "bedrock");
    const normalized = normalizeBodyForConverse(body);

    // Mirror how invokeBedrock maps message content for Converse.
    const blocks = Array.isArray(normalized.messages[0].content)
      ? normalized.messages[0].content.map(c => ({ text: c.text || c.content || "" }))
      : [{ text: normalized.messages[0].content }];

    assert.strictEqual(blocks.length, 1);
    assert.strictEqual(typeof blocks[0].text, "string");
    assert.strictEqual(blocks[0].text, "question");
  });

  it("does not strip cache_control from the caller's original body", () => {
    const body = {
      system: "sys",
      messages: [{ role: "user", content: "hi" }],
    };

    injectPromptCaching(body, "bedrock");
    normalizeBodyForConverse(body);

    // Normalization works on a copy, leaving the injected body untouched.
    assert.ok(Array.isArray(body.system));
    assert.deepStrictEqual(body.system[0].cache_control, { type: "ephemeral" });
  });

  it("handles array system blocks without cache_control", () => {
    const body = {
      system: [
        { type: "text", text: "Part A" },
        { type: "text", text: "Part B" },
      ],
      messages: [],
    };

    // Blocks are concatenated with no separator.
    const normalized = normalizeBodyForConverse(body);
    assert.strictEqual(normalized.system, "Part APart B");
  });
});
80 changes: 80 additions & 0 deletions test/prompt-cache-injection.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const {
injectAnthropicCacheBreakpoints,
injectPromptCaching,
needsCacheInjection,
modelSupportsCacheControl,
} = require('../src/clients/prompt-cache-injection');

// ── needsCacheInjection ─────────────────────────────────────────────
Expand Down Expand Up @@ -220,3 +221,82 @@ describe('injectPromptCaching', () => {
assert.equal(count, 0);
});
});

// ── model-capability gate ───────────────────────────────────────────

describe('modelSupportsCacheControl', () => {
  // Asserts the gate's verdict for every model id in `ids`, placing the id
  // under `key` (`_tierModel` or `model`) of an otherwise empty body.
  const expectVerdict = (provider, key, ids, expected) => {
    for (const id of ids) {
      assert.equal(modelSupportsCacheControl({ [key]: id }, provider), expected);
    }
  };

  it('always supports for Anthropic-only providers', () => {
    assert.equal(modelSupportsCacheControl({}, 'azure-anthropic'), true);
    expectVerdict('databricks', '_tierModel', ['whatever'], true);
  });

  it('fails open when no model id is present', () => {
    for (const provider of ['bedrock', 'openrouter']) {
      assert.equal(modelSupportsCacheControl({}, provider), true);
    }
  });

  it('supports Claude model ids on bedrock', () => {
    expectVerdict(
      'bedrock',
      '_tierModel',
      ['us.anthropic.claude-sonnet-4-5-20250929-v1:0'],
      true
    );
  });

  it('blocks non-Claude bedrock families', () => {
    const bedrockIds = [
      'meta.llama3-70b-instruct-v1:0',
      'amazon.titan-text-express-v1',
      'mistral.mistral-7b-instruct-v0:2',
      'cohere.command-text-v14',
    ];
    expectVerdict('bedrock', '_tierModel', bedrockIds, false);
  });

  it('blocks non-supporting openrouter models', () => {
    const openrouterIds = ['meta-llama/llama-3-70b', 'openai/gpt-4o'];
    expectVerdict('openrouter', 'model', openrouterIds, false);
  });
});

describe('injectPromptCaching capability gate', () => {
  // Builds a fresh minimal request body for each test (injection mutates the
  // body it is given), optionally carrying a model id field.
  const makeBody = (modelFields = {}) => ({
    ...modelFields,
    system: 'test',
    messages: [{ role: 'user', content: 'hi' }],
  });

  it('still injects for bedrock when the model id is unknown (backward compatible)', () => {
    const injected = injectPromptCaching(makeBody(), 'bedrock');
    assert.equal(injected, 2);
  });

  it('injects for a Claude model on bedrock', () => {
    const request = makeBody({ _tierModel: 'anthropic.claude-3-5-sonnet-20241022-v2:0' });
    assert.equal(injectPromptCaching(request, 'bedrock'), 2);
  });

  it('skips injection for a non-Claude bedrock model and leaves body untouched', () => {
    const request = makeBody({ _tierModel: 'meta.llama3-70b-instruct-v1:0' });
    const injected = injectPromptCaching(request, 'bedrock');

    assert.equal(injected, 0);
    // Still the original strings: no conversion to block arrays happened.
    assert.equal(request.system, 'test');
    assert.equal(request.messages[0].content, 'hi');
  });

  it('skips injection for a GPT model routed via openrouter', () => {
    const request = makeBody({ model: 'openai/gpt-4o' });
    assert.equal(injectPromptCaching(request, 'openrouter'), 0);
  });
});
Loading