From fe85368c8cddcb5feb197c4eba9a9cec238e6fda Mon Sep 17 00:00:00 2001 From: Vandre Sales <131375746+vandre-sales@users.noreply.github.com> Date: Sun, 24 May 2026 16:39:52 -0300 Subject: [PATCH 1/9] feat(bedrock): add anthropic.claude-opus-4-7 to native model registry Adds Claude Opus 4.7 to the Bedrock native model registry with: - Full ModelInfo (maxTokens, contextWindow, pricing, cache config) - supportsReasoningBudget: true (enables thinking budget in UI) - cachableFields for multi-point prompt caching - 1M context tier pricing - Global Inference support Without this entry, custom model usage falls back to guessModelInfoFromId() which lacks supportsReasoningBudget and cachableFields, causing "too many tokens" errors during parallel file injection (no cache = tokens accumulate). Note: Pricing estimated based on claude-opus-4-6-v1. To be verified against Bedrock console pricing page before merge. --- packages/types/src/providers/bedrock.ts | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index 9ea52bced8..e3aeaf848d 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -167,6 +167,30 @@ export const bedrockModels = { }, ], }, + "anthropic.claude-opus-4-7": { + maxTokens: 8192, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console + outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 10.0, // $10 per million input tokens (>200K context) + outputPrice: 37.5, // $37.50 per million output tokens (>200K context) + cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context) + cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context) + }, + ], + }, "anthropic.claude-opus-4-5-20251101-v1:0": { maxTokens: 8192, contextWindow: 200_000, @@ -525,6 +549,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ "anthropic.claude-sonnet-4-5-20250929-v1:0", "anthropic.claude-sonnet-4-6", "anthropic.claude-opus-4-6-v1", + "anthropic.claude-opus-4-7", ] as const // Amazon Bedrock models that support Global Inference profiles @@ -535,6 +560,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ // - Claude Haiku 4.5 // - Claude Opus 4.5 // - Claude Opus 4.6 +// - Claude Opus 4.7 export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-sonnet-4-20250514-v1:0", "anthropic.claude-sonnet-4-5-20250929-v1:0", @@ -542,6 +568,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-haiku-4-5-20251001-v1:0", "anthropic.claude-opus-4-5-20251101-v1:0", "anthropic.claude-opus-4-6-v1", + "anthropic.claude-opus-4-7", ] as const // Amazon Bedrock Service Tier types From 6d951a02648b3a78dce71d2de2e4a688b53c9668 Mon Sep 17 00:00:00 2001 From: Vandre Sales Date: Sun, 24 May 2026 17:29:44 -0300 Subject: [PATCH 2/9] feat(bedrock): support Claude 4.7+ adaptive thinking and remove temperature Claude Opus/Sonnet 4.7 introduced breaking API changes: - temperature/top_p/top_k removed (causes 400 error) - thinking.type 'enabled' + budget_tokens removed (causes 400 error) - New thinking.type 'adaptive' with output_config.effort levels - New display: 'summarized' option to surface thinking content Changes: - Detect Gen 4.7+ models via baseModelId.includes('opus-4-7' | 'sonnet-4-7') - Omit temperature from inferenceConfig for 4.7+ models - Use thinking: { type: 'adaptive', display: 'summarized' } for 4.7+ - Set output_config.effort: 'xhigh' (highest level for coding/agentic tasks) - Maintain full backward compatibility with 4.6 and earlier models - Expanded BedrockAdditionalModelFields interface to support both formats References: - Claude 4.7 release notes (Apr 16, 2026) - effort levels: low | medium | high | xhigh | max --- src/api/providers/bedrock.ts | 51 +++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index bb7f0d89c5..8779fcad1a 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -61,9 +61,20 @@ interface BedrockInferenceConfig { // Define interface for Bedrock additional model request fields // This includes thinking configuration, 1M context beta, and other model-specific parameters interface BedrockAdditionalModelFields { - thinking?: { - type: "enabled" - budget_tokens: number + thinking?: + | { + type: "enabled" + budget_tokens: number + } + | { + // Claude 4.7+ adaptive thinking — no budget_tokens, uses output_config.effort instead + type: "adaptive" + // "summarized" shows thinking content in UI; omit to keep thinking internal only + display?: "summarized" | "none" + } + output_config?: { + // Claude 4.7+ effort levels: "low" | "medium" | "high" | "xhigh" | "max" + effort: string } anthropic_beta?: string[] [key: string]: any // Add index signature to be compatible with DocumentType @@ -381,6 +392,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH let additionalModelRequestFields: BedrockAdditionalModelFields | undefined let thinkingEnabled = false + // Detect model generation for API compatibility + // Claude 4.7+ removed sampling params (temperature/top_p/top_k) and uses adaptive thinking + const baseModelId = this.parseBaseModelId(modelConfig.id) + const isGen47Model = baseModelId.includes("opus-4-7") || baseModelId.includes("sonnet-4-7") + // Determine if thinking should be enabled // metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request) // shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true) @@ -392,27 +408,38 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) { thinkingEnabled = true - additionalModelRequestFields = { - thinking: { - type: "enabled", - budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096, - }, + if (isGen47Model) { + // Claude 4.7+ uses adaptive thinking with effort levels — budget_tokens causes 400 error + // display: "summarized" surfaces thinking content in Zoo Code UI + additionalModelRequestFields = { + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: "xhigh" }, + } + } else { + additionalModelRequestFields = { + thinking: { + type: "enabled", + budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096, + }, + } } logger.info("Extended thinking enabled for Bedrock request", { ctx: "bedrock", modelId: modelConfig.id, - thinking: additionalModelRequestFields.thinking, + thinking: additionalModelRequestFields?.thinking, }) } const inferenceConfig: BedrockInferenceConfig = { maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number), - temperature: modelConfig.temperature ?? (this.options.modelTemperature as number), + // Claude 4.7+ removed temperature parameter entirely — causes 400 error if sent + ...(isGen47Model + ? {} + : { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }), } // Check if 1M context is enabled for supported Claude 4 models - // Use parseBaseModelId to handle cross-region inference prefixes - const baseModelId = this.parseBaseModelId(modelConfig.id) + // Use parseBaseModelId to handle cross-region inference prefixes (computed above) const is1MContextEnabled = BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext From c53ab506952d4f2cefb7ee9f4f746ebc8983f204 Mon Sep 17 00:00:00 2001 From: vandre-sales Date: Thu, 28 May 2026 19:29:48 -0300 Subject: [PATCH 3/9] feat(types): add claude-opus-4-8 to Anthropic, Bedrock, and Vertex registries - Register claude-opus-4-8 in anthropicModels with 1M context, 128k output, supportsReasoningBudget, supportsReasoningBinary, supportsTemperature: false (mirrors 4.7 - no breaking API changes per the official migration guide). - Register anthropic.claude-opus-4-8 in bedrockModels with cache points, cachableFields, and 1M context tier pricing. - Register claude-opus-4-8 in vertexModels with the same shape. - Add anthropic.claude-opus-4-8 to BEDROCK_1M_CONTEXT_MODEL_IDS and BEDROCK_GLOBAL_INFERENCE_MODEL_IDS. - Add claude-opus-4-8 to VERTEX_1M_CONTEXT_MODEL_IDS. --- packages/types/src/providers/anthropic.ts | 18 +++++++++++++++ packages/types/src/providers/bedrock.ts | 28 +++++++++++++++++++++++ packages/types/src/providers/vertex.ts | 24 +++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index f3e99c691d..f123817e43 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -108,6 +108,24 @@ export const anthropicModels = { supportsReasoningBinary: true, supportsTemperature: false, }, + "claude-opus-4-8": { + maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false. + contextWindow: 1_000_000, // 1M context window native (no beta header required, same as 4.7) + supportsImages: true, + supportsPromptCache: true, + inputPrice: 5.0, // $5 per million input tokens (regular tier) + outputPrice: 25.0, // $25 per million output tokens (regular tier) + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + // 4.8 inherits the adaptive-thinking model introduced in 4.7 — no breaking + // API changes. supportsReasoningBudget is kept true so the existing token-cap + // handling and max-token overrides behave identically. + supportsReasoningBudget: true, + // 4.8 still rejects budget_tokens-style thinking payloads, so the UI must + // expose reasoning as a binary on/off toggle on this provider path. + supportsReasoningBinary: true, + supportsTemperature: false, + }, "claude-opus-4-5-20251101": { maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. contextWindow: 200_000, diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index e3aeaf848d..b3e1d3d74e 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -191,6 +191,32 @@ export const bedrockModels = { }, ], }, + "anthropic.claude-opus-4-8": { + maxTokens: 8192, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console + outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + // 4.8 inherits the same Bedrock pricing structure as 4.7 — no API breaking changes. + // Adaptive thinking is the only supported reasoning mode (same as 4.7). + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 10.0, // $10 per million input tokens (>200K context) + outputPrice: 37.5, // $37.50 per million output tokens (>200K context) + cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context) + cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context) + }, + ], + }, "anthropic.claude-opus-4-5-20251101-v1:0": { maxTokens: 8192, contextWindow: 200_000, @@ -550,6 +576,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ "anthropic.claude-sonnet-4-6", "anthropic.claude-opus-4-6-v1", "anthropic.claude-opus-4-7", + "anthropic.claude-opus-4-8", ] as const // Amazon Bedrock models that support Global Inference profiles @@ -569,6 +596,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-opus-4-5-20251101-v1:0", "anthropic.claude-opus-4-6-v1", "anthropic.claude-opus-4-7", + "anthropic.claude-opus-4-8", ] as const // Amazon Bedrock Service Tier types diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index e27c0cb101..38bb744b08 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -396,6 +396,29 @@ export const vertexModels = { }, ], }, + "claude-opus-4-8": { + maxTokens: 8192, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' + supportsImages: true, + supportsPromptCache: true, + inputPrice: 5.0, // $5 per million input tokens (≤200K context) + outputPrice: 25.0, // $25 per million output tokens (≤200K context) + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + supportsReasoningBudget: true, + supportsTemperature: false, + // 4.8 inherits the same Vertex pricing structure as 4.7 — no breaking changes. + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 10.0, // $10 per million input tokens (>200K context) + outputPrice: 37.5, // $37.50 per million output tokens (>200K context) + cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context) + cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context) + }, + ], + }, "claude-opus-4-5@20251101": { maxTokens: 8192, contextWindow: 200_000, @@ -595,6 +618,7 @@ export const VERTEX_1M_CONTEXT_MODEL_IDS = [ "claude-sonnet-4-6", "claude-opus-4-6", "claude-opus-4-7", + "claude-opus-4-8", ] as const export const VERTEX_REGIONS = [ From 7631312a368e4a3041b807e0ec616c56fe18e5d3 Mon Sep 17 00:00:00 2001 From: vandre-sales Date: Thu, 28 May 2026 19:31:23 -0300 Subject: [PATCH 4/9] feat(bedrock): support Claude Opus 4.8 (extends 4.7 adaptive thinking detection) - Anthropic provider: add claude-opus-4-8 to both prompt-caching switch statements so it gets the same handling as 4.7 (native 1M context, no beta header required). - Bedrock provider: rename isGen47Model -> isAdaptiveThinkingModel and expand the pattern to match opus-4-7, opus-4-8, sonnet-4-7, sonnet-4-8. 4.8 inherits the same adaptive-thinking + temperature-rejection contract from 4.7 with no breaking API changes. - OpenAI-compatible provider: update comment to mention 4.8 alongside 4.7; no logic change (already honors the supportsTemperature: false flag). The rename describes the capability (adaptive thinking) rather than a specific generation, making future Claude versions easier to support. --- src/api/providers/anthropic.ts | 2 ++ src/api/providers/bedrock.ts | 30 ++++++++++++++++++++++-------- src/api/providers/openai.ts | 7 ++++--- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 68daeead28..81c221921f 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -93,6 +93,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa case "claude-sonnet-4-20250514": case "claude-opus-4-6": case "claude-opus-4-7": + case "claude-opus-4-8": case "claude-opus-4-5-20251101": case "claude-opus-4-1-20250805": case "claude-opus-4-20250514": @@ -161,6 +162,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa case "claude-sonnet-4-20250514": case "claude-opus-4-6": case "claude-opus-4-7": + case "claude-opus-4-8": case "claude-opus-4-5-20251101": case "claude-opus-4-1-20250805": case "claude-opus-4-20250514": diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 8779fcad1a..657f5c3af3 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -392,10 +392,19 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH let additionalModelRequestFields: BedrockAdditionalModelFields | undefined let thinkingEnabled = false - // Detect model generation for API compatibility - // Claude 4.7+ removed sampling params (temperature/top_p/top_k) and uses adaptive thinking + // Detect models that require the adaptive-thinking API contract. + // Starting with Claude Opus 4.7 (and the matching Sonnet 4.7), and continuing + // in Opus 4.8 / Sonnet 4.8, Anthropic removed sampling parameters + // (temperature/top_p/top_k) and replaced budget_tokens-based thinking with + // `thinking.type: "adaptive"` plus `output_config.effort`. The migration guide + // from 4.7 → 4.8 confirms there are no further breaking API changes, so we + // keep a single guard here that matches both generations. const baseModelId = this.parseBaseModelId(modelConfig.id) - const isGen47Model = baseModelId.includes("opus-4-7") || baseModelId.includes("sonnet-4-7") + const isAdaptiveThinkingModel = + baseModelId.includes("opus-4-7") || + baseModelId.includes("opus-4-8") || + baseModelId.includes("sonnet-4-7") || + baseModelId.includes("sonnet-4-8") // Determine if thinking should be enabled // metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request) @@ -408,9 +417,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) { thinkingEnabled = true - if (isGen47Model) { - // Claude 4.7+ uses adaptive thinking with effort levels — budget_tokens causes 400 error - // display: "summarized" surfaces thinking content in Zoo Code UI + if (isAdaptiveThinkingModel) { + // Claude 4.7+ (incl. 4.8) uses adaptive thinking with effort levels — + // budget_tokens causes a 400 error. + // display: "summarized" surfaces thinking content in Zoo Code UI. + // effort "xhigh" remains the recommended level for agentic coding tasks + // across both 4.7 and 4.8 (4.8 changed the API default to "high" but + // the model continues to honour "xhigh" for deeper reasoning). additionalModelRequestFields = { thinking: { type: "adaptive", display: "summarized" }, output_config: { effort: "xhigh" }, @@ -432,8 +445,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH const inferenceConfig: BedrockInferenceConfig = { maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number), - // Claude 4.7+ removed temperature parameter entirely — causes 400 error if sent - ...(isGen47Model + // Claude 4.7+ (including 4.8) removed sampling parameters entirely — + // sending temperature causes a 400 error. + ...(isAdaptiveThinkingModel ? {} : { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }), } diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 7ea33196f9..336a290c2f 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -154,9 +154,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelId, - // Some OpenAI-Compatible models (e.g. claude-opus-4-7) reject `temperature` as - // deprecated/unsupported. Honor the model's `supportsTemperature` flag and omit it - // when explicitly set to false (undefined still sends temperature, preserving behavior). + // Some OpenAI-Compatible models (e.g. claude-opus-4-7, claude-opus-4-8) reject + // `temperature` as deprecated/unsupported. Honor the model's `supportsTemperature` + // flag and omit it when explicitly set to false (undefined still sends temperature, + // preserving behavior). ...(modelInfo.supportsTemperature !== false && { temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0), From 25a23dd976aa151e8ed83bb965f5440235fe6295 Mon Sep 17 00:00:00 2001 From: vandre-sales Date: Thu, 28 May 2026 19:31:58 -0300 Subject: [PATCH 5/9] test: add unit coverage for Claude Opus 4.8 across providers - anthropic.spec.ts: 5 cases mirroring 4.7 (1M-beta-header guard, adaptive thinking ON/OFF, custom maxTokens, getModel info). - anthropic-vertex.spec.ts: 1M context tier pricing for Vertex Opus 4.8. - shared/api.spec.ts: getModelMaxOutputTokens hybrid-token handling on 4.8. - bedrock.spec.ts: new 'Claude 4.7+ adaptive thinking' block with 5 cases covering 4.7 + 4.8 adaptive thinking, reasoning-off behaviour, a 4.6 regression guard (budget_tokens + temperature), and cross-region prefix detection (us.anthropic.claude-opus-4-8). 235 unit tests pass, 0 type errors. Validated live end-to-end via Bedrock Global Inference (global.anthropic.claude-opus-4-8). --- .../__tests__/anthropic-vertex.spec.ts | 16 +++ src/api/providers/__tests__/anthropic.spec.ts | 112 +++++++++++++++ src/api/providers/__tests__/bedrock.spec.ts | 130 ++++++++++++++++++ src/shared/__tests__/api.spec.ts | 29 ++++ 4 files changed, 287 insertions(+) diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts index 9ed0e51ad9..381d921c58 100644 --- a/src/api/providers/__tests__/anthropic-vertex.spec.ts +++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts @@ -929,6 +929,22 @@ describe("VertexHandler", () => { expect(model.betas).toContain("context-1m-2025-08-07") }) + it("should enable 1M context for Claude Opus 4.8 when beta flag is set", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: "claude-opus-4-8", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: true, + }) + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.inputPrice).toBe(10.0) + expect(model.info.outputPrice).toBe(37.5) + expect(model.info.supportsTemperature).toBe(false) + expect(model.betas).toContain("context-1m-2025-08-07") + }) + it("should not enable 1M context when flag is disabled", () => { const handler = new AnthropicVertexHandler({ apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index 1936423388..6aaa17d4f6 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -304,6 +304,101 @@ describe("AnthropicHandler", () => { expect(requestBody?.thinking).toEqual({ type: "adaptive" }) expect(requestBody?.max_tokens).toBe(32768) }) + + it("should not require the 1M context beta header for Claude Opus 4.8", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + anthropicBeta1MContext: true, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1] + expect(requestBody?.temperature).toBeUndefined() + expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31") + expect(requestOptions?.headers?.["anthropic-beta"]).not.toContain("context-1m-2025-08-07") + }) + + it("should use adaptive thinking for Claude Opus 4.8 when reasoning is enabled", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + enableReasoningEffort: true, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(16384) + }) + + it("should omit thinking for Claude Opus 4.8 when reasoning is disabled", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + enableReasoningEffort: false, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toBeUndefined() + expect(requestBody?.max_tokens).toBe(8192) + }) + + it("should preserve custom maxTokens for Claude Opus 4.8 when reasoning is enabled", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(32768) + }) }) describe("completePrompt", () => { @@ -431,6 +526,23 @@ describe("AnthropicHandler", () => { expect(model.reasoningBudget).toBeUndefined() }) + it("should handle Claude Opus 4.8 model correctly", () => { + const handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + }) + const model = handler.getModel() + expect(model.id).toBe("claude-opus-4-8") + expect(model.info.maxTokens).toBe(128000) + expect(model.info.contextWindow).toBe(1000000) + expect(model.maxTokens).toBe(8192) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + expect(model.reasoningBudget).toBeUndefined() + }) + it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => { const handler = new AnthropicHandler({ apiKey: "test-api-key", diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts index 4ddf9f77af..6f0156a811 100644 --- a/src/api/providers/__tests__/bedrock.spec.ts +++ b/src/api/providers/__tests__/bedrock.spec.ts @@ -1335,4 +1335,134 @@ describe("AwsBedrockHandler", () => { expect(hasCachePoint).toBe(false) }) }) + + describe("Claude 4.7+ adaptive thinking (Opus 4.7 / Opus 4.8)", () => { + beforeEach(() => { + mockConverseStreamCommand.mockReset() + }) + + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] + + it("should send adaptive thinking with effort xhigh for Claude Opus 4.7 when reasoning is enabled", async () => { + const opus47Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-7", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: true, + }) + + const generator = opus47Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Adaptive thinking — no budget_tokens, must use effort levels. + expect(commandArg.additionalModelRequestFields?.thinking).toEqual({ + type: "adaptive", + display: "summarized", + }) + expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" }) + // 4.7+ rejects sampling parameters: temperature must be omitted entirely. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("should send adaptive thinking with effort xhigh for Claude Opus 4.8 when reasoning is enabled", async () => { + const opus48Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: true, + }) + + const generator = opus48Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // 4.8 inherits the 4.7 adaptive-thinking contract — no breaking API changes. + expect(commandArg.additionalModelRequestFields?.thinking).toEqual({ + type: "adaptive", + display: "summarized", + }) + expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" }) + // Sampling parameters are still rejected on 4.8 — temperature must be absent. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("should omit thinking and temperature for Claude Opus 4.8 when reasoning is disabled", async () => { + const opus48Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: false, + }) + + const generator = opus48Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Without reasoning enabled, no adaptive thinking payload is sent. + expect(commandArg.additionalModelRequestFields?.thinking).toBeUndefined() + // Temperature is still omitted for 4.8 because the API rejects sampling params. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("should still send temperature and budget_tokens thinking for older Claude Opus 4.6", async () => { + // Regression guard: the adaptive-thinking branch must NOT activate for 4.6 or earlier. + const opus46Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-6-v1", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: true, + }) + + const generator = opus46Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // 4.6 still uses the budget_tokens-based thinking format. + expect(commandArg.additionalModelRequestFields?.thinking?.type).toBe("enabled") + expect(commandArg.additionalModelRequestFields?.thinking?.budget_tokens).toBeGreaterThan(0) + // 4.6 still accepts temperature. + expect(commandArg.inferenceConfig?.temperature).toBeDefined() + }) + + it("should detect adaptive-thinking models via cross-region inference prefix (us.anthropic.claude-opus-4-8)", async () => { + // Regression guard: the heuristic uses parseBaseModelId, so cross-region prefixes + // like `us.` / `eu.` / `global.` must still be detected as 4.8. + const opus48GlobalHandler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + awsUseCrossRegionInference: true, + enableReasoningEffort: true, + }) + + const generator = opus48GlobalHandler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Model ID should carry the cross-region prefix. + expect(commandArg.modelId).toBe("us.anthropic.claude-opus-4-8") + // Adaptive thinking must still apply despite the prefix. + expect(commandArg.additionalModelRequestFields?.thinking).toEqual({ + type: "adaptive", + display: "summarized", + }) + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + }) }) diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts index c70e3945d7..06d3a5b3f1 100644 --- a/src/shared/__tests__/api.spec.ts +++ b/src/shared/__tests__/api.spec.ts @@ -106,6 +106,35 @@ describe("getModelMaxOutputTokens", () => { ).toBe(32_768) }) + test("should preserve Anthropic hybrid token handling for Claude Opus 4.8", () => { + // 4.8 inherits the same adaptive-thinking + binary-reasoning capability as 4.7 + // (no breaking API changes between 4.7 and 4.8 per the official migration guide). + const model: ModelInfo = { + contextWindow: 1_000_000, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + maxTokens: 128_000, + } + + expect( + getModelMaxOutputTokens({ + modelId: "claude-opus-4-8", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: false }, + }), + ).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) + + expect( + getModelMaxOutputTokens({ + modelId: "claude-opus-4-8", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: true, modelMaxTokens: 32_768 }, + }), + ).toBe(32_768) + }) + test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => { const geminiModelId = "gemini-2.5-flash-preview-04-17" const model: ModelInfo = { From 8618f8489cefdd0000c77a7abecb7336fb92f583 Mon Sep 17 00:00:00 2001 From: vandre-sales Date: Thu, 28 May 2026 20:17:51 -0300 Subject: [PATCH 6/9] fix(bedrock): omit temperature in completePrompt for adaptive-thinking models Addresses CodeRabbit review on #386. completePrompt was unconditionally sending temperature in its inferenceConfig, which causes a 400 error for Claude Opus/Sonnet 4.7 and 4.8 (sampling parameters were removed by Anthropic for these models). createMessage already guarded this, but the non-stream path did not. - Extract the adaptive-thinking detection into a private isAdaptiveThinkingModel(modelId) method (parseBaseModelId-aware, so cross-region/global prefixes are handled). - Reuse it in both createMessage and completePrompt so the two request paths stay consistent. - Add two regression tests: completePrompt omits temperature for opus-4-8 and still sends it for opus-4-6. 64 bedrock tests pass, check-types clean. --- src/api/providers/__tests__/bedrock.spec.ts | 43 +++++++++++++++++++ src/api/providers/bedrock.ts | 47 +++++++++++++++------ 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts index 6f0156a811..69339a537d 100644 --- a/src/api/providers/__tests__/bedrock.spec.ts +++ b/src/api/providers/__tests__/bedrock.spec.ts @@ -1464,5 +1464,48 @@ describe("AwsBedrockHandler", () => { }) expect(commandArg.inferenceConfig?.temperature).toBeUndefined() }) + + it("completePrompt should omit temperature for Claude Opus 4.8 (non-stream path)", async () => { + // Regression guard for the non-stream path: completePrompt must guard + // temperature the same way createMessage does, otherwise adaptive-thinking + // models (4.7/4.8) return a 400 from Bedrock. + const mockConverseCommand = vi.mocked(ConverseCommand) + + const opus48Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + }) + + await opus48Handler.completePrompt("Test prompt") + + expect(mockConverseCommand).toHaveBeenCalled() + const commandArg = mockConverseCommand.mock.calls[0][0] as any + + // 4.8 must NOT receive temperature in the non-stream inferenceConfig. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("completePrompt should still send temperature for older Claude Opus 4.6 (non-stream path)", async () => { + // 4.6 and earlier still accept sampling parameters, so completePrompt must + // continue to send temperature for them. + const mockConverseCommand = vi.mocked(ConverseCommand) + + const opus46Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-6-v1", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + }) + + await opus46Handler.completePrompt("Test prompt") + + expect(mockConverseCommand).toHaveBeenCalled() + const commandArg = mockConverseCommand.mock.calls[0][0] as any + + // 4.6 must still receive temperature. + expect(commandArg.inferenceConfig?.temperature).toBeDefined() + }) }) }) diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 657f5c3af3..86f6d8fc75 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -297,6 +297,30 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH this.client = new BedrockRuntimeClient(clientConfig) } + /** + * Detect models that require the adaptive-thinking API contract. + * + * Starting with Claude Opus 4.7 (and the matching Sonnet 4.7), and continuing + * in Opus 4.8 / Sonnet 4.8, Anthropic removed sampling parameters + * (temperature/top_p/top_k) and replaced budget_tokens-based thinking with + * `thinking.type: "adaptive"` plus `output_config.effort`. The migration guide + * from 4.7 → 4.8 confirms there are no further breaking API changes, so a single + * guard matches both generations. Shared by createMessage and completePrompt so + * both request paths omit temperature for these models (sending it causes a 400). + * + * Accepts a model ID (with or without a cross-region/global prefix) and strips + * the prefix via parseBaseModelId before matching. + */ + private isAdaptiveThinkingModel(modelId: string): boolean { + const baseModelId = this.parseBaseModelId(modelId) + return ( + baseModelId.includes("opus-4-7") || + baseModelId.includes("opus-4-8") || + baseModelId.includes("sonnet-4-7") || + baseModelId.includes("sonnet-4-8") + ) + } + // Helper to guess model info from custom modelId string if not in bedrockModels private guessModelInfoFromId(modelId: string): Partial { // Define a mapping for model ID patterns and their configurations @@ -392,19 +416,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH let additionalModelRequestFields: BedrockAdditionalModelFields | undefined let thinkingEnabled = false - // Detect models that require the adaptive-thinking API contract. - // Starting with Claude Opus 4.7 (and the matching Sonnet 4.7), and continuing - // in Opus 4.8 / Sonnet 4.8, Anthropic removed sampling parameters - // (temperature/top_p/top_k) and replaced budget_tokens-based thinking with - // `thinking.type: "adaptive"` plus `output_config.effort`. The migration guide - // from 4.7 → 4.8 confirms there are no further breaking API changes, so we - // keep a single guard here that matches both generations. + // Detect models that require the adaptive-thinking API contract (Opus/Sonnet + // 4.7 and 4.8). See isAdaptiveThinkingModel for details. The same guard is + // reused in completePrompt so both request paths stay consistent. const baseModelId = this.parseBaseModelId(modelConfig.id) - const isAdaptiveThinkingModel = - baseModelId.includes("opus-4-7") || - baseModelId.includes("opus-4-8") || - baseModelId.includes("sonnet-4-7") || - baseModelId.includes("sonnet-4-8") + const isAdaptiveThinkingModel = this.isAdaptiveThinkingModel(modelConfig.id) // Determine if thinking should be enabled // metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request) @@ -788,7 +804,12 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH const inferenceConfig: BedrockInferenceConfig = { maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number), - temperature: modelConfig.temperature ?? (this.options.modelTemperature as number), + // Claude 4.7+ (including 4.8) removed sampling parameters entirely — + // sending temperature causes a 400 error. Guard the non-stream path the + // same way createMessage does so completePrompt also works for these models. + ...(this.isAdaptiveThinkingModel(modelConfig.id) + ? {} + : { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }), } // For completePrompt, use a unique conversation ID based on the prompt From 506bc9a721ed41787b8f535d5abc88c81d195d7e Mon Sep 17 00:00:00 2001 From: vandre-sales Date: Thu, 28 May 2026 20:31:24 -0300 Subject: [PATCH 7/9] test(e2e): add Bedrock smoke test for Claude Opus 4.8 Addresses @edelauna's review request on #386 to cover 4.8 in the new Bedrock e2e harness. Mirrors the existing user-agent smoke test but re-points the provider at us.anthropic.claude-opus-4-8. Since 4.8 is an adaptive-thinking model, this exercises the request path that omits temperature (and sends thinking.type "adaptive" when reasoning is enabled), proving a Bedrock round-trip completes without a 400. Runs against the binary-event-stream mock server in CI and against real AWS when BEDROCK_LIVE_E2E=true. The original 4.7-era test is left untouched; model id is overridable via BEDROCK_OPUS_48_MODEL_ID. check-types clean (tsconfig.esm.json). --- .../src/suite/providers/bedrock.test.ts | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/apps/vscode-e2e/src/suite/providers/bedrock.test.ts b/apps/vscode-e2e/src/suite/providers/bedrock.test.ts index c378b908da..6a0c4907f7 100644 --- a/apps/vscode-e2e/src/suite/providers/bedrock.test.ts +++ b/apps/vscode-e2e/src/suite/providers/bedrock.test.ts @@ -8,6 +8,10 @@ const AWS_BEARER_TOKEN_BEDROCK = process.env.AWS_BEARER_TOKEN_BEDROCK const BEDROCK_REGION = process.env.BEDROCK_REGION ?? "us-east-1" // Use a cross-region inference profile so the token works without per-region model access. const BEDROCK_MODEL_ID = process.env.BEDROCK_MODEL_ID ?? "us.anthropic.claude-haiku-4-5-20251001-v1:0" +// Claude Opus 4.8 routed through a cross-region inference profile. 4.8 is an +// adaptive-thinking model, so this exercises the request path that omits +// temperature and (when reasoning is enabled) sends thinking.type "adaptive". +const BEDROCK_OPUS_48_MODEL_ID = process.env.BEDROCK_OPUS_48_MODEL_ID ?? "us.anthropic.claude-opus-4-8" const BEDROCK_LIVE_E2E = process.env.BEDROCK_LIVE_E2E === "true" suite("Bedrock provider", function () { @@ -91,4 +95,53 @@ suite("Bedrock provider", function () { assert.ok(true, "Task completed successfully via Bedrock with ZooCode# userAgentAppId") } }) + + test("Should complete a task end-to-end via AWS Bedrock using Claude Opus 4.8", async () => { + const api = globalThis.api + + // Re-point the provider at Claude Opus 4.8 while keeping the same transport + // (mock server in CI, real AWS in live mode). Parity smoke test: it proves the + // 4.8 request path — model resolution, adaptive-thinking payload, and the + // temperature omission required by 4.7+ — completes a Bedrock round-trip + // without a 400. The mock server replies with the same attempt_completion("4") + // tool call regardless of model, so a successful completion exercises request + // formation end-to-end. + if (!process.env.AIMOCK_URL && BEDROCK_LIVE_E2E && AWS_BEARER_TOKEN_BEDROCK) { + await api.setConfiguration({ + apiProvider: "bedrock" as const, + awsUseApiKey: true, + awsApiKey: AWS_BEARER_TOKEN_BEDROCK, + awsRegion: BEDROCK_REGION, + apiModelId: BEDROCK_OPUS_48_MODEL_ID, + }) + } else { + await api.setConfiguration({ + apiProvider: "bedrock" as const, + awsUseApiKey: true, + awsApiKey: "mock-key", + awsRegion: BEDROCK_REGION, + apiModelId: BEDROCK_OPUS_48_MODEL_ID, + awsBedrockEndpoint: mockServer!.url, + awsBedrockEndpointEnabled: true, + }) + } + + const taskId = await api.startNewTask({ + configuration: { mode: "ask", autoApprovalEnabled: true }, + text: "bedrock-opus-48-smoke: what is 2+2? Reply with only the number.", + }) + + await waitUntilCompleted({ api, taskId }) + + if (mockServer) { + // The request reached the Bedrock endpoint (no 400 from temperature/thinking). + const userAgent = mockServer.lastRequestHeaders?.["user-agent"] as string | undefined + assert.ok(userAgent, "Bedrock request should include user-agent header") + assert.ok(userAgent.includes("ZooCode#"), `user-agent should contain "ZooCode#" — got: ${userAgent}`) + } else { + // Live mode: a successful round-trip proves 4.8 request formation works + // against real AWS Bedrock (adaptive thinking, no rejected sampling params). + assert.ok(true, "Task completed successfully via Bedrock with Claude Opus 4.8") + } + }) }) From 26c4447b91ccb0f034f8fc37ba9b0a1137f6312e Mon Sep 17 00:00:00 2001 From: vandre-sales Date: Thu, 28 May 2026 20:40:13 -0300 Subject: [PATCH 8/9] test(bedrock): cover sonnet-4-7/4-8 branches in isAdaptiveThinkingModel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codecov flagged the sonnet-4-7 and sonnet-4-8 branches of isAdaptiveThinkingModel as uncovered — they have no Bedrock registry entry yet (future-proof guards), so no existing test reached them. Add a focused unit test that calls the private method directly (same pattern the suite already uses for parseBaseModelId / getPrefixForRegion), covering: - all four positive patterns: opus-4-7, opus-4-8, sonnet-4-7, sonnet-4-8 - cross-region / global prefixes (us./eu./global.) via parseBaseModelId - negative cases: opus-4-6, sonnet-4-6, claude-3-5, nova Brings patch coverage to 100%. 68 bedrock tests pass, check-types clean. --- src/api/providers/__tests__/bedrock.spec.ts | 35 +++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts index 69339a537d..3b2b001f19 100644 --- a/src/api/providers/__tests__/bedrock.spec.ts +++ b/src/api/providers/__tests__/bedrock.spec.ts @@ -1507,5 +1507,40 @@ describe("AwsBedrockHandler", () => { // 4.6 must still receive temperature. expect(commandArg.inferenceConfig?.temperature).toBeDefined() }) + + describe("isAdaptiveThinkingModel detection", () => { + // Unit-cover the private guard directly (same pattern the suite uses for + // parseBaseModelId / getPrefixForRegion). This exercises all four model + // patterns — including the future-proof sonnet-4-7 / sonnet-4-8 branches + // that have no registry entry yet — plus negative cases and prefix stripping. + const handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-3-5-sonnet-20241022-v2:0", + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + }) + const isAdaptiveThinkingModel = (handler as any).isAdaptiveThinkingModel.bind(handler) + + it("returns true for all adaptive-thinking model patterns (opus/sonnet 4.7 and 4.8)", () => { + expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-7")).toBe(true) + expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-8")).toBe(true) + // Future-proof Sonnet patterns — guarded even before a registry entry exists. + expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-7")).toBe(true) + expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-8")).toBe(true) + }) + + it("returns true when the id carries a cross-region or global prefix", () => { + expect(isAdaptiveThinkingModel("us.anthropic.claude-opus-4-8")).toBe(true) + expect(isAdaptiveThinkingModel("eu.anthropic.claude-sonnet-4-7")).toBe(true) + expect(isAdaptiveThinkingModel("global.anthropic.claude-opus-4-8")).toBe(true) + }) + + it("returns false for older / non-adaptive models", () => { + expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-6-v1")).toBe(false) + expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-6")).toBe(false) + expect(isAdaptiveThinkingModel("anthropic.claude-3-5-sonnet-20241022-v2:0")).toBe(false) + expect(isAdaptiveThinkingModel("amazon.nova-lite-v1:0")).toBe(false) + }) + }) }) }) From bf2129a0bac11535332a1c7cce9acee9bf542322 Mon Sep 17 00:00:00 2001 From: Elliott de Launay Date: Fri, 29 May 2026 12:16:02 +0000 Subject: [PATCH 9/9] fix(vertex): use adaptive thinking for Opus 4.8 --- packages/types/src/providers/vertex.ts | 2 ++ .../__tests__/anthropic-vertex.spec.ts | 31 +++++++++++++++++++ src/api/providers/anthropic-vertex.ts | 15 ++++++--- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index 38bb744b08..345e9f00bd 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -384,6 +384,7 @@ export const vertexModels = { cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens supportsReasoningBudget: true, + supportsReasoningBinary: true, supportsTemperature: false, // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') tiers: [ @@ -406,6 +407,7 @@ export const vertexModels = { cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens supportsReasoningBudget: true, + supportsReasoningBinary: true, supportsTemperature: false, // 4.8 inherits the same Vertex pricing structure as 4.7 — no breaking changes. // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts index 381d921c58..6bf3f9485e 100644 --- a/src/api/providers/__tests__/anthropic-vertex.spec.ts +++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts @@ -1161,6 +1161,37 @@ describe("VertexHandler", () => { undefined, ) }) + + it("should use adaptive thinking for Claude Opus 4.8", async () => { + const opus48Handler = new AnthropicVertexHandler({ + apiModelId: "claude-opus-4-8", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + enableReasoningEffort: true, + }) + + const mockCreate = vitest.fn().mockImplementation(async () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } } + }, + })) + ;(opus48Handler["client"].messages as any).create = mockCreate + + await opus48Handler + .createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }]) + .next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + thinking: { type: "adaptive" }, + }), + undefined, + ) + + const request = mockCreate.mock.calls[0][0] + expect(request.thinking).not.toHaveProperty("budget_tokens") + expect(request.temperature).toBeUndefined() + }) }) describe("native tool calling", () => { diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index b6b94fcde7..b9685509c3 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -17,6 +17,7 @@ import { ApiStream } from "../transform/stream" import { addCacheBreakpoints } from "../transform/caching/vertex" import { getModelParams } from "../transform/model-params" import { filterNonAnthropicBlocks } from "../transform/anthropic-filter" +import { getAnthropicProviderReasoning } from "../transform/reasoning" import { convertOpenAIToolsToAnthropic, convertOpenAIToolChoiceToAnthropic, @@ -95,7 +96,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple * This ensures we stay under the 4-block limit while maintaining effective caching * for the most relevant context. */ - const params: Anthropic.Messages.MessageCreateParamsStreaming = { + const params = { model: id, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, @@ -107,7 +108,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple messages: supportsPromptCache ? addCacheBreakpoints(sanitizedMessages) : sanitizedMessages, stream: true, ...nativeToolParams, - } + } as Anthropic.Messages.MessageCreateParamsStreaming // and prompt caching const requestOptions = betas?.length ? { headers: { "anthropic-beta": betas.join(",") } } : undefined @@ -240,6 +241,11 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple settings: this.options, defaultTemperature: 0, }) + const thinking = getAnthropicProviderReasoning({ + model: info, + reasoningBudget: params.reasoningBudget, + settings: this.options, + }) // Build betas array for request headers const betas: string[] = [] @@ -258,6 +264,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple info, betas: betas.length > 0 ? betas : undefined, ...params, + reasoning: thinking, } } @@ -271,7 +278,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple reasoning: thinking, } = this.getModel() - const params: Anthropic.Messages.MessageCreateParamsNonStreaming = { + const params = { model: id, max_tokens: maxTokens, temperature, @@ -285,7 +292,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple }, ], stream: false, - } + } as Anthropic.Messages.MessageCreateParamsNonStreaming const response = await this.client.messages.create(params) const content = response.content[0]