From 140867fccb8ab1a81a8dacb67069b33324ac0667 Mon Sep 17 00:00:00 2001 From: Vandre Sales <131375746+vandre-sales@users.noreply.github.com> Date: Sun, 24 May 2026 16:39:52 -0300 Subject: [PATCH 1/2] feat(bedrock): add anthropic.claude-opus-4-7 to native model registry Adds Claude Opus 4.7 to the Bedrock native model registry with: - Full ModelInfo (maxTokens, contextWindow, pricing, cache config) - supportsReasoningBudget: true (enables thinking budget in UI) - cachableFields for multi-point prompt caching - 1M context tier pricing - Global Inference support Without this entry, custom model usage falls back to guessModelInfoFromId() which lacks supportsReasoningBudget and cachableFields, causing "too many tokens" errors during parallel file injection (no cache = tokens accumulate). Note: Pricing estimated based on claude-opus-4-6-v1. To be verified against Bedrock console pricing page before merge. --- packages/types/src/providers/bedrock.ts | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index 9ea52bced8..e3aeaf848d 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -167,6 +167,30 @@ export const bedrockModels = { }, ], }, + "anthropic.claude-opus-4-7": { + maxTokens: 8192, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console + outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 10.0, // $10 per million input tokens (>200K context) + outputPrice: 37.5, // $37.50 per million output tokens (>200K context) + cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context) + cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context) + }, + ], + }, "anthropic.claude-opus-4-5-20251101-v1:0": { maxTokens: 8192, contextWindow: 200_000, @@ -525,6 +549,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ "anthropic.claude-sonnet-4-5-20250929-v1:0", "anthropic.claude-sonnet-4-6", "anthropic.claude-opus-4-6-v1", + "anthropic.claude-opus-4-7", ] as const // Amazon Bedrock models that support Global Inference profiles @@ -535,6 +560,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ // - Claude Haiku 4.5 // - Claude Opus 4.5 // - Claude Opus 4.6 +// - Claude Opus 4.7 export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-sonnet-4-20250514-v1:0", "anthropic.claude-sonnet-4-5-20250929-v1:0", @@ -542,6 +568,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-haiku-4-5-20251001-v1:0", "anthropic.claude-opus-4-5-20251101-v1:0", "anthropic.claude-opus-4-6-v1", + "anthropic.claude-opus-4-7", ] as const // Amazon Bedrock Service Tier types From b917925ba66088423982c8baee434024553c3683 Mon Sep 17 00:00:00 2001 From: Vandre Sales Date: Sun, 24 May 2026 17:29:44 -0300 Subject: [PATCH 2/2] feat(bedrock): support Claude 4.7+ adaptive thinking and remove temperature Claude Opus/Sonnet 4.7 introduced breaking API changes: - temperature/top_p/top_k removed (causes 400 error) - thinking.type 'enabled' + budget_tokens removed (causes 400 error) - New thinking.type 'adaptive' with output_config.effort levels - New display: 'summarized' option to surface thinking content Changes: - Detect Gen 4.7+ models via baseModelId.includes('opus-4-7' | 'sonnet-4-7') - Omit temperature from inferenceConfig for 4.7+ models - Use thinking: { type: 'adaptive', display: 'summarized' } for 4.7+ - Set output_config.effort: 'xhigh' (highest level for coding/agentic tasks) - Maintain full backward compatibility with 4.6 and earlier models - Expanded BedrockAdditionalModelFields interface to support both formats References: - Claude 4.7 release notes (Apr 16, 2026) - effort levels: low | medium | high | xhigh | max --- src/api/providers/bedrock.ts | 51 +++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 3ceb251003..344fcd51db 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -61,9 +61,20 @@ interface BedrockInferenceConfig { // Define interface for Bedrock additional model request fields // This includes thinking configuration, 1M context beta, and other model-specific parameters interface BedrockAdditionalModelFields { - thinking?: { - type: "enabled" - budget_tokens: number + thinking?: + | { + type: "enabled" + budget_tokens: number + } + | { + // Claude 4.7+ adaptive thinking — no budget_tokens, uses output_config.effort instead + type: "adaptive" + // "summarized" shows thinking content in UI; omit to keep thinking internal only + display?: "summarized" | "none" + } + output_config?: { + // Claude 4.7+ effort levels: "low" | "medium" | "high" | "xhigh" | "max" + effort: string } anthropic_beta?: string[] [key: string]: any // Add index signature to be compatible with DocumentType @@ -381,6 +392,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH let additionalModelRequestFields: BedrockAdditionalModelFields | undefined let thinkingEnabled = false + // Detect model generation for API compatibility + // Claude 4.7+ removed sampling params (temperature/top_p/top_k) and uses adaptive thinking + const baseModelId = this.parseBaseModelId(modelConfig.id) + const isGen47Model = baseModelId.includes("opus-4-7") || baseModelId.includes("sonnet-4-7") + // Determine if thinking should be enabled // metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request) // shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true) @@ -392,27 +408,38 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) { thinkingEnabled = true - additionalModelRequestFields = { - thinking: { - type: "enabled", - budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096, - }, + if (isGen47Model) { + // Claude 4.7+ uses adaptive thinking with effort levels — budget_tokens causes 400 error + // display: "summarized" surfaces thinking content in Zoo Code UI + additionalModelRequestFields = { + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: "xhigh" }, + } + } else { + additionalModelRequestFields = { + thinking: { + type: "enabled", + budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096, + }, + } } logger.info("Extended thinking enabled for Bedrock request", { ctx: "bedrock", modelId: modelConfig.id, - thinking: additionalModelRequestFields.thinking, + thinking: additionalModelRequestFields?.thinking, }) } const inferenceConfig: BedrockInferenceConfig = { maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number), - temperature: modelConfig.temperature ?? (this.options.modelTemperature as number), + // Claude 4.7+ removed temperature parameter entirely — causes 400 error if sent + ...(isGen47Model + ? {} + : { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }), } // Check if 1M context is enabled for supported Claude 4 models - // Use parseBaseModelId to handle cross-region inference prefixes - const baseModelId = this.parseBaseModelId(modelConfig.id) + // Use parseBaseModelId to handle cross-region inference prefixes (computed above) const is1MContextEnabled = BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext