From 140867fccb8ab1a81a8dacb67069b33324ac0667 Mon Sep 17 00:00:00 2001
From: Vandre Sales <131375746+vandre-sales@users.noreply.github.com>
Date: Sun, 24 May 2026 16:39:52 -0300
Subject: [PATCH 1/2] feat(bedrock): add anthropic.claude-opus-4-7 to native
 model registry

Adds Claude Opus 4.7 to the Bedrock native model registry with:
- Full ModelInfo (maxTokens, contextWindow, pricing, cache config)
- supportsReasoningBudget: true (enables thinking budget in UI)
- cachableFields for multi-point prompt caching
- 1M context tier pricing
- Global Inference support

Without this entry, custom model usage falls back to guessModelInfoFromId(),
which sets neither supportsReasoningBudget nor cachableFields. With no prompt
caching, tokens accumulate during parallel file injection and requests fail
with "too many tokens" errors.

Note: Pricing is estimated from claude-opus-4-6-v1 and must be verified against
the Bedrock console pricing page before merge.
---
 packages/types/src/providers/bedrock.ts | 27 +++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts
index 9ea52bced8..e3aeaf848d 100644
--- a/packages/types/src/providers/bedrock.ts
+++ b/packages/types/src/providers/bedrock.ts
@@ -167,6 +167,30 @@ export const bedrockModels = {
 			},
 		],
 	},
+	"anthropic.claude-opus-4-7": {
+		maxTokens: 8192,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsReasoningBudget: true,
+		inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console
+		outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		minTokensPerCachePoint: 1024,
+		maxCachePoints: 4,
+		cachableFields: ["system", "messages", "tools"],
+		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 10.0, // $10 per million input tokens (>200K context)
+				outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
+				cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
+				cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
+			},
+		],
+	},
 	"anthropic.claude-opus-4-5-20251101-v1:0": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -525,6 +549,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"anthropic.claude-sonnet-4-6",
 	"anthropic.claude-opus-4-6-v1",
+	"anthropic.claude-opus-4-7",
 ] as const
 
 // Amazon Bedrock models that support Global Inference profiles
@@ -535,6 +560,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 // - Claude Haiku 4.5
 // - Claude Opus 4.5
 // - Claude Opus 4.6
+// - Claude Opus 4.7
 export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-20250514-v1:0",
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
@@ -542,6 +568,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-haiku-4-5-20251001-v1:0",
 	"anthropic.claude-opus-4-5-20251101-v1:0",
 	"anthropic.claude-opus-4-6-v1",
+	"anthropic.claude-opus-4-7",
 ] as const
 
 // Amazon Bedrock Service Tier types

From b917925ba66088423982c8baee434024553c3683 Mon Sep 17 00:00:00 2001
From: Vandre Sales <vandre.sales@gmail.com>
Date: Sun, 24 May 2026 17:29:44 -0300
Subject: [PATCH 2/2] feat(bedrock): support Claude 4.7+ adaptive thinking and
 remove temperature

Claude Opus/Sonnet 4.7 introduced breaking API changes:
- temperature/top_p/top_k removed (causes 400 error)
- thinking.type 'enabled' + budget_tokens removed (causes 400 error)
- New thinking.type 'adaptive' with output_config.effort levels
- New display: 'summarized' option to surface thinking content

Changes:
- Detect 4.7 models by checking whether baseModelId contains 'opus-4-7' or
  'sonnet-4-7' (substring match; later versions are not matched)
- Omit temperature from inferenceConfig for 4.7+ models
- Use thinking: { type: 'adaptive', display: 'summarized' } for 4.7+
- Set output_config.effort to 'xhigh' whenever thinking is enabled (hard-coded;
  chosen for coding/agentic tasks; 'max' is also available, see References)
- Maintain full backward compatibility with 4.6 and earlier models
- Expanded BedrockAdditionalModelFields interface to support both formats

References:
- Claude 4.7 release notes (Apr 16, 2026)
- effort levels: low | medium | high | xhigh | max
---
 src/api/providers/bedrock.ts | 51 +++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts
index 3ceb251003..344fcd51db 100644
--- a/src/api/providers/bedrock.ts
+++ b/src/api/providers/bedrock.ts
@@ -61,9 +61,20 @@ interface BedrockInferenceConfig {
 // Define interface for Bedrock additional model request fields
 // This includes thinking configuration, 1M context beta, and other model-specific parameters
 interface BedrockAdditionalModelFields {
-	thinking?: {
-		type: "enabled"
-		budget_tokens: number
+	thinking?:
+		| {
+				type: "enabled"
+				budget_tokens: number
+		  }
+		| {
+				// Claude 4.7+ adaptive thinking — no budget_tokens, uses output_config.effort instead
+				type: "adaptive"
+				// "summarized" shows thinking content in UI; omit to keep thinking internal only
+				display?: "summarized" | "none"
+		  }
+	output_config?: {
+		// Claude 4.7+ effort levels: "low" | "medium" | "high" | "xhigh" | "max"
+		effort: string
 	}
 	anthropic_beta?: string[]
 	[key: string]: any // Add index signature to be compatible with DocumentType
@@ -381,6 +392,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		let additionalModelRequestFields: BedrockAdditionalModelFields | undefined
 		let thinkingEnabled = false
 
+		// Detect model generation for API compatibility
+		// Claude 4.7+ removed sampling params (temperature/top_p/top_k) and uses adaptive thinking
+		const baseModelId = this.parseBaseModelId(modelConfig.id)
+		const isGen47Model = baseModelId.includes("opus-4-7") || baseModelId.includes("sonnet-4-7")
+
 		// Determine if thinking should be enabled
 		// metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
 		// shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true)
@@ -392,27 +408,38 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 
 		if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
 			thinkingEnabled = true
-			additionalModelRequestFields = {
-				thinking: {
-					type: "enabled",
-					budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
-				},
+			if (isGen47Model) {
+				// Claude 4.7+ uses adaptive thinking with effort levels — budget_tokens causes 400 error
+				// display: "summarized" surfaces thinking content in Zoo Code UI
+				additionalModelRequestFields = {
+					thinking: { type: "adaptive", display: "summarized" },
+					output_config: { effort: "xhigh" },
+				}
+			} else {
+				additionalModelRequestFields = {
+					thinking: {
+						type: "enabled",
+						budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
+					},
+				}
 			}
 			logger.info("Extended thinking enabled for Bedrock request", {
 				ctx: "bedrock",
 				modelId: modelConfig.id,
-				thinking: additionalModelRequestFields.thinking,
+				thinking: additionalModelRequestFields?.thinking,
 			})
 		}
 
 		const inferenceConfig: BedrockInferenceConfig = {
 			maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-			temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
+			// Claude 4.7+ removed temperature parameter entirely — causes 400 error if sent
+			...(isGen47Model
+				? {}
+				: { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }),
 		}
 
 		// Check if 1M context is enabled for supported Claude 4 models
-		// Use parseBaseModelId to handle cross-region inference prefixes
-		const baseModelId = this.parseBaseModelId(modelConfig.id)
+		// Use parseBaseModelId to handle cross-region inference prefixes (computed above)
 		const is1MContextEnabled =
 			BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext