From fe85368c8cddcb5feb197c4eba9a9cec238e6fda Mon Sep 17 00:00:00 2001
From: Vandre Sales <131375746+vandre-sales@users.noreply.github.com>
Date: Sun, 24 May 2026 16:39:52 -0300
Subject: [PATCH 1/9] feat(bedrock): add anthropic.claude-opus-4-7 to native
 model registry

Adds Claude Opus 4.7 to the Bedrock native model registry with:
- Full ModelInfo (maxTokens, contextWindow, pricing, cache config)
- supportsReasoningBudget: true (enables thinking budget in UI)
- cachableFields for multi-point prompt caching
- 1M context tier pricing
- Global Inference support

Without this entry, custom model usage falls back to guessModelInfoFromId(),
which lacks supportsReasoningBudget and cachableFields. With no prompt
caching, tokens accumulate across requests and parallel file injection fails
with "too many tokens" errors.

Note: Pricing estimated based on claude-opus-4-6-v1. To be verified against
Bedrock console pricing page before merge.
---
 packages/types/src/providers/bedrock.ts | 27 +++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts
index 9ea52bced8..e3aeaf848d 100644
--- a/packages/types/src/providers/bedrock.ts
+++ b/packages/types/src/providers/bedrock.ts
@@ -167,6 +167,30 @@ export const bedrockModels = {
 			},
 		],
 	},
+	"anthropic.claude-opus-4-7": {
+		maxTokens: 8192,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsReasoningBudget: true,
+		inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console
+		outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		minTokensPerCachePoint: 1024,
+		maxCachePoints: 4,
+		cachableFields: ["system", "messages", "tools"],
+		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 10.0, // $10 per million input tokens (>200K context)
+				outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
+				cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
+				cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
+			},
+		],
+	},
 	"anthropic.claude-opus-4-5-20251101-v1:0": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -525,6 +549,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"anthropic.claude-sonnet-4-6",
 	"anthropic.claude-opus-4-6-v1",
+	"anthropic.claude-opus-4-7",
 ] as const
 
 // Amazon Bedrock models that support Global Inference profiles
@@ -535,6 +560,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 // - Claude Haiku 4.5
 // - Claude Opus 4.5
 // - Claude Opus 4.6
+// - Claude Opus 4.7
 export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-20250514-v1:0",
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
@@ -542,6 +568,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-haiku-4-5-20251001-v1:0",
 	"anthropic.claude-opus-4-5-20251101-v1:0",
 	"anthropic.claude-opus-4-6-v1",
+	"anthropic.claude-opus-4-7",
 ] as const
 
 // Amazon Bedrock Service Tier types

From 6d951a02648b3a78dce71d2de2e4a688b53c9668 Mon Sep 17 00:00:00 2001
From: Vandre Sales <vandre.sales@gmail.com>
Date: Sun, 24 May 2026 17:29:44 -0300
Subject: [PATCH 2/9] feat(bedrock): support Claude 4.7+ adaptive thinking and
 remove temperature

Claude Opus/Sonnet 4.7 introduced breaking API changes:
- temperature/top_p/top_k removed (causes 400 error)
- thinking.type 'enabled' + budget_tokens removed (causes 400 error)
- New thinking.type 'adaptive' with output_config.effort levels
- New display: 'summarized' option to surface thinking content

Changes:
- Detect 4.7 models via a substring match on the base model ID:
  baseModelId.includes('opus-4-7') || baseModelId.includes('sonnet-4-7')
- Omit temperature from inferenceConfig for 4.7+ models
- Use thinking: { type: 'adaptive', display: 'summarized' } for 4.7+
- Set output_config.effort: 'xhigh' (high-effort level used for coding/agentic
  tasks; 'max' is the top level, see the effort list under References)
- Maintain full backward compatibility with 4.6 and earlier models
- Expanded BedrockAdditionalModelFields interface to support both formats

References:
- Claude 4.7 release notes (Apr 16, 2026)
- effort levels: low | medium | high | xhigh | max
---
 src/api/providers/bedrock.ts | 51 +++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts
index bb7f0d89c5..8779fcad1a 100644
--- a/src/api/providers/bedrock.ts
+++ b/src/api/providers/bedrock.ts
@@ -61,9 +61,20 @@ interface BedrockInferenceConfig {
 // Define interface for Bedrock additional model request fields
 // This includes thinking configuration, 1M context beta, and other model-specific parameters
 interface BedrockAdditionalModelFields {
-	thinking?: {
-		type: "enabled"
-		budget_tokens: number
+	thinking?:
+		| {
+				type: "enabled"
+				budget_tokens: number
+		  }
+		| {
+				// Claude 4.7+ adaptive thinking — no budget_tokens, uses output_config.effort instead
+				type: "adaptive"
+				// "summarized" shows thinking content in UI; omit to keep thinking internal only
+				display?: "summarized" | "none"
+		  }
+	output_config?: {
+		// Claude 4.7+ effort levels: "low" | "medium" | "high" | "xhigh" | "max"
+		effort: string
 	}
 	anthropic_beta?: string[]
 	[key: string]: any // Add index signature to be compatible with DocumentType
@@ -381,6 +392,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		let additionalModelRequestFields: BedrockAdditionalModelFields | undefined
 		let thinkingEnabled = false
 
+		// Detect model generation for API compatibility
+		// Claude 4.7+ removed sampling params (temperature/top_p/top_k) and uses adaptive thinking
+		const baseModelId = this.parseBaseModelId(modelConfig.id)
+		const isGen47Model = baseModelId.includes("opus-4-7") || baseModelId.includes("sonnet-4-7")
+
 		// Determine if thinking should be enabled
 		// metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
 		// shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true)
@@ -392,27 +408,38 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 
 		if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
 			thinkingEnabled = true
-			additionalModelRequestFields = {
-				thinking: {
-					type: "enabled",
-					budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
-				},
+			if (isGen47Model) {
+				// Claude 4.7+ uses adaptive thinking with effort levels — budget_tokens causes 400 error
+				// display: "summarized" surfaces thinking content in Zoo Code UI
+				additionalModelRequestFields = {
+					thinking: { type: "adaptive", display: "summarized" },
+					output_config: { effort: "xhigh" },
+				}
+			} else {
+				additionalModelRequestFields = {
+					thinking: {
+						type: "enabled",
+						budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
+					},
+				}
 			}
 			logger.info("Extended thinking enabled for Bedrock request", {
 				ctx: "bedrock",
 				modelId: modelConfig.id,
-				thinking: additionalModelRequestFields.thinking,
+				thinking: additionalModelRequestFields?.thinking,
 			})
 		}
 
 		const inferenceConfig: BedrockInferenceConfig = {
 			maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-			temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
+			// Claude 4.7+ removed temperature parameter entirely — causes 400 error if sent
+			...(isGen47Model
+				? {}
+				: { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }),
 		}
 
 		// Check if 1M context is enabled for supported Claude 4 models
-		// Use parseBaseModelId to handle cross-region inference prefixes
-		const baseModelId = this.parseBaseModelId(modelConfig.id)
+		// Use parseBaseModelId to handle cross-region inference prefixes (computed above)
 		const is1MContextEnabled =
 			BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext
 

From c53ab506952d4f2cefb7ee9f4f746ebc8983f204 Mon Sep 17 00:00:00 2001
From: vandre-sales <vandre.sales@meliva.ai>
Date: Thu, 28 May 2026 19:29:48 -0300
Subject: [PATCH 3/9] feat(types): add claude-opus-4-8 to Anthropic, Bedrock,
 and Vertex registries

- Register claude-opus-4-8 in anthropicModels with 1M context, 128k output,
  supportsReasoningBudget, supportsReasoningBinary, supportsTemperature: false
  (mirrors 4.7 - no breaking API changes per the official migration guide).
- Register anthropic.claude-opus-4-8 in bedrockModels with cache points,
  cachableFields, and 1M context tier pricing.
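- Register claude-opus-4-8 in vertexModels with a 200K default context window,
  1M context tier pricing, supportsReasoningBudget, and
  supportsTemperature: false (no cache-point fields on this provider).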
- Add anthropic.claude-opus-4-8 to BEDROCK_1M_CONTEXT_MODEL_IDS and
  BEDROCK_GLOBAL_INFERENCE_MODEL_IDS.
- Add claude-opus-4-8 to VERTEX_1M_CONTEXT_MODEL_IDS.
---
 packages/types/src/providers/anthropic.ts | 18 +++++++++++++++
 packages/types/src/providers/bedrock.ts   | 28 +++++++++++++++++++++++
 packages/types/src/providers/vertex.ts    | 24 +++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts
index f3e99c691d..f123817e43 100644
--- a/packages/types/src/providers/anthropic.ts
+++ b/packages/types/src/providers/anthropic.ts
@@ -108,6 +108,24 @@ export const anthropicModels = {
 		supportsReasoningBinary: true,
 		supportsTemperature: false,
 	},
+	"claude-opus-4-8": {
+		maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false.
+		contextWindow: 1_000_000, // 1M context window native (no beta header required, same as 4.7)
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 5.0, // $5 per million input tokens (regular tier)
+		outputPrice: 25.0, // $25 per million output tokens (regular tier)
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		// 4.8 inherits the adaptive-thinking model introduced in 4.7 — no breaking
+		// API changes. supportsReasoningBudget is kept true so the existing token-cap
+		// handling and max-token overrides behave identically.
+		supportsReasoningBudget: true,
+		// 4.8 still rejects budget_tokens-style thinking payloads, so the UI must
+		// expose reasoning as a binary on/off toggle on this provider path.
+		supportsReasoningBinary: true,
+		supportsTemperature: false,
+	},
 	"claude-opus-4-5-20251101": {
 		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
 		contextWindow: 200_000,
diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts
index e3aeaf848d..b3e1d3d74e 100644
--- a/packages/types/src/providers/bedrock.ts
+++ b/packages/types/src/providers/bedrock.ts
@@ -191,6 +191,32 @@ export const bedrockModels = {
 			},
 		],
 	},
+	"anthropic.claude-opus-4-8": {
+		maxTokens: 8192,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsReasoningBudget: true,
+		inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console
+		outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		minTokensPerCachePoint: 1024,
+		maxCachePoints: 4,
+		cachableFields: ["system", "messages", "tools"],
+		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
+		// 4.8 inherits the same Bedrock pricing structure as 4.7 — no API breaking changes.
+		// Adaptive thinking is the only supported reasoning mode (same as 4.7).
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 10.0, // $10 per million input tokens (>200K context)
+				outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
+				cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
+				cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
+			},
+		],
+	},
 	"anthropic.claude-opus-4-5-20251101-v1:0": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -550,6 +576,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-6",
 	"anthropic.claude-opus-4-6-v1",
 	"anthropic.claude-opus-4-7",
+	"anthropic.claude-opus-4-8",
 ] as const
 
 // Amazon Bedrock models that support Global Inference profiles
@@ -569,6 +596,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-opus-4-5-20251101-v1:0",
 	"anthropic.claude-opus-4-6-v1",
 	"anthropic.claude-opus-4-7",
+	"anthropic.claude-opus-4-8",
 ] as const
 
 // Amazon Bedrock Service Tier types
diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts
index e27c0cb101..38bb744b08 100644
--- a/packages/types/src/providers/vertex.ts
+++ b/packages/types/src/providers/vertex.ts
@@ -396,6 +396,29 @@ export const vertexModels = {
 			},
 		],
 	},
+	"claude-opus-4-8": {
+		maxTokens: 8192,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 5.0, // $5 per million input tokens (≤200K context)
+		outputPrice: 25.0, // $25 per million output tokens (≤200K context)
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		supportsReasoningBudget: true,
+		supportsTemperature: false,
+		// 4.8 inherits the same Vertex pricing structure as 4.7 — no breaking changes.
+		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 10.0, // $10 per million input tokens (>200K context)
+				outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
+				cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
+				cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
+			},
+		],
+	},
 	"claude-opus-4-5@20251101": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -595,6 +618,7 @@ export const VERTEX_1M_CONTEXT_MODEL_IDS = [
 	"claude-sonnet-4-6",
 	"claude-opus-4-6",
 	"claude-opus-4-7",
+	"claude-opus-4-8",
 ] as const
 
 export const VERTEX_REGIONS = [

From 7631312a368e4a3041b807e0ec616c56fe18e5d3 Mon Sep 17 00:00:00 2001
From: vandre-sales <vandre.sales@meliva.ai>
Date: Thu, 28 May 2026 19:31:23 -0300
Subject: [PATCH 4/9] feat(bedrock): support Claude Opus 4.8 (extends 4.7
 adaptive thinking detection)

- Anthropic provider: add claude-opus-4-8 to both prompt-caching switch
  statements so it gets the same handling as 4.7 (native 1M context, no beta
  header required).
- Bedrock provider: rename isGen47Model -> isAdaptiveThinkingModel and expand
  the pattern to match opus-4-7, opus-4-8, sonnet-4-7, sonnet-4-8. 4.8 inherits
  the same adaptive-thinking + temperature-rejection contract from 4.7 with no
  breaking API changes.
- OpenAI-compatible provider: update comment to mention 4.8 alongside 4.7; no
  logic change (already honors the supportsTemperature: false flag).

The rename describes the capability (adaptive thinking) rather than a specific
generation, making future Claude versions easier to support.
---
 src/api/providers/anthropic.ts |  2 ++
 src/api/providers/bedrock.ts   | 30 ++++++++++++++++++++++--------
 src/api/providers/openai.ts    |  7 ++++---
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts
index 68daeead28..81c221921f 100644
--- a/src/api/providers/anthropic.ts
+++ b/src/api/providers/anthropic.ts
@@ -93,6 +93,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 			case "claude-sonnet-4-20250514":
 			case "claude-opus-4-6":
 			case "claude-opus-4-7":
+			case "claude-opus-4-8":
 			case "claude-opus-4-5-20251101":
 			case "claude-opus-4-1-20250805":
 			case "claude-opus-4-20250514":
@@ -161,6 +162,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 								case "claude-sonnet-4-20250514":
 								case "claude-opus-4-6":
 								case "claude-opus-4-7":
+								case "claude-opus-4-8":
 								case "claude-opus-4-5-20251101":
 								case "claude-opus-4-1-20250805":
 								case "claude-opus-4-20250514":
diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts
index 8779fcad1a..657f5c3af3 100644
--- a/src/api/providers/bedrock.ts
+++ b/src/api/providers/bedrock.ts
@@ -392,10 +392,19 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		let additionalModelRequestFields: BedrockAdditionalModelFields | undefined
 		let thinkingEnabled = false
 
-		// Detect model generation for API compatibility
-		// Claude 4.7+ removed sampling params (temperature/top_p/top_k) and uses adaptive thinking
+		// Detect models that require the adaptive-thinking API contract.
+		// Starting with Claude Opus 4.7 (and the matching Sonnet 4.7), and continuing
+		// in Opus 4.8 / Sonnet 4.8, Anthropic removed sampling parameters
+		// (temperature/top_p/top_k) and replaced budget_tokens-based thinking with
+		// `thinking.type: "adaptive"` plus `output_config.effort`. The migration guide
+		// from 4.7 → 4.8 confirms there are no further breaking API changes, so we
+		// keep a single guard here that matches both generations.
 		const baseModelId = this.parseBaseModelId(modelConfig.id)
-		const isGen47Model = baseModelId.includes("opus-4-7") || baseModelId.includes("sonnet-4-7")
+		const isAdaptiveThinkingModel =
+			baseModelId.includes("opus-4-7") ||
+			baseModelId.includes("opus-4-8") ||
+			baseModelId.includes("sonnet-4-7") ||
+			baseModelId.includes("sonnet-4-8")
 
 		// Determine if thinking should be enabled
 		// metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
@@ -408,9 +417,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 
 		if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
 			thinkingEnabled = true
-			if (isGen47Model) {
-				// Claude 4.7+ uses adaptive thinking with effort levels — budget_tokens causes 400 error
-				// display: "summarized" surfaces thinking content in Zoo Code UI
+			if (isAdaptiveThinkingModel) {
+				// Claude 4.7+ (incl. 4.8) uses adaptive thinking with effort levels —
+				// budget_tokens causes a 400 error.
+				// display: "summarized" surfaces thinking content in Zoo Code UI.
+				// effort "xhigh" remains the recommended level for agentic coding tasks
+				// across both 4.7 and 4.8 (4.8 changed the API default to "high" but
+				// the model continues to honour "xhigh" for deeper reasoning).
 				additionalModelRequestFields = {
 					thinking: { type: "adaptive", display: "summarized" },
 					output_config: { effort: "xhigh" },
@@ -432,8 +445,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 
 		const inferenceConfig: BedrockInferenceConfig = {
 			maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-			// Claude 4.7+ removed temperature parameter entirely — causes 400 error if sent
-			...(isGen47Model
+			// Claude 4.7+ (including 4.8) removed sampling parameters entirely —
+			// sending temperature causes a 400 error.
+			...(isAdaptiveThinkingModel
 				? {}
 				: { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }),
 		}
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 7ea33196f9..336a290c2f 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -154,9 +154,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 				model: modelId,
-				// Some OpenAI-Compatible models (e.g. claude-opus-4-7) reject `temperature` as
-				// deprecated/unsupported. Honor the model's `supportsTemperature` flag and omit it
-				// when explicitly set to false (undefined still sends temperature, preserving behavior).
+				// Some OpenAI-Compatible models (e.g. claude-opus-4-7, claude-opus-4-8) reject
+				// `temperature` as deprecated/unsupported. Honor the model's `supportsTemperature`
+				// flag and omit it when explicitly set to false (undefined still sends temperature,
+				// preserving behavior).
 				...(modelInfo.supportsTemperature !== false && {
 					temperature:
 						this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),

From 25a23dd976aa151e8ed83bb965f5440235fe6295 Mon Sep 17 00:00:00 2001
From: vandre-sales <vandre.sales@meliva.ai>
Date: Thu, 28 May 2026 19:31:58 -0300
Subject: [PATCH 5/9] test: add unit coverage for Claude Opus 4.8 across
 providers

- anthropic.spec.ts: 5 cases mirroring 4.7 (1M-beta-header guard, adaptive
  thinking ON/OFF, custom maxTokens, getModel info).
- anthropic-vertex.spec.ts: 1M context tier pricing for Vertex Opus 4.8.
- shared/api.spec.ts: getModelMaxOutputTokens hybrid-token handling on 4.8.
- bedrock.spec.ts: new 'Claude 4.7+ adaptive thinking' block with 5 cases
  covering 4.7 + 4.8 adaptive thinking, reasoning-off behaviour, a 4.6
  regression guard (budget_tokens + temperature), and cross-region prefix
  detection (us.anthropic.claude-opus-4-8).

235 unit tests pass, 0 type errors. Validated live end-to-end via Bedrock
Global Inference (global.anthropic.claude-opus-4-8).
---
 .../__tests__/anthropic-vertex.spec.ts        |  16 +++
 src/api/providers/__tests__/anthropic.spec.ts | 112 +++++++++++++++
 src/api/providers/__tests__/bedrock.spec.ts   | 130 ++++++++++++++++++
 src/shared/__tests__/api.spec.ts              |  29 ++++
 4 files changed, 287 insertions(+)

diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts
index 9ed0e51ad9..381d921c58 100644
--- a/src/api/providers/__tests__/anthropic-vertex.spec.ts
+++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts
@@ -929,6 +929,22 @@ describe("VertexHandler", () => {
 			expect(model.betas).toContain("context-1m-2025-08-07")
 		})
 
+		it("should enable 1M context for Claude Opus 4.8 when beta flag is set", () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: "claude-opus-4-8",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: true,
+			})
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(1_000_000)
+			expect(model.info.inputPrice).toBe(10.0)
+			expect(model.info.outputPrice).toBe(37.5)
+			expect(model.info.supportsTemperature).toBe(false)
+			expect(model.betas).toContain("context-1m-2025-08-07")
+		})
+
 		it("should not enable 1M context when flag is disabled", () => {
 			const handler = new AnthropicVertexHandler({
 				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts
index 1936423388..6aaa17d4f6 100644
--- a/src/api/providers/__tests__/anthropic.spec.ts
+++ b/src/api/providers/__tests__/anthropic.spec.ts
@@ -304,6 +304,101 @@ describe("AnthropicHandler", () => {
 			expect(requestBody?.thinking).toEqual({ type: "adaptive" })
 			expect(requestBody?.max_tokens).toBe(32768)
 		})
+
+		it("should not require the 1M context beta header for Claude Opus 4.8", async () => {
+			const opus48Handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-opus-4-8",
+				anthropicBeta1MContext: true,
+			})
+
+			const stream = opus48Handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "Hello" }],
+				},
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0]
+			const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1]
+			expect(requestBody?.temperature).toBeUndefined()
+			expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31")
+			expect(requestOptions?.headers?.["anthropic-beta"]).not.toContain("context-1m-2025-08-07")
+		})
+
+		it("should use adaptive thinking for Claude Opus 4.8 when reasoning is enabled", async () => {
+			const opus48Handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-opus-4-8",
+				enableReasoningEffort: true,
+			})
+
+			const stream = opus48Handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "Hello" }],
+				},
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0]
+			expect(requestBody?.thinking).toEqual({ type: "adaptive" })
+			expect(requestBody?.max_tokens).toBe(16384)
+		})
+
+		it("should omit thinking for Claude Opus 4.8 when reasoning is disabled", async () => {
+			const opus48Handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-opus-4-8",
+				enableReasoningEffort: false,
+			})
+
+			const stream = opus48Handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "Hello" }],
+				},
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0]
+			expect(requestBody?.thinking).toBeUndefined()
+			expect(requestBody?.max_tokens).toBe(8192)
+		})
+
+		it("should preserve custom maxTokens for Claude Opus 4.8 when reasoning is enabled", async () => {
+			const opus48Handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-opus-4-8",
+				enableReasoningEffort: true,
+				modelMaxTokens: 32768,
+			})
+
+			const stream = opus48Handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "Hello" }],
+				},
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0]
+			expect(requestBody?.thinking).toEqual({ type: "adaptive" })
+			expect(requestBody?.max_tokens).toBe(32768)
+		})
 	})
 
 	describe("completePrompt", () => {
@@ -431,6 +526,23 @@ describe("AnthropicHandler", () => {
 			expect(model.reasoningBudget).toBeUndefined()
 		})
 
+		it("should handle Claude Opus 4.8 model correctly", () => {
+			const handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-opus-4-8",
+			})
+			const model = handler.getModel()
+			expect(model.id).toBe("claude-opus-4-8")
+			expect(model.info.maxTokens).toBe(128000)
+			expect(model.info.contextWindow).toBe(1000000)
+			expect(model.maxTokens).toBe(8192)
+			expect(model.info.supportsReasoningBinary).toBe(true)
+			expect(model.info.supportsReasoningBudget).toBe(true)
+			expect(model.info.supportsPromptCache).toBe(true)
+			expect(model.info.supportsTemperature).toBe(false)
+			expect(model.reasoningBudget).toBeUndefined()
+		})
+
 		it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => {
 			const handler = new AnthropicHandler({
 				apiKey: "test-api-key",
diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts
index 4ddf9f77af..6f0156a811 100644
--- a/src/api/providers/__tests__/bedrock.spec.ts
+++ b/src/api/providers/__tests__/bedrock.spec.ts
@@ -1335,4 +1335,134 @@ describe("AwsBedrockHandler", () => {
 			expect(hasCachePoint).toBe(false)
 		})
 	})
+
+	describe("Claude 4.7+ adaptive thinking (Opus 4.7 / Opus 4.8)", () => {
+		beforeEach(() => {
+			mockConverseStreamCommand.mockReset()
+		})
+
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+		it("should send adaptive thinking with effort xhigh for Claude Opus 4.7 when reasoning is enabled", async () => {
+			const opus47Handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-opus-4-7",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				enableReasoningEffort: true,
+			})
+
+			const generator = opus47Handler.createMessage("System prompt", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Adaptive thinking — no budget_tokens, must use effort levels.
+			expect(commandArg.additionalModelRequestFields?.thinking).toEqual({
+				type: "adaptive",
+				display: "summarized",
+			})
+			expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" })
+			// 4.7+ rejects sampling parameters: temperature must be omitted entirely.
+			expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
+		})
+
+		it("should send adaptive thinking with effort xhigh for Claude Opus 4.8 when reasoning is enabled", async () => {
+			const opus48Handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-opus-4-8",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				enableReasoningEffort: true,
+			})
+
+			const generator = opus48Handler.createMessage("System prompt", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// 4.8 inherits the 4.7 adaptive-thinking contract — no breaking API changes.
+			expect(commandArg.additionalModelRequestFields?.thinking).toEqual({
+				type: "adaptive",
+				display: "summarized",
+			})
+			expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" })
+			// Sampling parameters are still rejected on 4.8 — temperature must be absent.
+			expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
+		})
+
+		it("should omit thinking and temperature for Claude Opus 4.8 when reasoning is disabled", async () => {
+			const opus48Handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-opus-4-8",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				enableReasoningEffort: false,
+			})
+
+			const generator = opus48Handler.createMessage("System prompt", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Without reasoning enabled, no adaptive thinking payload is sent.
+			expect(commandArg.additionalModelRequestFields?.thinking).toBeUndefined()
+			// Temperature is still omitted for 4.8 because the API rejects sampling params.
+			expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
+		})
+
+		it("should still send temperature and budget_tokens thinking for older Claude Opus 4.6", async () => {
+			// Regression guard: the adaptive-thinking branch must NOT activate for 4.6 or earlier.
+			const opus46Handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-opus-4-6-v1",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				enableReasoningEffort: true,
+			})
+
+			const generator = opus46Handler.createMessage("System prompt", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// 4.6 still uses the budget_tokens-based thinking format.
+			expect(commandArg.additionalModelRequestFields?.thinking?.type).toBe("enabled")
+			expect(commandArg.additionalModelRequestFields?.thinking?.budget_tokens).toBeGreaterThan(0)
+			// 4.6 still accepts temperature.
+			expect(commandArg.inferenceConfig?.temperature).toBeDefined()
+		})
+
+		it("should detect adaptive-thinking models via cross-region inference prefix (us.anthropic.claude-opus-4-8)", async () => {
+			// Regression guard: the heuristic uses parseBaseModelId, so cross-region prefixes
+			// like `us.` / `eu.` / `global.` must still be detected as 4.8.
+			const opus48GlobalHandler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-opus-4-8",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				awsUseCrossRegionInference: true,
+				enableReasoningEffort: true,
+			})
+
+			const generator = opus48GlobalHandler.createMessage("System prompt", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Model ID should carry the cross-region prefix.
+			expect(commandArg.modelId).toBe("us.anthropic.claude-opus-4-8")
+			// Adaptive thinking must still apply despite the prefix.
+			expect(commandArg.additionalModelRequestFields?.thinking).toEqual({
+				type: "adaptive",
+				display: "summarized",
+			})
+			expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
+		})
+	})
 })
diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts
index c70e3945d7..06d3a5b3f1 100644
--- a/src/shared/__tests__/api.spec.ts
+++ b/src/shared/__tests__/api.spec.ts
@@ -106,6 +106,35 @@ describe("getModelMaxOutputTokens", () => {
 		).toBe(32_768)
 	})
 
+	test("should preserve Anthropic hybrid token handling for Claude Opus 4.8", () => {
+		// Opus 4.8 is modelled with the same adaptive-thinking + binary-reasoning
+		// flags as 4.7, so it must resolve max output tokens the same way.
+		const opus48Info: ModelInfo = {
+			maxTokens: 128_000,
+			contextWindow: 1_000_000,
+			supportsPromptCache: true,
+			supportsTemperature: false,
+			supportsReasoningBinary: true,
+			supportsReasoningBudget: true,
+		}
+
+		// Reasoning disabled: falls back to the Anthropic default cap.
+		const withoutReasoning = getModelMaxOutputTokens({
+			modelId: "claude-opus-4-8",
+			model: opus48Info,
+			settings: { apiProvider: "anthropic", enableReasoningEffort: false },
+		})
+		expect(withoutReasoning).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS)
+
+		// Reasoning enabled: the user-configured modelMaxTokens is honoured.
+		const withReasoning = getModelMaxOutputTokens({
+			modelId: "claude-opus-4-8",
+			model: opus48Info,
+			settings: { apiProvider: "anthropic", enableReasoningEffort: true, modelMaxTokens: 32_768 },
+		})
+		expect(withReasoning).toBe(32_768)
+	})
+
 	test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => {
 		const geminiModelId = "gemini-2.5-flash-preview-04-17"
 		const model: ModelInfo = {

From 8618f8489cefdd0000c77a7abecb7336fb92f583 Mon Sep 17 00:00:00 2001
From: vandre-sales <vandre.sales@meliva.ai>
Date: Thu, 28 May 2026 20:17:51 -0300
Subject: [PATCH 6/9] fix(bedrock): omit temperature in completePrompt for
 adaptive-thinking models

Addresses CodeRabbit review on #386. completePrompt was unconditionally
sending temperature in its inferenceConfig, which causes a 400 error for
Claude Opus/Sonnet 4.7 and 4.8 (sampling parameters were removed by
Anthropic for these models). createMessage already guarded this, but the
non-stream path did not.

- Extract the adaptive-thinking detection into a private
  isAdaptiveThinkingModel(modelId) method (parseBaseModelId-aware, so
  cross-region/global prefixes are handled).
- Reuse it in both createMessage and completePrompt so the two request
  paths stay consistent.
- Add two regression tests: completePrompt omits temperature for opus-4-8
  and still sends it for opus-4-6.

64 bedrock tests pass, check-types clean.
---
 src/api/providers/__tests__/bedrock.spec.ts | 43 +++++++++++++++++++
 src/api/providers/bedrock.ts                | 47 +++++++++++++++------
 2 files changed, 77 insertions(+), 13 deletions(-)

diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts
index 6f0156a811..69339a537d 100644
--- a/src/api/providers/__tests__/bedrock.spec.ts
+++ b/src/api/providers/__tests__/bedrock.spec.ts
@@ -1464,5 +1464,48 @@ describe("AwsBedrockHandler", () => {
 			})
 			expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
 		})
+
+		it("completePrompt should omit temperature for Claude Opus 4.8 (non-stream path)", async () => {
+			// completePrompt builds its own inferenceConfig, so the temperature guard
+			// that createMessage applies to adaptive-thinking models (4.7/4.8) must be
+			// verified separately for the non-stream path.
+			const handler = new AwsBedrockHandler({
+				awsRegion: "us-east-1",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				apiModelId: "anthropic.claude-opus-4-8",
+			})
+			const converseCommandSpy = vi.mocked(ConverseCommand)
+
+			await handler.completePrompt("Test prompt")
+
+			// Inspect the first ConverseCommand constructed for this prompt.
+			expect(converseCommandSpy).toHaveBeenCalled()
+			const request = converseCommandSpy.mock.calls[0][0] as any
+
+			// No temperature may appear in the request sent for 4.8.
+			expect(request.inferenceConfig?.temperature).toBeUndefined()
+		})
+
+		it("completePrompt should still send temperature for older Claude Opus 4.6 (non-stream path)", async () => {
+			// Counterpart to the 4.8 case: models up to 4.6 still accept sampling
+			// parameters, so the guard must not strip temperature for them.
+			const handler = new AwsBedrockHandler({
+				awsRegion: "us-east-1",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				apiModelId: "anthropic.claude-opus-4-6-v1",
+			})
+			const converseCommandSpy = vi.mocked(ConverseCommand)
+
+			await handler.completePrompt("Test prompt")
+
+			// Inspect the first ConverseCommand constructed for this prompt.
+			expect(converseCommandSpy).toHaveBeenCalled()
+			const request = converseCommandSpy.mock.calls[0][0] as any
+
+			// Temperature must still be present in the request sent for 4.6.
+			expect(request.inferenceConfig?.temperature).toBeDefined()
+		})
 	})
 })
diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts
index 657f5c3af3..86f6d8fc75 100644
--- a/src/api/providers/bedrock.ts
+++ b/src/api/providers/bedrock.ts
@@ -297,6 +297,30 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		this.client = new BedrockRuntimeClient(clientConfig)
 	}
 
+	/**
+	 * Reports whether a model ID belongs to a Claude generation that uses the
+	 * adaptive-thinking request contract.
+	 *
+	 * Opus/Sonnet 4.7 and 4.8 dropped sampling parameters (temperature/top_p/top_k)
+	 * and replaced budget_tokens-based thinking with `thinking.type: "adaptive"`
+	 * plus `output_config.effort`; 4.8 added no further breaking API changes, so
+	 * one check covers both. createMessage and completePrompt share it so neither
+	 * request path sends temperature to these models (doing so causes a 400).
+	 *
+	 * @param modelId Model ID, with or without a cross-region/global prefix; the
+	 *   prefix is stripped via parseBaseModelId before matching.
+	 * @returns true when the base ID contains an Opus/Sonnet 4.7 or 4.8 marker.
+	 */
+	private isAdaptiveThinkingModel(modelId: string): boolean {
+		// Substring markers of every generation on the adaptive-thinking contract.
+		const adaptiveMarkers = ["opus-4-7", "opus-4-8", "sonnet-4-7", "sonnet-4-8"]
+		const strippedId = this.parseBaseModelId(modelId)
+		for (const marker of adaptiveMarkers) {
+			if (strippedId.includes(marker)) return true
+		}
+		return false
+	}
+
 	// Helper to guess model info from custom modelId string if not in bedrockModels
 	private guessModelInfoFromId(modelId: string): Partial<ModelInfo> {
 		// Define a mapping for model ID patterns and their configurations
@@ -392,19 +416,11 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		let additionalModelRequestFields: BedrockAdditionalModelFields | undefined
 		let thinkingEnabled = false
 
-		// Detect models that require the adaptive-thinking API contract.
-		// Starting with Claude Opus 4.7 (and the matching Sonnet 4.7), and continuing
-		// in Opus 4.8 / Sonnet 4.8, Anthropic removed sampling parameters
-		// (temperature/top_p/top_k) and replaced budget_tokens-based thinking with
-		// `thinking.type: "adaptive"` plus `output_config.effort`. The migration guide
-		// from 4.7 → 4.8 confirms there are no further breaking API changes, so we
-		// keep a single guard here that matches both generations.
+		// Detect models that require the adaptive-thinking API contract (Opus/Sonnet
+		// 4.7 and 4.8). See isAdaptiveThinkingModel for details. The same guard is
+		// reused in completePrompt so both request paths stay consistent.
 		const baseModelId = this.parseBaseModelId(modelConfig.id)
-		const isAdaptiveThinkingModel =
-			baseModelId.includes("opus-4-7") ||
-			baseModelId.includes("opus-4-8") ||
-			baseModelId.includes("sonnet-4-7") ||
-			baseModelId.includes("sonnet-4-8")
+		const isAdaptiveThinkingModel = this.isAdaptiveThinkingModel(modelConfig.id)
 
 		// Determine if thinking should be enabled
 		// metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request)
@@ -788,7 +804,12 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 
 			const inferenceConfig: BedrockInferenceConfig = {
 				maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
-				temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
+				// Claude 4.7+ (including 4.8) removed sampling parameters entirely —
+				// sending temperature causes a 400 error. Guard the non-stream path the
+				// same way createMessage does so completePrompt also works for these models.
+				...(this.isAdaptiveThinkingModel(modelConfig.id)
+					? {}
+					: { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }),
 			}
 
 			// For completePrompt, use a unique conversation ID based on the prompt

From 506bc9a721ed41787b8f535d5abc88c81d195d7e Mon Sep 17 00:00:00 2001
From: vandre-sales <vandre.sales@meliva.ai>
Date: Thu, 28 May 2026 20:31:24 -0300
Subject: [PATCH 7/9] test(e2e): add Bedrock smoke test for Claude Opus 4.8

Addresses @edelauna's review request on #386 to cover 4.8 in the new
Bedrock e2e harness.

Mirrors the existing user-agent smoke test but re-points the provider at
us.anthropic.claude-opus-4-8. Since 4.8 is an adaptive-thinking model, this
exercises the request path that omits temperature (and sends thinking.type
"adaptive" when reasoning is enabled), proving a Bedrock round-trip
completes without a 400. Runs against the binary-event-stream mock server in
CI and against real AWS when BEDROCK_LIVE_E2E=true. The existing user-agent
smoke test is left untouched; the model id is overridable via
BEDROCK_OPUS_48_MODEL_ID.

check-types clean (tsconfig.esm.json).
---
 .../src/suite/providers/bedrock.test.ts       | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/apps/vscode-e2e/src/suite/providers/bedrock.test.ts b/apps/vscode-e2e/src/suite/providers/bedrock.test.ts
index c378b908da..6a0c4907f7 100644
--- a/apps/vscode-e2e/src/suite/providers/bedrock.test.ts
+++ b/apps/vscode-e2e/src/suite/providers/bedrock.test.ts
@@ -8,6 +8,10 @@ const AWS_BEARER_TOKEN_BEDROCK = process.env.AWS_BEARER_TOKEN_BEDROCK
 const BEDROCK_REGION = process.env.BEDROCK_REGION ?? "us-east-1"
 // Use a cross-region inference profile so the token works without per-region model access.
 const BEDROCK_MODEL_ID = process.env.BEDROCK_MODEL_ID ?? "us.anthropic.claude-haiku-4-5-20251001-v1:0"
+// Claude Opus 4.8 routed through a cross-region inference profile. 4.8 is an
+// adaptive-thinking model, so this exercises the request path that omits
+// temperature and (when reasoning is enabled) sends thinking.type "adaptive".
+const BEDROCK_OPUS_48_MODEL_ID = process.env.BEDROCK_OPUS_48_MODEL_ID ?? "us.anthropic.claude-opus-4-8"
 const BEDROCK_LIVE_E2E = process.env.BEDROCK_LIVE_E2E === "true"
 
 suite("Bedrock provider", function () {
@@ -91,4 +95,53 @@ suite("Bedrock provider", function () {
 			assert.ok(true, "Task completed successfully via Bedrock with ZooCode# userAgentAppId")
 		}
 	})
+
+	test("Should complete a task end-to-end via AWS Bedrock using Claude Opus 4.8", async () => {
+		const api = globalThis.api
+
+		// Re-point the provider at Claude Opus 4.8 while keeping the same transport
+		// (mock server in CI, real AWS in live mode). Parity smoke test for the 4.8
+		// request path: model resolution, adaptive-thinking payload, and the
+		// temperature omission required by 4.7+. Only live mode can surface a real
+		// 400; the mock server replies with the same attempt_completion("4") tool
+		// call regardless of model, so there a successful completion shows that the
+		// request was formed and delivered end-to-end.
+		if (!process.env.AIMOCK_URL && BEDROCK_LIVE_E2E && AWS_BEARER_TOKEN_BEDROCK) {
+			await api.setConfiguration({
+				apiProvider: "bedrock" as const,
+				awsUseApiKey: true,
+				awsApiKey: AWS_BEARER_TOKEN_BEDROCK,
+				awsRegion: BEDROCK_REGION,
+				apiModelId: BEDROCK_OPUS_48_MODEL_ID,
+			})
+		} else {
+			await api.setConfiguration({
+				apiProvider: "bedrock" as const,
+				awsUseApiKey: true,
+				awsApiKey: "mock-key",
+				awsRegion: BEDROCK_REGION,
+				apiModelId: BEDROCK_OPUS_48_MODEL_ID,
+				awsBedrockEndpoint: mockServer!.url,
+				awsBedrockEndpointEnabled: true,
+			})
+		}
+
+		const taskId = await api.startNewTask({
+			configuration: { mode: "ask", autoApprovalEnabled: true },
+			text: "bedrock-opus-48-smoke: what is 2+2? Reply with only the number.",
+		})
+
+		await waitUntilCompleted({ api, taskId })
+
+		if (mockServer) {
+			// Mock mode: the mock replies identically regardless of model, so this checks delivery, not payload validity.
+			const userAgent = mockServer.lastRequestHeaders?.["user-agent"] as string | undefined
+			assert.ok(userAgent, "Bedrock request should include user-agent header")
+			assert.ok(userAgent.includes("ZooCode#"), `user-agent should contain "ZooCode#" — got: ${userAgent}`)
+		} else {
+			// Live mode: a successful round-trip proves 4.8 request formation works
+			// against real AWS Bedrock (adaptive thinking, no rejected sampling params).
+			assert.ok(true, "Task completed successfully via Bedrock with Claude Opus 4.8")
+		}
+	})
 })

From 26c4447b91ccb0f034f8fc37ba9b0a1137f6312e Mon Sep 17 00:00:00 2001
From: vandre-sales <vandre.sales@meliva.ai>
Date: Thu, 28 May 2026 20:40:13 -0300
Subject: [PATCH 8/9] test(bedrock): cover sonnet-4-7/4-8 branches in
 isAdaptiveThinkingModel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Codecov flagged the sonnet-4-7 and sonnet-4-8 branches of
isAdaptiveThinkingModel as uncovered — they have no Bedrock registry entry
yet (future-proof guards), so no existing test reached them.

Add a focused unit test that calls the private method directly (same pattern
the suite already uses for parseBaseModelId / getPrefixForRegion), covering:
- all four positive patterns: opus-4-7, opus-4-8, sonnet-4-7, sonnet-4-8
- cross-region / global prefixes (us./eu./global.) via parseBaseModelId
- negative cases: opus-4-6, sonnet-4-6, claude-3-5, nova

Brings patch coverage to 100%. 68 bedrock tests pass, check-types clean.
---
 src/api/providers/__tests__/bedrock.spec.ts | 35 +++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts
index 69339a537d..3b2b001f19 100644
--- a/src/api/providers/__tests__/bedrock.spec.ts
+++ b/src/api/providers/__tests__/bedrock.spec.ts
@@ -1507,5 +1507,40 @@ describe("AwsBedrockHandler", () => {
 			// 4.6 must still receive temperature.
 			expect(commandArg.inferenceConfig?.temperature).toBeDefined()
 		})
+
+		describe("isAdaptiveThinkingModel detection", () => {
+			// Exercises the private guard directly, the way the suite already does for
+			// parseBaseModelId / getPrefixForRegion. Covers every matching pattern
+			// (including sonnet-4-7 / sonnet-4-8, which have no registry entry yet),
+			// prefixed ids, and ids that must not match.
+			const probeHandler = new AwsBedrockHandler({
+				awsRegion: "us-east-1",
+				awsAccessKey: "test",
+				awsSecretKey: "test",
+				apiModelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+			})
+			const detect = (modelId: string): boolean => (probeHandler as any).isAdaptiveThinkingModel(modelId)
+
+			it("returns true for all adaptive-thinking model patterns (opus/sonnet 4.7 and 4.8)", () => {
+				expect(detect("anthropic.claude-opus-4-7")).toBe(true)
+				expect(detect("anthropic.claude-opus-4-8")).toBe(true)
+				// The Sonnet ids are guarded ahead of any registry entry existing.
+				expect(detect("anthropic.claude-sonnet-4-7")).toBe(true)
+				expect(detect("anthropic.claude-sonnet-4-8")).toBe(true)
+			})
+
+			it("returns true when the id carries a cross-region or global prefix", () => {
+				expect(detect("global.anthropic.claude-opus-4-8")).toBe(true)
+				expect(detect("eu.anthropic.claude-sonnet-4-7")).toBe(true)
+				expect(detect("us.anthropic.claude-opus-4-8")).toBe(true)
+			})
+
+			it("returns false for older / non-adaptive models", () => {
+				expect(detect("amazon.nova-lite-v1:0")).toBe(false)
+				expect(detect("anthropic.claude-3-5-sonnet-20241022-v2:0")).toBe(false)
+				expect(detect("anthropic.claude-sonnet-4-6")).toBe(false)
+				expect(detect("anthropic.claude-opus-4-6-v1")).toBe(false)
+			})
+		})
 	})
 })

From bf2129a0bac11535332a1c7cce9acee9bf542322 Mon Sep 17 00:00:00 2001
From: Elliott de Launay <edelauna@gmail.com>
Date: Fri, 29 May 2026 12:16:02 +0000
Subject: [PATCH 9/9] fix(vertex): use adaptive thinking for Opus 4.8

---
 packages/types/src/providers/vertex.ts        |  2 ++
 .../__tests__/anthropic-vertex.spec.ts        | 31 +++++++++++++++++++
 src/api/providers/anthropic-vertex.ts         | 15 ++++++---
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts
index 38bb744b08..345e9f00bd 100644
--- a/packages/types/src/providers/vertex.ts
+++ b/packages/types/src/providers/vertex.ts
@@ -384,6 +384,7 @@ export const vertexModels = {
 		cacheWritesPrice: 6.25, // $6.25 per million tokens
 		cacheReadsPrice: 0.5, // $0.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsReasoningBinary: true,
 		supportsTemperature: false,
 		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
 		tiers: [
@@ -406,6 +407,7 @@ export const vertexModels = {
 		cacheWritesPrice: 6.25, // $6.25 per million tokens
 		cacheReadsPrice: 0.5, // $0.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsReasoningBinary: true,
 		supportsTemperature: false,
 		// 4.8 inherits the same Vertex pricing structure as 4.7 — no breaking changes.
 		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts
index 381d921c58..6bf3f9485e 100644
--- a/src/api/providers/__tests__/anthropic-vertex.spec.ts
+++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts
@@ -1161,6 +1161,37 @@ describe("VertexHandler", () => {
 				undefined,
 			)
 		})
+
+		it("should use adaptive thinking for Claude Opus 4.8", async () => {
+			const handler = new AnthropicVertexHandler({
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				apiModelId: "claude-opus-4-8",
+				enableReasoningEffort: true,
+			})
+
+			// Stub the SDK call with a stream that yields a single message_start event.
+			const createSpy = vitest.fn().mockImplementation(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } }
+				},
+			}))
+			;(handler["client"].messages as any).create = createSpy
+
+			const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+			await stream.next()
+
+			// The request must carry exactly the adaptive thinking payload.
+			expect(createSpy).toHaveBeenCalledWith(
+				expect.objectContaining({ thinking: { type: "adaptive" } }),
+				undefined,
+			)
+
+			// ...and neither a thinking budget nor a temperature.
+			const [sentParams] = createSpy.mock.calls[0]
+			expect(sentParams.thinking).not.toHaveProperty("budget_tokens")
+			expect(sentParams.temperature).toBeUndefined()
+		})
 	})
 
 	describe("native tool calling", () => {
diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts
index b6b94fcde7..b9685509c3 100644
--- a/src/api/providers/anthropic-vertex.ts
+++ b/src/api/providers/anthropic-vertex.ts
@@ -17,6 +17,7 @@ import { ApiStream } from "../transform/stream"
 import { addCacheBreakpoints } from "../transform/caching/vertex"
 import { getModelParams } from "../transform/model-params"
 import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"
+import { getAnthropicProviderReasoning } from "../transform/reasoning"
 import {
 	convertOpenAIToolsToAnthropic,
 	convertOpenAIToolChoiceToAnthropic,
@@ -95,7 +96,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 		 * This ensures we stay under the 4-block limit while maintaining effective caching
 		 * for the most relevant context.
 		 */
-		const params: Anthropic.Messages.MessageCreateParamsStreaming = {
+		const params = {
 			model: id,
 			max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
 			temperature,
@@ -107,7 +108,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 			messages: supportsPromptCache ? addCacheBreakpoints(sanitizedMessages) : sanitizedMessages,
 			stream: true,
 			...nativeToolParams,
-		}
+		} as Anthropic.Messages.MessageCreateParamsStreaming
 
 		// and prompt caching
 		const requestOptions = betas?.length ? { headers: { "anthropic-beta": betas.join(",") } } : undefined
@@ -240,6 +241,11 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 			settings: this.options,
 			defaultTemperature: 0,
 		})
+		const thinking = getAnthropicProviderReasoning({
+			model: info,
+			reasoningBudget: params.reasoningBudget,
+			settings: this.options,
+		})
 
 		// Build betas array for request headers
 		const betas: string[] = []
@@ -258,6 +264,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 			info,
 			betas: betas.length > 0 ? betas : undefined,
 			...params,
+			reasoning: thinking,
 		}
 	}
 
@@ -271,7 +278,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 				reasoning: thinking,
 			} = this.getModel()
 
-			const params: Anthropic.Messages.MessageCreateParamsNonStreaming = {
+			const params = {
 				model: id,
 				max_tokens: maxTokens,
 				temperature,
@@ -285,7 +292,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 					},
 				],
 				stream: false,
-			}
+			} as Anthropic.Messages.MessageCreateParamsNonStreaming
 
 			const response = await this.client.messages.create(params)
 			const content = response.content[0]