Zoo-Code-Org · edelauna · May 29, 2026 · May 24, 2026 · May 24, 2026 · May 28, 2026
@@ -8,6 +8,10 @@ const AWS_BEARER_TOKEN_BEDROCK = process.env.AWS_BEARER_TOKEN_BEDROCK
 const BEDROCK_REGION = process.env.BEDROCK_REGION ?? "us-east-1"
 // Use a cross-region inference profile so the token works without per-region model access.
 const BEDROCK_MODEL_ID = process.env.BEDROCK_MODEL_ID ?? "us.anthropic.claude-haiku-4-5-20251001-v1:0"
+// Model ID for the Opus 4.8 smoke test: the "us." cross-region inference profile
+// by default, overridable via the BEDROCK_OPUS_48_MODEL_ID env var. Intended to
+// exercise the adaptive-thinking path (no temperature, thinking.type "adaptive").
+const BEDROCK_OPUS_48_MODEL_ID = process.env.BEDROCK_OPUS_48_MODEL_ID ?? "us.anthropic.claude-opus-4-8"
 const BEDROCK_LIVE_E2E = process.env.BEDROCK_LIVE_E2E === "true"
 
 suite("Bedrock provider", function () {
@@ -91,4 +95,53 @@ suite("Bedrock provider", function () {
 			assert.ok(true, "Task completed successfully via Bedrock with ZooCode# userAgentAppId")
 		}
 	})
+
+	test("Should complete a task end-to-end via AWS Bedrock using Claude Opus 4.8", async () => {
+		const api = globalThis.api
+
+		// Point the provider at Claude Opus 4.8. Real AWS is used only when live mode
+		// is on, a bearer token is present, and AIMOCK_URL is unset; every other case
+		// goes to the mock server with a placeholder key and a custom endpoint.
+		// Smoke test for the 4.8 request path (model resolution, adaptive thinking,
+		// temperature omission): it passes when the task runs to completion.
+		// NOTE(review): per the original author the mock answers any model with the
+		// same attempt_completion("4") tool call — confirm against the mock fixtures.
+		if (!process.env.AIMOCK_URL && BEDROCK_LIVE_E2E && AWS_BEARER_TOKEN_BEDROCK) {
+			await api.setConfiguration({
+				apiProvider: "bedrock" as const,
+				awsUseApiKey: true,
+				awsApiKey: AWS_BEARER_TOKEN_BEDROCK,
+				awsRegion: BEDROCK_REGION,
+				apiModelId: BEDROCK_OPUS_48_MODEL_ID,
+			})
+		} else {
+			await api.setConfiguration({
+				apiProvider: "bedrock" as const,
+				awsUseApiKey: true,
+				awsApiKey: "mock-key",
+				awsRegion: BEDROCK_REGION,
+				apiModelId: BEDROCK_OPUS_48_MODEL_ID,
+				awsBedrockEndpoint: mockServer!.url, // non-null assertion: this branch assumes the mock server exists
+				awsBedrockEndpointEnabled: true,
+			})
+		}
+
+		const taskId = await api.startNewTask({
+			configuration: { mode: "ask", autoApprovalEnabled: true },
+			text: "bedrock-opus-48-smoke: what is 2+2? Reply with only the number.",
+		})
+
+		await waitUntilCompleted({ api, taskId })
+
+		if (mockServer) {
+			// Mock mode: checks only the last recorded user-agent; the body is not inspected.
+			const userAgent = mockServer.lastRequestHeaders?.["user-agent"] as string | undefined
+			assert.ok(userAgent, "Bedrock request should include user-agent header")
+			assert.ok(userAgent.includes("ZooCode#"), `user-agent should contain "ZooCode#" — got: ${userAgent}`)
+		} else {
+			// No mock server (live mode): reaching this line means waitUntilCompleted
+			// resolved against real AWS Bedrock; assert.ok(true) only labels the pass.
+			assert.ok(true, "Task completed successfully via Bedrock with Claude Opus 4.8")
+		}
+	})
 })
@@ -108,6 +108,24 @@ export const anthropicModels = {
 		supportsReasoningBinary: true,
 		supportsTemperature: false,
 	},
+	"claude-opus-4-8": {
+		maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false.
+		contextWindow: 1_000_000, // Native 1M context window; no beta header required (same as 4.7)
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 5.0, // $5 per million input tokens (regular tier)
+		outputPrice: 25.0, // $25 per million output tokens (regular tier)
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		// Per the author, 4.8 keeps the adaptive-thinking behavior introduced in 4.7
+		// with no breaking API changes. supportsReasoningBudget stays true so the
+		// existing token-cap handling and max-token overrides behave identically.
+		supportsReasoningBudget: true,
+		// Reasoning is exposed as a binary on/off toggle on this provider path: per the
+		// author, 4.8 rejects budget_tokens-style thinking payloads — TODO confirm in API docs.
+		supportsReasoningBinary: true,
+		supportsTemperature: false,
+	},
 	"claude-opus-4-5-20251101": {
 		maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
 		contextWindow: 200_000,

@@ -167,6 +167,56 @@ export const bedrockModels = {
 			},
 		],
 	},
+	"anthropic.claude-opus-4-7": {
+		maxTokens: 8192,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsReasoningBudget: true, // NOTE(review): no supportsTemperature/supportsReasoningBinary set here — confirm intended
+		inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console
+		outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		minTokensPerCachePoint: 1024,
+		maxCachePoints: 4,
+		cachableFields: ["system", "messages", "tools"],
+		// Single pricing tier for the 1M extended context (requires beta flag 'context-1m-2025-08-07')
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 10.0, // $10 per million input tokens (>200K context)
+				outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
+				cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
+				cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
+			},
+		],
+	},
+	"anthropic.claude-opus-4-8": {
+		maxTokens: 8192,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsReasoningBudget: true, // NOTE(review): no supportsReasoningBinary/supportsTemperature, unlike the Anthropic and Vertex 4.8 entries — confirm
+		inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console
+		outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		minTokensPerCachePoint: 1024,
+		maxCachePoints: 4,
+		cachableFields: ["system", "messages", "tools"],
+		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07').
+		// Prices and tier mirror the 4.7 entry above. Per the author, adaptive thinking is
+		// the only supported reasoning mode (same as 4.7) — not expressed by any flag here.
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 10.0, // $10 per million input tokens (>200K context)
+				outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
+				cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
+				cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
+			},
+		],
+	},
 	"anthropic.claude-opus-4-5-20251101-v1:0": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -525,6 +575,8 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"anthropic.claude-sonnet-4-6",
 	"anthropic.claude-opus-4-6-v1",
+	"anthropic.claude-opus-4-7",
+	"anthropic.claude-opus-4-8",
 ] as const
 
 // Amazon Bedrock models that support Global Inference profiles
@@ -535,13 +587,17 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 // - Claude Haiku 4.5
 // - Claude Opus 4.5
 // - Claude Opus 4.6
+// - Claude Opus 4.7
+// - Claude Opus 4.8
 export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-20250514-v1:0",
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"anthropic.claude-sonnet-4-6",
 	"anthropic.claude-haiku-4-5-20251001-v1:0",
 	"anthropic.claude-opus-4-5-20251101-v1:0",
 	"anthropic.claude-opus-4-6-v1",
+	"anthropic.claude-opus-4-7",
+	"anthropic.claude-opus-4-8",
 ] as const
 
 // Amazon Bedrock Service Tier types

@@ -384,6 +384,7 @@ export const vertexModels = {
 		cacheWritesPrice: 6.25, // $6.25 per million tokens
 		cacheReadsPrice: 0.5, // $0.50 per million tokens
 		supportsReasoningBudget: true,
+		supportsReasoningBinary: true,
 		supportsTemperature: false,
 		// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
 		tiers: [
@@ -396,6 +397,30 @@ export const vertexModels = {
 			},
 		],
 	},
+	"claude-opus-4-8": {
+		maxTokens: 8192,
+		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 5.0, // $5 per million input tokens (≤200K context)
+		outputPrice: 25.0, // $25 per million output tokens (≤200K context)
+		cacheWritesPrice: 6.25, // $6.25 per million tokens
+		cacheReadsPrice: 0.5, // $0.50 per million tokens
+		supportsReasoningBudget: true,
+		supportsReasoningBinary: true,
+		supportsTemperature: false,
+		// Per the author, 4.8 keeps the Vertex pricing structure of 4.7 — no breaking changes.
+		// Single pricing tier for the 1M extended context (requires beta flag 'context-1m-2025-08-07')
+		tiers: [
+			{
+				contextWindow: 1_000_000, // 1M tokens with beta flag
+				inputPrice: 10.0, // $10 per million input tokens (>200K context)
+				outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
+				cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
+				cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
+			},
+		],
+	},
 	"claude-opus-4-5@20251101": {
 		maxTokens: 8192,
 		contextWindow: 200_000,
@@ -595,6 +620,7 @@ export const VERTEX_1M_CONTEXT_MODEL_IDS = [
 	"claude-sonnet-4-6",
 	"claude-opus-4-6",
 	"claude-opus-4-7",
+	"claude-opus-4-8",
 ] as const
 
 export const VERTEX_REGIONS = [

@@ -929,6 +929,22 @@ describe("VertexHandler", () => {
 			expect(model.betas).toContain("context-1m-2025-08-07")
 		})
 
+		it("should enable 1M context for Claude Opus 4.8 when beta flag is set", () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: "claude-opus-4-8",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				vertex1MContext: true, // opt in to the extended-context tier
+			})
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(1_000_000) // tier value, not the 200K default
+			expect(model.info.inputPrice).toBe(10.0) // tier price (base entry lists 5.0)
+			expect(model.info.outputPrice).toBe(37.5) // tier price (base entry lists 25.0)
+			expect(model.info.supportsTemperature).toBe(false) // base-entry flag is still reported
+			expect(model.betas).toContain("context-1m-2025-08-07")
+		})
+
 		it("should not enable 1M context when flag is disabled", () => {
 			const handler = new AnthropicVertexHandler({
 				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
@@ -1145,6 +1161,37 @@ describe("VertexHandler", () => {
 				undefined,
 			)
 		})
+
+		it("should use adaptive thinking for Claude Opus 4.8", async () => {
+			const opus48Handler = new AnthropicVertexHandler({
+				apiModelId: "claude-opus-4-8",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				enableReasoningEffort: true, // reasoning on, so a thinking payload is expected
+			})
+
+			const mockCreate = vitest.fn().mockImplementation(async () => ({ // stub stream: a single message_start event
+				async *[Symbol.asyncIterator]() {
+					yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } }
+				},
+			}))
+			;(opus48Handler["client"].messages as any).create = mockCreate // swap the client's create() for the stub
+
+			await opus48Handler
+				.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+				.next() // pull one chunk; presumably this is what triggers the create() call
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					thinking: { type: "adaptive" },
+				}),
+				undefined, // second argument passed to create()
+			)
+
+			const request = mockCreate.mock.calls[0][0] // first argument of the first create() call
+			expect(request.thinking).not.toHaveProperty("budget_tokens")
+			expect(request.temperature).toBeUndefined()
+		})
 	})
 
 	describe("native tool calling", () => {