diff --git a/apps/vscode-e2e/src/suite/providers/bedrock.test.ts b/apps/vscode-e2e/src/suite/providers/bedrock.test.ts index c378b908da..6a0c4907f7 100644 --- a/apps/vscode-e2e/src/suite/providers/bedrock.test.ts +++ b/apps/vscode-e2e/src/suite/providers/bedrock.test.ts @@ -8,6 +8,10 @@ const AWS_BEARER_TOKEN_BEDROCK = process.env.AWS_BEARER_TOKEN_BEDROCK const BEDROCK_REGION = process.env.BEDROCK_REGION ?? "us-east-1" // Use a cross-region inference profile so the token works without per-region model access. const BEDROCK_MODEL_ID = process.env.BEDROCK_MODEL_ID ?? "us.anthropic.claude-haiku-4-5-20251001-v1:0" +// Claude Opus 4.8 routed through a cross-region inference profile. 4.8 is an +// adaptive-thinking model, so this exercises the request path that omits +// temperature and (when reasoning is enabled) sends thinking.type "adaptive". +const BEDROCK_OPUS_48_MODEL_ID = process.env.BEDROCK_OPUS_48_MODEL_ID ?? "us.anthropic.claude-opus-4-8" const BEDROCK_LIVE_E2E = process.env.BEDROCK_LIVE_E2E === "true" suite("Bedrock provider", function () { @@ -91,4 +95,53 @@ suite("Bedrock provider", function () { assert.ok(true, "Task completed successfully via Bedrock with ZooCode# userAgentAppId") } }) + + test("Should complete a task end-to-end via AWS Bedrock using Claude Opus 4.8", async () => { + const api = globalThis.api + + // Re-point the provider at Claude Opus 4.8 while keeping the same transport + // (mock server in CI, real AWS in live mode). Parity smoke test: it proves the + // 4.8 request path — model resolution, adaptive-thinking payload, and the + // temperature omission required by 4.7+ — completes a Bedrock round-trip + // without a 400. The mock server replies with the same attempt_completion("4") + // tool call regardless of model, so a successful completion exercises request + // formation end-to-end. + if (!process.env.AIMOCK_URL && BEDROCK_LIVE_E2E && AWS_BEARER_TOKEN_BEDROCK) { + await api.setConfiguration({ + apiProvider: "bedrock" as const, + awsUseApiKey: true, + awsApiKey: AWS_BEARER_TOKEN_BEDROCK, + awsRegion: BEDROCK_REGION, + apiModelId: BEDROCK_OPUS_48_MODEL_ID, + }) + } else { + await api.setConfiguration({ + apiProvider: "bedrock" as const, + awsUseApiKey: true, + awsApiKey: "mock-key", + awsRegion: BEDROCK_REGION, + apiModelId: BEDROCK_OPUS_48_MODEL_ID, + awsBedrockEndpoint: mockServer!.url, + awsBedrockEndpointEnabled: true, + }) + } + + const taskId = await api.startNewTask({ + configuration: { mode: "ask", autoApprovalEnabled: true }, + text: "bedrock-opus-48-smoke: what is 2+2? Reply with only the number.", + }) + + await waitUntilCompleted({ api, taskId }) + + if (mockServer) { + // The request reached the Bedrock endpoint (no 400 from temperature/thinking). + const userAgent = mockServer.lastRequestHeaders?.["user-agent"] as string | undefined + assert.ok(userAgent, "Bedrock request should include user-agent header") + assert.ok(userAgent.includes("ZooCode#"), `user-agent should contain "ZooCode#" — got: ${userAgent}`) + } else { + // Live mode: a successful round-trip proves 4.8 request formation works + // against real AWS Bedrock (adaptive thinking, no rejected sampling params). + assert.ok(true, "Task completed successfully via Bedrock with Claude Opus 4.8") + } + }) }) diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index f3e99c691d..f123817e43 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -108,6 +108,24 @@ export const anthropicModels = { supportsReasoningBinary: true, supportsTemperature: false, }, + "claude-opus-4-8": { + maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false. + contextWindow: 1_000_000, // 1M context window native (no beta header required, same as 4.7) + supportsImages: true, + supportsPromptCache: true, + inputPrice: 5.0, // $5 per million input tokens (regular tier) + outputPrice: 25.0, // $25 per million output tokens (regular tier) + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + // 4.8 inherits the adaptive-thinking model introduced in 4.7 — no breaking + // API changes. supportsReasoningBudget is kept true so the existing token-cap + // handling and max-token overrides behave identically. + supportsReasoningBudget: true, + // 4.8 still rejects budget_tokens-style thinking payloads, so the UI must + // expose reasoning as a binary on/off toggle on this provider path. + supportsReasoningBinary: true, + supportsTemperature: false, + }, "claude-opus-4-5-20251101": { maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. contextWindow: 200_000, diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index 9ea52bced8..b3e1d3d74e 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -167,6 +167,56 @@ export const bedrockModels = { }, ], }, + "anthropic.claude-opus-4-7": { + maxTokens: 8192, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console + outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 10.0, // $10 per million input tokens (>200K context) + outputPrice: 37.5, // $37.50 per million output tokens (>200K context) + cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context) + cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context) + }, + ], + }, + "anthropic.claude-opus-4-8": { + maxTokens: 8192, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console + outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + // 4.8 inherits the same Bedrock pricing structure as 4.7 — no API breaking changes. + // Adaptive thinking is the only supported reasoning mode (same as 4.7). + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 10.0, // $10 per million input tokens (>200K context) + outputPrice: 37.5, // $37.50 per million output tokens (>200K context) + cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context) + cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context) + }, + ], + }, "anthropic.claude-opus-4-5-20251101-v1:0": { maxTokens: 8192, contextWindow: 200_000, @@ -525,6 +575,8 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ "anthropic.claude-sonnet-4-5-20250929-v1:0", "anthropic.claude-sonnet-4-6", "anthropic.claude-opus-4-6-v1", + "anthropic.claude-opus-4-7", + "anthropic.claude-opus-4-8", ] as const // Amazon Bedrock models that support Global Inference profiles @@ -535,6 +587,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ // - Claude Haiku 4.5 // - Claude Opus 4.5 // - Claude Opus 4.6 +// - Claude Opus 4.7 export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-sonnet-4-20250514-v1:0", "anthropic.claude-sonnet-4-5-20250929-v1:0", @@ -542,6 +595,8 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-haiku-4-5-20251001-v1:0", "anthropic.claude-opus-4-5-20251101-v1:0", "anthropic.claude-opus-4-6-v1", + "anthropic.claude-opus-4-7", + "anthropic.claude-opus-4-8", ] as const // Amazon Bedrock Service Tier types diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index e27c0cb101..345e9f00bd 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -384,6 +384,7 @@ export const vertexModels = { cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens supportsReasoningBudget: true, + supportsReasoningBinary: true, supportsTemperature: false, // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') tiers: [ @@ -396,6 +397,30 @@ export const vertexModels = { }, ], }, + "claude-opus-4-8": { + maxTokens: 8192, + contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' + supportsImages: true, + supportsPromptCache: true, + inputPrice: 5.0, // $5 per million input tokens (≤200K context) + outputPrice: 25.0, // $25 per million output tokens (≤200K context) + cacheWritesPrice: 6.25, // $6.25 per million tokens + cacheReadsPrice: 0.5, // $0.50 per million tokens + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + // 4.8 inherits the same Vertex pricing structure as 4.7 — no breaking changes. + // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07') + tiers: [ + { + contextWindow: 1_000_000, // 1M tokens with beta flag + inputPrice: 10.0, // $10 per million input tokens (>200K context) + outputPrice: 37.5, // $37.50 per million output tokens (>200K context) + cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context) + cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context) + }, + ], + }, "claude-opus-4-5@20251101": { maxTokens: 8192, contextWindow: 200_000, @@ -595,6 +620,7 @@ export const VERTEX_1M_CONTEXT_MODEL_IDS = [ "claude-sonnet-4-6", "claude-opus-4-6", "claude-opus-4-7", + "claude-opus-4-8", ] as const export const VERTEX_REGIONS = [ diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts index 9ed0e51ad9..6bf3f9485e 100644 --- a/src/api/providers/__tests__/anthropic-vertex.spec.ts +++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts @@ -929,6 +929,22 @@ describe("VertexHandler", () => { expect(model.betas).toContain("context-1m-2025-08-07") }) + it("should enable 1M context for Claude Opus 4.8 when beta flag is set", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: "claude-opus-4-8", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + vertex1MContext: true, + }) + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.inputPrice).toBe(10.0) + expect(model.info.outputPrice).toBe(37.5) + expect(model.info.supportsTemperature).toBe(false) + expect(model.betas).toContain("context-1m-2025-08-07") + }) + it("should not enable 1M context when flag is disabled", () => { const handler = new AnthropicVertexHandler({ apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], @@ -1145,6 +1161,37 @@ describe("VertexHandler", () => { undefined, ) }) + + it("should use adaptive thinking for Claude Opus 4.8", async () => { + const opus48Handler = new AnthropicVertexHandler({ + apiModelId: "claude-opus-4-8", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + enableReasoningEffort: true, + }) + + const mockCreate = vitest.fn().mockImplementation(async () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } } + }, + })) + ;(opus48Handler["client"].messages as any).create = mockCreate + + await opus48Handler + .createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }]) + .next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + thinking: { type: "adaptive" }, + }), + undefined, + ) + + const request = mockCreate.mock.calls[0][0] + expect(request.thinking).not.toHaveProperty("budget_tokens") + expect(request.temperature).toBeUndefined() + }) }) describe("native tool calling", () => { diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index 1936423388..6aaa17d4f6 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -304,6 +304,101 @@ describe("AnthropicHandler", () => { expect(requestBody?.thinking).toEqual({ type: "adaptive" }) expect(requestBody?.max_tokens).toBe(32768) }) + + it("should not require the 1M context beta header for Claude Opus 4.8", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + anthropicBeta1MContext: true, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1] + expect(requestBody?.temperature).toBeUndefined() + expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31") + expect(requestOptions?.headers?.["anthropic-beta"]).not.toContain("context-1m-2025-08-07") + }) + + it("should use adaptive thinking for Claude Opus 4.8 when reasoning is enabled", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + enableReasoningEffort: true, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(16384) + }) + + it("should omit thinking for Claude Opus 4.8 when reasoning is disabled", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + enableReasoningEffort: false, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toBeUndefined() + expect(requestBody?.max_tokens).toBe(8192) + }) + + it("should preserve custom maxTokens for Claude Opus 4.8 when reasoning is enabled", async () => { + const opus48Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const stream = opus48Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(32768) + }) }) describe("completePrompt", () => { @@ -431,6 +526,23 @@ describe("AnthropicHandler", () => { expect(model.reasoningBudget).toBeUndefined() }) + it("should handle Claude Opus 4.8 model correctly", () => { + const handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-8", + }) + const model = handler.getModel() + expect(model.id).toBe("claude-opus-4-8") + expect(model.info.maxTokens).toBe(128000) + expect(model.info.contextWindow).toBe(1000000) + expect(model.maxTokens).toBe(8192) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + expect(model.reasoningBudget).toBeUndefined() + }) + it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => { const handler = new AnthropicHandler({ apiKey: "test-api-key", diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts index 4ddf9f77af..3b2b001f19 100644 --- a/src/api/providers/__tests__/bedrock.spec.ts +++ b/src/api/providers/__tests__/bedrock.spec.ts @@ -1335,4 +1335,212 @@ describe("AwsBedrockHandler", () => { expect(hasCachePoint).toBe(false) }) }) + + describe("Claude 4.7+ adaptive thinking (Opus 4.7 / Opus 4.8)", () => { + beforeEach(() => { + mockConverseStreamCommand.mockReset() + }) + + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] + + it("should send adaptive thinking with effort xhigh for Claude Opus 4.7 when reasoning is enabled", async () => { + const opus47Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-7", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: true, + }) + + const generator = opus47Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Adaptive thinking — no budget_tokens, must use effort levels. + expect(commandArg.additionalModelRequestFields?.thinking).toEqual({ + type: "adaptive", + display: "summarized", + }) + expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" }) + // 4.7+ rejects sampling parameters: temperature must be omitted entirely. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("should send adaptive thinking with effort xhigh for Claude Opus 4.8 when reasoning is enabled", async () => { + const opus48Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: true, + }) + + const generator = opus48Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // 4.8 inherits the 4.7 adaptive-thinking contract — no breaking API changes. + expect(commandArg.additionalModelRequestFields?.thinking).toEqual({ + type: "adaptive", + display: "summarized", + }) + expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" }) + // Sampling parameters are still rejected on 4.8 — temperature must be absent. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("should omit thinking and temperature for Claude Opus 4.8 when reasoning is disabled", async () => { + const opus48Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: false, + }) + + const generator = opus48Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Without reasoning enabled, no adaptive thinking payload is sent. + expect(commandArg.additionalModelRequestFields?.thinking).toBeUndefined() + // Temperature is still omitted for 4.8 because the API rejects sampling params. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("should still send temperature and budget_tokens thinking for older Claude Opus 4.6", async () => { + // Regression guard: the adaptive-thinking branch must NOT activate for 4.6 or earlier. + const opus46Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-6-v1", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: true, + }) + + const generator = opus46Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // 4.6 still uses the budget_tokens-based thinking format. + expect(commandArg.additionalModelRequestFields?.thinking?.type).toBe("enabled") + expect(commandArg.additionalModelRequestFields?.thinking?.budget_tokens).toBeGreaterThan(0) + // 4.6 still accepts temperature. + expect(commandArg.inferenceConfig?.temperature).toBeDefined() + }) + + it("should detect adaptive-thinking models via cross-region inference prefix (us.anthropic.claude-opus-4-8)", async () => { + // Regression guard: the heuristic uses parseBaseModelId, so cross-region prefixes + // like `us.` / `eu.` / `global.` must still be detected as 4.8. + const opus48GlobalHandler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + awsUseCrossRegionInference: true, + enableReasoningEffort: true, + }) + + const generator = opus48GlobalHandler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Model ID should carry the cross-region prefix. + expect(commandArg.modelId).toBe("us.anthropic.claude-opus-4-8") + // Adaptive thinking must still apply despite the prefix. + expect(commandArg.additionalModelRequestFields?.thinking).toEqual({ + type: "adaptive", + display: "summarized", + }) + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("completePrompt should omit temperature for Claude Opus 4.8 (non-stream path)", async () => { + // Regression guard for the non-stream path: completePrompt must guard + // temperature the same way createMessage does, otherwise adaptive-thinking + // models (4.7/4.8) return a 400 from Bedrock. + const mockConverseCommand = vi.mocked(ConverseCommand) + + const opus48Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-8", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + }) + + await opus48Handler.completePrompt("Test prompt") + + expect(mockConverseCommand).toHaveBeenCalled() + const commandArg = mockConverseCommand.mock.calls[0][0] as any + + // 4.8 must NOT receive temperature in the non-stream inferenceConfig. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + + it("completePrompt should still send temperature for older Claude Opus 4.6 (non-stream path)", async () => { + // 4.6 and earlier still accept sampling parameters, so completePrompt must + // continue to send temperature for them. + const mockConverseCommand = vi.mocked(ConverseCommand) + + const opus46Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-opus-4-6-v1", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + }) + + await opus46Handler.completePrompt("Test prompt") + + expect(mockConverseCommand).toHaveBeenCalled() + const commandArg = mockConverseCommand.mock.calls[0][0] as any + + // 4.6 must still receive temperature. + expect(commandArg.inferenceConfig?.temperature).toBeDefined() + }) + + describe("isAdaptiveThinkingModel detection", () => { + // Unit-cover the private guard directly (same pattern the suite uses for + // parseBaseModelId / getPrefixForRegion). This exercises all four model + // patterns — including the future-proof sonnet-4-7 / sonnet-4-8 branches + // that have no registry entry yet — plus negative cases and prefix stripping. + const handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-3-5-sonnet-20241022-v2:0", + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + }) + const isAdaptiveThinkingModel = (handler as any).isAdaptiveThinkingModel.bind(handler) + + it("returns true for all adaptive-thinking model patterns (opus/sonnet 4.7 and 4.8)", () => { + expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-7")).toBe(true) + expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-8")).toBe(true) + // Future-proof Sonnet patterns — guarded even before a registry entry exists. + expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-7")).toBe(true) + expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-8")).toBe(true) + }) + + it("returns true when the id carries a cross-region or global prefix", () => { + expect(isAdaptiveThinkingModel("us.anthropic.claude-opus-4-8")).toBe(true) + expect(isAdaptiveThinkingModel("eu.anthropic.claude-sonnet-4-7")).toBe(true) + expect(isAdaptiveThinkingModel("global.anthropic.claude-opus-4-8")).toBe(true) + }) + + it("returns false for older / non-adaptive models", () => { + expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-6-v1")).toBe(false) + expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-6")).toBe(false) + expect(isAdaptiveThinkingModel("anthropic.claude-3-5-sonnet-20241022-v2:0")).toBe(false) + expect(isAdaptiveThinkingModel("amazon.nova-lite-v1:0")).toBe(false) + }) + }) + }) }) diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index b6b94fcde7..b9685509c3 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -17,6 +17,7 @@ import { ApiStream } from "../transform/stream" import { addCacheBreakpoints } from "../transform/caching/vertex" import { getModelParams } from "../transform/model-params" import { filterNonAnthropicBlocks } from "../transform/anthropic-filter" +import { getAnthropicProviderReasoning } from "../transform/reasoning" import { convertOpenAIToolsToAnthropic, convertOpenAIToolChoiceToAnthropic, @@ -95,7 +96,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple * This ensures we stay under the 4-block limit while maintaining effective caching * for the most relevant context. */ - const params: Anthropic.Messages.MessageCreateParamsStreaming = { + const params = { model: id, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, @@ -107,7 +108,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple messages: supportsPromptCache ? addCacheBreakpoints(sanitizedMessages) : sanitizedMessages, stream: true, ...nativeToolParams, - } + } as Anthropic.Messages.MessageCreateParamsStreaming // and prompt caching const requestOptions = betas?.length ? { headers: { "anthropic-beta": betas.join(",") } } : undefined @@ -240,6 +241,11 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple settings: this.options, defaultTemperature: 0, }) + const thinking = getAnthropicProviderReasoning({ + model: info, + reasoningBudget: params.reasoningBudget, + settings: this.options, + }) // Build betas array for request headers const betas: string[] = [] @@ -258,6 +264,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple info, betas: betas.length > 0 ? betas : undefined, ...params, + reasoning: thinking, } } @@ -271,7 +278,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple reasoning: thinking, } = this.getModel() - const params: Anthropic.Messages.MessageCreateParamsNonStreaming = { + const params = { model: id, max_tokens: maxTokens, temperature, @@ -285,7 +292,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple }, ], stream: false, - } + } as Anthropic.Messages.MessageCreateParamsNonStreaming const response = await this.client.messages.create(params) const content = response.content[0] diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 68daeead28..81c221921f 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -93,6 +93,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa case "claude-sonnet-4-20250514": case "claude-opus-4-6": case "claude-opus-4-7": + case "claude-opus-4-8": case "claude-opus-4-5-20251101": case "claude-opus-4-1-20250805": case "claude-opus-4-20250514": @@ -161,6 +162,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa case "claude-sonnet-4-20250514": case "claude-opus-4-6": case "claude-opus-4-7": + case "claude-opus-4-8": case "claude-opus-4-5-20251101": case "claude-opus-4-1-20250805": case "claude-opus-4-20250514": diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index bb7f0d89c5..86f6d8fc75 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -61,9 +61,20 @@ interface BedrockInferenceConfig { // Define interface for Bedrock additional model request fields // This includes thinking configuration, 1M context beta, and other model-specific parameters interface BedrockAdditionalModelFields { - thinking?: { - type: "enabled" - budget_tokens: number + thinking?: + | { + type: "enabled" + budget_tokens: number + } + | { + // Claude 4.7+ adaptive thinking — no budget_tokens, uses output_config.effort instead + type: "adaptive" + // "summarized" shows thinking content in UI; omit to keep thinking internal only + display?: "summarized" | "none" + } + output_config?: { + // Claude 4.7+ effort levels: "low" | "medium" | "high" | "xhigh" | "max" + effort: string } anthropic_beta?: string[] [key: string]: any // Add index signature to be compatible with DocumentType @@ -286,6 +297,30 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH this.client = new BedrockRuntimeClient(clientConfig) } + /** + * Detect models that require the adaptive-thinking API contract. + * + * Starting with Claude Opus 4.7 (and the matching Sonnet 4.7), and continuing + * in Opus 4.8 / Sonnet 4.8, Anthropic removed sampling parameters + * (temperature/top_p/top_k) and replaced budget_tokens-based thinking with + * `thinking.type: "adaptive"` plus `output_config.effort`. The migration guide + * from 4.7 → 4.8 confirms there are no further breaking API changes, so a single + * guard matches both generations. Shared by createMessage and completePrompt so + * both request paths omit temperature for these models (sending it causes a 400). + * + * Accepts a model ID (with or without a cross-region/global prefix) and strips + * the prefix via parseBaseModelId before matching. + */ + private isAdaptiveThinkingModel(modelId: string): boolean { + const baseModelId = this.parseBaseModelId(modelId) + return ( + baseModelId.includes("opus-4-7") || + baseModelId.includes("opus-4-8") || + baseModelId.includes("sonnet-4-7") || + baseModelId.includes("sonnet-4-8") + ) + } + // Helper to guess model info from custom modelId string if not in bedrockModels private guessModelInfoFromId(modelId: string): Partial { // Define a mapping for model ID patterns and their configurations @@ -381,6 +416,12 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH let additionalModelRequestFields: BedrockAdditionalModelFields | undefined let thinkingEnabled = false + // Detect models that require the adaptive-thinking API contract (Opus/Sonnet + // 4.7 and 4.8). See isAdaptiveThinkingModel for details. The same guard is + // reused in completePrompt so both request paths stay consistent. + const baseModelId = this.parseBaseModelId(modelConfig.id) + const isAdaptiveThinkingModel = this.isAdaptiveThinkingModel(modelConfig.id) + // Determine if thinking should be enabled // metadata?.thinking?.enabled: Explicitly enabled through API metadata (direct request) // shouldUseReasoningBudget(): Enabled through user settings (enableReasoningEffort = true) @@ -392,27 +433,43 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) { thinkingEnabled = true - additionalModelRequestFields = { - thinking: { - type: "enabled", - budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096, - }, + if (isAdaptiveThinkingModel) { + // Claude 4.7+ (incl. 4.8) uses adaptive thinking with effort levels — + // budget_tokens causes a 400 error. + // display: "summarized" surfaces thinking content in Zoo Code UI. + // effort "xhigh" remains the recommended level for agentic coding tasks + // across both 4.7 and 4.8 (4.8 changed the API default to "high" but + // the model continues to honour "xhigh" for deeper reasoning). + additionalModelRequestFields = { + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: "xhigh" }, + } + } else { + additionalModelRequestFields = { + thinking: { + type: "enabled", + budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096, + }, + } } logger.info("Extended thinking enabled for Bedrock request", { ctx: "bedrock", modelId: modelConfig.id, - thinking: additionalModelRequestFields.thinking, + thinking: additionalModelRequestFields?.thinking, }) } const inferenceConfig: BedrockInferenceConfig = { maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number), - temperature: modelConfig.temperature ?? (this.options.modelTemperature as number), + // Claude 4.7+ (including 4.8) removed sampling parameters entirely — + // sending temperature causes a 400 error. + ...(isAdaptiveThinkingModel + ? {} + : { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }), } // Check if 1M context is enabled for supported Claude 4 models - // Use parseBaseModelId to handle cross-region inference prefixes - const baseModelId = this.parseBaseModelId(modelConfig.id) + // Use parseBaseModelId to handle cross-region inference prefixes (computed above) const is1MContextEnabled = BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext @@ -747,7 +804,12 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH const inferenceConfig: BedrockInferenceConfig = { maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number), - temperature: modelConfig.temperature ?? (this.options.modelTemperature as number), + // Claude 4.7+ (including 4.8) removed sampling parameters entirely — + // sending temperature causes a 400 error. Guard the non-stream path the + // same way createMessage does so completePrompt also works for these models. + ...(this.isAdaptiveThinkingModel(modelConfig.id) + ? {} + : { temperature: modelConfig.temperature ?? (this.options.modelTemperature as number) }), } // For completePrompt, use a unique conversation ID based on the prompt diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 7ea33196f9..336a290c2f 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -154,9 +154,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelId, - // Some OpenAI-Compatible models (e.g. claude-opus-4-7) reject `temperature` as - // deprecated/unsupported. Honor the model's `supportsTemperature` flag and omit it - // when explicitly set to false (undefined still sends temperature, preserving behavior). + // Some OpenAI-Compatible models (e.g. claude-opus-4-7, claude-opus-4-8) reject + // `temperature` as deprecated/unsupported. Honor the model's `supportsTemperature` + // flag and omit it when explicitly set to false (undefined still sends temperature, + // preserving behavior). ...(modelInfo.supportsTemperature !== false && { temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0), diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts index c70e3945d7..06d3a5b3f1 100644 --- a/src/shared/__tests__/api.spec.ts +++ b/src/shared/__tests__/api.spec.ts @@ -106,6 +106,35 @@ describe("getModelMaxOutputTokens", () => { ).toBe(32_768) }) + test("should preserve Anthropic hybrid token handling for Claude Opus 4.8", () => { + // 4.8 inherits the same adaptive-thinking + binary-reasoning capability as 4.7 + // (no breaking API changes between 4.7 and 4.8 per the official migration guide). + const model: ModelInfo = { + contextWindow: 1_000_000, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + maxTokens: 128_000, + } + + expect( + getModelMaxOutputTokens({ + modelId: "claude-opus-4-8", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: false }, + }), + ).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) + + expect( + getModelMaxOutputTokens({ + modelId: "claude-opus-4-8", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: true, modelMaxTokens: 32_768 }, + }), + ).toBe(32_768) + }) + test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => { const geminiModelId = "gemini-2.5-flash-preview-04-17" const model: ModelInfo = {