Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions apps/vscode-e2e/src/suite/providers/bedrock.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ const AWS_BEARER_TOKEN_BEDROCK = process.env.AWS_BEARER_TOKEN_BEDROCK
// AWS region used for Bedrock requests; falls back to "us-east-1" when BEDROCK_REGION is unset.
const BEDROCK_REGION = process.env.BEDROCK_REGION ?? "us-east-1"
// Use a cross-region inference profile so the token works without per-region model access.
const BEDROCK_MODEL_ID = process.env.BEDROCK_MODEL_ID ?? "us.anthropic.claude-haiku-4-5-20251001-v1:0"
// Claude Opus 4.8 routed through a cross-region inference profile. 4.8 is an
// adaptive-thinking model, so this exercises the request path that omits
// temperature and (when reasoning is enabled) sends thinking.type "adaptive".
// Overridable through the BEDROCK_OPUS_48_MODEL_ID environment variable.
const BEDROCK_OPUS_48_MODEL_ID = process.env.BEDROCK_OPUS_48_MODEL_ID ?? "us.anthropic.claude-opus-4-8"
// Opt-in switch for live runs: true only when BEDROCK_LIVE_E2E is exactly the string "true".
const BEDROCK_LIVE_E2E = process.env.BEDROCK_LIVE_E2E === "true"

suite("Bedrock provider", function () {
Expand Down Expand Up @@ -91,4 +95,53 @@ suite("Bedrock provider", function () {
assert.ok(true, "Task completed successfully via Bedrock with ZooCode# userAgentAppId")
}
})

test("Should complete a task end-to-end via AWS Bedrock using Claude Opus 4.8", async () => {
	const api = globalThis.api

	// Parity smoke test for the Claude Opus 4.8 request path. Only the model id
	// changes; the transport stays the same (mock server in CI, real AWS in live
	// mode). The mock server replies with the same attempt_completion("4") tool
	// call regardless of model, so a completed task shows that the request was
	// formed and accepted end-to-end without a 400.
	// NOTE(review): reasoning is not enabled in this configuration, so the
	// adaptive-thinking payload is presumably not sent here — confirm.
	const commonSettings = {
		apiProvider: "bedrock" as const,
		awsUseApiKey: true,
		awsRegion: BEDROCK_REGION,
		apiModelId: BEDROCK_OPUS_48_MODEL_ID,
	}

	if (process.env.AIMOCK_URL || !BEDROCK_LIVE_E2E || !AWS_BEARER_TOKEN_BEDROCK) {
		// Mock transport: placeholder key, requests redirected to the local mock endpoint.
		await api.setConfiguration({
			...commonSettings,
			awsApiKey: "mock-key",
			awsBedrockEndpoint: mockServer!.url,
			awsBedrockEndpointEnabled: true,
		})
	} else {
		// Live transport: real bearer token, default AWS endpoint for the region.
		await api.setConfiguration({
			...commonSettings,
			awsApiKey: AWS_BEARER_TOKEN_BEDROCK,
		})
	}

	const taskId = await api.startNewTask({
		configuration: { mode: "ask", autoApprovalEnabled: true },
		text: "bedrock-opus-48-smoke: what is 2+2? Reply with only the number.",
	})

	await waitUntilCompleted({ api, taskId })

	if (!mockServer) {
		// Live mode: reaching this point means the round-trip against real AWS
		// Bedrock succeeded, which is the only thing that can be asserted here.
		assert.ok(true, "Task completed successfully via Bedrock with Claude Opus 4.8")
		return
	}

	// Mock mode: the request reached the Bedrock endpoint, so inspect the headers it carried.
	const userAgent = mockServer.lastRequestHeaders?.["user-agent"] as string | undefined
	assert.ok(userAgent, "Bedrock request should include user-agent header")
	assert.ok(userAgent.includes("ZooCode#"), `user-agent should contain "ZooCode#" — got: ${userAgent}`)
})
})
18 changes: 18 additions & 0 deletions packages/types/src/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,24 @@ export const anthropicModels = {
supportsReasoningBinary: true,
supportsTemperature: false,
},
"claude-opus-4-8": {
	// --- Limits ---
	contextWindow: 1_000_000, // 1M context window native (no beta header required, same as 4.7)
	maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false.

	// --- Capabilities ---
	supportsImages: true,
	supportsPromptCache: true,

	// --- Reasoning ---
	// 4.8 keeps the adaptive-thinking model introduced in 4.7 with no breaking API
	// changes. The budget flag stays on so the existing token-cap handling and
	// max-token overrides behave exactly as they do for 4.7.
	supportsReasoningBudget: true,
	// budget_tokens-style thinking payloads are still rejected by 4.8, so on this
	// provider path the UI must offer reasoning as a plain on/off toggle.
	supportsReasoningBinary: true,
	// Requests for 4.7+ omit temperature, so the model is flagged as not supporting it.
	supportsTemperature: false,

	// --- Pricing (regular tier, USD per million tokens) ---
	inputPrice: 5.0, // $5 input
	outputPrice: 25.0, // $25 output
	cacheWritesPrice: 6.25, // $6.25 cache writes
	cacheReadsPrice: 0.5, // $0.50 cache reads
},
"claude-opus-4-5-20251101": {
maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
contextWindow: 200_000,
Expand Down
55 changes: 55 additions & 0 deletions packages/types/src/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,56 @@ export const bedrockModels = {
},
],
},
// Claude Opus 4.7 on Amazon Bedrock.
"anthropic.claude-opus-4-7": {
	maxTokens: 8192,
	contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
	supportsImages: true,
	supportsPromptCache: true,
	supportsReasoningBudget: true,
	// Temperature is stripped from requests for adaptive-thinking models, so the
	// model must declare it unsupported; without this flag the settings UI keeps
	// showing a temperature slider that has no effect (raised in PR review).
	// NOTE(review): the Anthropic and Vertex Opus 4.7/4.8 entries also set
	// supportsReasoningBinary: true — confirm whether Bedrock needs it as well.
	supportsTemperature: false,
	inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console
	outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console
	cacheWritesPrice: 6.25, // $6.25 per million tokens
	cacheReadsPrice: 0.5, // $0.50 per million tokens
	minTokensPerCachePoint: 1024,
	maxCachePoints: 4,
	cachableFields: ["system", "messages", "tools"],
	// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
	tiers: [
		{
			contextWindow: 1_000_000, // 1M tokens with beta flag
			inputPrice: 10.0, // $10 per million input tokens (>200K context)
			outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
			cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
			cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
		},
	],
},
// Claude Opus 4.8 on Amazon Bedrock. Mirrors the 4.7 entry: same pricing
// structure, and adaptive thinking is the only supported reasoning mode.
"anthropic.claude-opus-4-8": {
	maxTokens: 8192,
	contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
	supportsImages: true,
	supportsPromptCache: true,
	supportsReasoningBudget: true,
	// Temperature is stripped from requests for adaptive-thinking models, so the
	// model must declare it unsupported; without this flag the settings UI keeps
	// showing a temperature slider that has no effect (raised in PR review).
	// NOTE(review): the Anthropic and Vertex Opus 4.8 entries also set
	// supportsReasoningBinary: true — confirm whether Bedrock needs it as well.
	supportsTemperature: false,
	inputPrice: 5.0, // $5 per million input tokens (≤200K context) — verify against Bedrock console
	outputPrice: 25.0, // $25 per million output tokens (≤200K context) — verify against Bedrock console
	cacheWritesPrice: 6.25, // $6.25 per million tokens
	cacheReadsPrice: 0.5, // $0.50 per million tokens
	minTokensPerCachePoint: 1024,
	maxCachePoints: 4,
	cachableFields: ["system", "messages", "tools"],
	// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
	// 4.8 inherits the same Bedrock pricing structure as 4.7 — no API breaking changes.
	// Adaptive thinking is the only supported reasoning mode (same as 4.7).
	tiers: [
		{
			contextWindow: 1_000_000, // 1M tokens with beta flag
			inputPrice: 10.0, // $10 per million input tokens (>200K context)
			outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
			cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
			cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
		},
	],
},
"anthropic.claude-opus-4-5-20251101-v1:0": {
maxTokens: 8192,
contextWindow: 200_000,
Expand Down Expand Up @@ -525,6 +575,8 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
"anthropic.claude-sonnet-4-5-20250929-v1:0",
"anthropic.claude-sonnet-4-6",
"anthropic.claude-opus-4-6-v1",
"anthropic.claude-opus-4-7",
"anthropic.claude-opus-4-8",
] as const

// Amazon Bedrock models that support Global Inference profiles
Expand All @@ -535,13 +587,16 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
// - Claude Haiku 4.5
// - Claude Opus 4.5
// - Claude Opus 4.6
// - Claude Opus 4.7
// - Claude Opus 4.8
export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
"anthropic.claude-sonnet-4-20250514-v1:0",
"anthropic.claude-sonnet-4-5-20250929-v1:0",
"anthropic.claude-sonnet-4-6",
"anthropic.claude-haiku-4-5-20251001-v1:0",
"anthropic.claude-opus-4-5-20251101-v1:0",
"anthropic.claude-opus-4-6-v1",
"anthropic.claude-opus-4-7",
"anthropic.claude-opus-4-8",
] as const

// Amazon Bedrock Service Tier types
Expand Down
26 changes: 26 additions & 0 deletions packages/types/src/providers/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ export const vertexModels = {
cacheWritesPrice: 6.25, // $6.25 per million tokens
cacheReadsPrice: 0.5, // $0.50 per million tokens
supportsReasoningBudget: true,
supportsReasoningBinary: true,
supportsTemperature: false,
// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
tiers: [
Expand All @@ -396,6 +397,30 @@ export const vertexModels = {
},
],
},
"claude-opus-4-8": {
	// --- Limits ---
	maxTokens: 8192,
	contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'

	// --- Capabilities ---
	supportsImages: true,
	supportsPromptCache: true,

	// --- Reasoning ---
	supportsReasoningBudget: true,
	// Needed so reasoning is not sent as { type: "enabled", budget_tokens: N },
	// which Opus 4.7+ rejects with a 400 (per PR review). The same review notes
	// that the Vertex handler must also resolve reasoning through the
	// provider-level helper for the flag to take effect.
	supportsReasoningBinary: true,
	supportsTemperature: false,

	// --- Base pricing (≤200K context, USD per million tokens) ---
	// 4.8 inherits the same Vertex pricing structure as 4.7 — no breaking changes.
	inputPrice: 5.0, // $5 input
	outputPrice: 25.0, // $25 output
	cacheWritesPrice: 6.25, // $6.25 cache writes
	cacheReadsPrice: 0.5, // $0.50 cache reads

	// --- Extended-context pricing (requires beta flag 'context-1m-2025-08-07') ---
	tiers: [
		{
			contextWindow: 1_000_000, // 1M tokens with beta flag
			inputPrice: 10.0, // $10 per million input tokens (>200K context)
			outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
			cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
			cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
		},
	],
},
"claude-opus-4-5@20251101": {
maxTokens: 8192,
contextWindow: 200_000,
Expand Down Expand Up @@ -595,6 +620,7 @@ export const VERTEX_1M_CONTEXT_MODEL_IDS = [
"claude-sonnet-4-6",
"claude-opus-4-6",
"claude-opus-4-7",
"claude-opus-4-8",
] as const

export const VERTEX_REGIONS = [
Expand Down
47 changes: 47 additions & 0 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,22 @@ describe("VertexHandler", () => {
expect(model.betas).toContain("context-1m-2025-08-07")
})

it("should enable 1M context for Claude Opus 4.8 when beta flag is set", () => {
	// With vertex1MContext switched on, the resolved model info should come from
	// the extended-context tier and the 1M beta flag should be requested.
	const { info, betas } = new AnthropicVertexHandler({
		apiModelId: "claude-opus-4-8",
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
		vertex1MContext: true,
	}).getModel()

	// Extended-tier window and pricing.
	expect(info.contextWindow).toBe(1_000_000)
	expect(info.inputPrice).toBe(10.0)
	expect(info.outputPrice).toBe(37.5)
	// Switching tiers must not drop the temperature restriction.
	expect(info.supportsTemperature).toBe(false)
	expect(betas).toContain("context-1m-2025-08-07")
})

it("should not enable 1M context when flag is disabled", () => {
const handler = new AnthropicVertexHandler({
apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
Expand Down Expand Up @@ -1145,6 +1161,37 @@ describe("VertexHandler", () => {
undefined,
)
})

it("should use adaptive thinking for Claude Opus 4.8", async () => {
	// Stand-in for client.messages.create: resolves to an async-iterable stream
	// that yields a single message_start event.
	const createStub = vitest.fn().mockImplementation(async () => ({
		async *[Symbol.asyncIterator]() {
			yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } }
		},
	}))

	const handler = new AnthropicVertexHandler({
		apiModelId: "claude-opus-4-8",
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
		enableReasoningEffort: true,
	})
	;(handler["client"].messages as any).create = createStub

	// Pulling the first chunk is enough to make the handler issue the request.
	const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
	await stream.next()

	// Reasoning must be sent in its adaptive form.
	expect(createStub).toHaveBeenCalledWith(
		expect.objectContaining({
			thinking: { type: "adaptive" },
		}),
		undefined,
	)

	// The payload must carry neither a thinking budget nor a temperature.
	const [sentRequest] = createStub.mock.calls[0]
	expect(sentRequest.thinking).not.toHaveProperty("budget_tokens")
	expect(sentRequest.temperature).toBeUndefined()
})
})

describe("native tool calling", () => {
Expand Down
Loading
Loading