diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index ef532a9791..700745f41b 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -38,6 +38,7 @@ export const DEFAULT_CONSECUTIVE_MISTAKE_LIMIT = 3 export const dynamicProviders = [ "openrouter", "vercel-ai-gateway", + "zoo-gateway", "litellm", "requesty", "unbound", @@ -399,6 +400,12 @@ const vercelAiGatewaySchema = baseProviderSettingsSchema.extend({ vercelAiGatewayModelId: z.string().optional(), }) +const zooGatewaySchema = baseProviderSettingsSchema.extend({ + zooSessionToken: z.string().optional(), + zooGatewayModelId: z.string().optional(), + zooGatewayBaseUrl: z.string().optional(), +}) + const basetenSchema = apiModelIdProviderModelSchema.extend({ basetenApiKey: z.string().optional(), }) @@ -437,6 +444,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv fireworksSchema.merge(z.object({ apiProvider: z.literal("fireworks") })), qwenCodeSchema.merge(z.object({ apiProvider: z.literal("qwen-code") })), vercelAiGatewaySchema.merge(z.object({ apiProvider: z.literal("vercel-ai-gateway") })), + zooGatewaySchema.merge(z.object({ apiProvider: z.literal("zoo-gateway") })), defaultSchema, ]) @@ -471,6 +479,7 @@ export const providerSettingsSchema = z.object({ ...fireworksSchema.shape, ...qwenCodeSchema.shape, ...vercelAiGatewaySchema.shape, + ...zooGatewaySchema.shape, ...codebaseIndexProviderSchema.shape, }) @@ -501,6 +510,7 @@ export const modelIdKeys = [ "unboundModelId", "litellmModelId", "vercelAiGatewayModelId", + "zooGatewayModelId", ] as const satisfies readonly (keyof ProviderSettings)[] export type ModelIdKey = (typeof modelIdKeys)[number] @@ -546,6 +556,7 @@ export const modelIdKeysByProvider: Record = { zai: "apiModelId", fireworks: "apiModelId", "vercel-ai-gateway": "vercelAiGatewayModelId", + "zoo-gateway": "zooGatewayModelId", } /** @@ -564,8 +575,13 @@ export const getApiProtocol 
= (provider: ProviderName | undefined, modelId?: str return "anthropic" } - // Vercel AI Gateway uses anthropic protocol for anthropic models. - if (provider && provider === "vercel-ai-gateway" && modelId && modelId.toLowerCase().startsWith("anthropic/")) { + // Vercel AI Gateway, Zoo Gateway, and Roo use anthropic protocol for anthropic models. + if ( + provider && + ["vercel-ai-gateway", "zoo-gateway", "roo"].includes(provider) && + modelId && + modelId.toLowerCase().startsWith("anthropic/") + ) { return "anthropic" } @@ -662,6 +678,7 @@ export const MODELS_BY_PROVIDER: Record< requesty: { id: "requesty", label: "Requesty", models: [] }, unbound: { id: "unbound", label: "Unbound", models: [] }, "vercel-ai-gateway": { id: "vercel-ai-gateway", label: "Vercel AI Gateway", models: [] }, + "zoo-gateway": { id: "zoo-gateway", label: "Zoo Gateway", models: [] }, // Local providers; models discovered from localhost endpoints. lmstudio: { id: "lmstudio", label: "LM Studio", models: [] }, diff --git a/packages/types/src/providers/index.ts b/packages/types/src/providers/index.ts index e75f5c4240..04788dfba9 100644 --- a/packages/types/src/providers/index.ts +++ b/packages/types/src/providers/index.ts @@ -25,6 +25,7 @@ export * from "./vercel-ai-gateway.js" export * from "./zai.js" export * from "./minimax.js" export * from "./mimo.js" +export * from "./zoo-gateway.js" import { anthropicDefaultModelId } from "./anthropic.js" import { basetenDefaultModelId } from "./baseten.js" @@ -49,6 +50,7 @@ import { vercelAiGatewayDefaultModelId } from "./vercel-ai-gateway.js" import { internationalZAiDefaultModelId, mainlandZAiDefaultModelId } from "./zai.js" import { minimaxDefaultModelId } from "./minimax.js" import { mimoDefaultModelId } from "./mimo.js" +import { zooGatewayDefaultModelId } from "./zoo-gateway.js" // Import the ProviderName type from provider-settings to avoid duplication import type { ProviderName } from "../provider-settings.js" @@ -115,6 +117,8 @@ export function 
getProviderDefaultModelId( return unboundDefaultModelId case "vercel-ai-gateway": return vercelAiGatewayDefaultModelId + case "zoo-gateway": + return zooGatewayDefaultModelId case "anthropic": case "gemini-cli": case "fake-ai": diff --git a/packages/types/src/providers/zoo-gateway.ts b/packages/types/src/providers/zoo-gateway.ts new file mode 100644 index 0000000000..8596026441 --- /dev/null +++ b/packages/types/src/providers/zoo-gateway.ts @@ -0,0 +1,24 @@ +import type { ModelInfo } from "../model.js" + +// Zoo Gateway uses the same model ID format as Vercel AI Gateway (provider/model-name) +export const zooGatewayDefaultModelId = "anthropic/claude-sonnet-4" + +// Zoo Gateway serves the same models as Vercel AI Gateway, so prompt caching support is identical +// We reuse VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS from vercel-ai-gateway.ts +// Instead of duplicating, we just export a reference to indicate they're the same +export { VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS as ZOO_GATEWAY_PROMPT_CACHING_MODELS } from "./vercel-ai-gateway.js" + +export const zooGatewayDefaultModelInfo: ModelInfo = { + maxTokens: 64000, + contextWindow: 200000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 3, + outputPrice: 15, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: + "Claude Sonnet 4 significantly improves on Sonnet 3.7's industry-leading capabilities, excelling in coding with a state-of-the-art 72.7% on SWE-bench. 
The model balances performance and efficiency for internal and external use cases, with enhanced steerability for greater control over implementations.", +} + +export const ZOO_GATEWAY_DEFAULT_TEMPERATURE = 0.7 diff --git a/src/api/index.ts b/src/api/index.ts index c9e5e7b1b9..b50ac2492c 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -32,6 +32,7 @@ import { ZAiHandler, FireworksHandler, VercelAiGatewayHandler, + ZooGatewayHandler, MiniMaxHandler, MimoHandler, BasetenHandler, @@ -176,6 +177,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler { return new FireworksHandler(options) case "vercel-ai-gateway": return new VercelAiGatewayHandler(options) + case "zoo-gateway": + return new ZooGatewayHandler(options) case "minimax": return new MiniMaxHandler(options) case "baseten": diff --git a/src/api/providers/__tests__/zoo-gateway.spec.ts b/src/api/providers/__tests__/zoo-gateway.spec.ts new file mode 100644 index 0000000000..bfc02d79aa --- /dev/null +++ b/src/api/providers/__tests__/zoo-gateway.spec.ts @@ -0,0 +1,326 @@ +// npx vitest run src/api/providers/__tests__/zoo-gateway.spec.ts + +vitest.mock("vscode", () => ({})) + +import OpenAI from "openai" + +import { zooGatewayDefaultModelId, ZOO_GATEWAY_DEFAULT_TEMPERATURE } from "@roo-code/types" + +import { ZooGatewayHandler } from "../zoo-gateway" +import { ApiHandlerOptions } from "../../../shared/api" +import { Package } from "../../../shared/package" + +vitest.mock("openai") +vitest.mock("delay", () => ({ default: vitest.fn(() => Promise.resolve()) })) +vitest.mock("../fetchers/modelCache", () => ({ + getModels: vitest.fn().mockImplementation(() => { + return Promise.resolve({ + "anthropic/claude-sonnet-4": { + maxTokens: 64000, + contextWindow: 200000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 3, + outputPrice: 15, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: "Claude Sonnet 4", + }, + "anthropic/claude-3.5-haiku": { + maxTokens: 
32000, + contextWindow: 200000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1, + outputPrice: 5, + cacheWritesPrice: 1.25, + cacheReadsPrice: 0.1, + description: "Claude 3.5 Haiku", + }, + }) + }), + getModelsFromCache: vitest.fn().mockReturnValue(undefined), +})) + +vitest.mock("../../../services/zoo-code-auth", () => ({ + getZooCodeBaseUrl: vitest.fn(() => "https://www.zoocode.dev"), + getCachedZooCodeToken: vitest.fn(() => undefined), + clearZooCodeToken: vitest.fn(async () => undefined), +})) + +vitest.mock("../../transform/caching/vercel-ai-gateway", () => ({ + addCacheBreakpoints: vitest.fn(), +})) + +const mockCreate = vitest.fn() + +function mockOpenAIClient() { + vitest.mocked(OpenAI).mockImplementation( + () => + ({ + chat: { + completions: { + create: mockCreate, + }, + }, + }) as unknown as OpenAI, + ) +} + +mockOpenAIClient() + +describe("ZooGatewayHandler", () => { + const mockOptions: ApiHandlerOptions = { + zooSessionToken: "zoo_ext_test_token", + zooGatewayModelId: "anthropic/claude-sonnet-4", + } + + beforeEach(() => { + vitest.clearAllMocks() + mockCreate.mockClear() + mockOpenAIClient() + }) + + describe("constructor", () => { + it("requires authentication before constructing the client", () => { + expect(() => new ZooGatewayHandler({})).toThrow( + "Zoo Gateway requires authentication. 
Please sign in to Zoo Code first.", + ) + expect(OpenAI).not.toHaveBeenCalled() + }) + + it("initializes OpenAI with Zoo enrichment headers and session token", () => { + const handler = new ZooGatewayHandler({ + ...mockOptions, + zooGatewayBaseUrl: "https://staging.zoocode.dev/api/gateway/v1", + }) + + expect(handler).toBeInstanceOf(ZooGatewayHandler) + expect(OpenAI).toHaveBeenCalledWith({ + baseURL: "https://staging.zoocode.dev/api/gateway/v1", + apiKey: mockOptions.zooSessionToken, + defaultHeaders: expect.objectContaining({ + "HTTP-Referer": "https://github.com/RooVetGit/Roo-Cline", + "X-Title": "Roo Code", + "X-Zoo-Editor": "vscode", + "X-Zoo-Extension-Version": Package.version, + }), + }) + }) + + it("defaults the gateway base URL from getZooCodeBaseUrl", () => { + new ZooGatewayHandler(mockOptions) + + expect(OpenAI).toHaveBeenCalledWith( + expect.objectContaining({ + baseURL: "https://www.zoocode.dev/api/gateway/v1", + }), + ) + }) + }) + + describe("fetchModel", () => { + it("returns configured model info", async () => { + const handler = new ZooGatewayHandler(mockOptions) + const result = await handler.fetchModel() + + expect(result.id).toBe(mockOptions.zooGatewayModelId) + expect(result.info.maxTokens).toBe(64000) + expect(result.info.supportsPromptCache).toBe(true) + }) + + it("falls back to the default model when none is configured", async () => { + const handler = new ZooGatewayHandler({ zooSessionToken: "zoo_ext_test_token" }) + const result = await handler.fetchModel() + + expect(result.id).toBe(zooGatewayDefaultModelId) + }) + }) + + describe("createMessage", () => { + beforeEach(() => { + mockCreate.mockImplementation(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [{ delta: { content: "Test response" }, index: 0 }], + usage: null, + } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + cache_creation_input_tokens: 2, + 
prompt_tokens_details: { cached_tokens: 3 }, + cost: 0.005, + }, + } + }, + })) + }) + + it("streams text and usage chunks", async () => { + const handler = new ZooGatewayHandler(mockOptions) + const stream = handler.createMessage("You are helpful.", [{ role: "user", content: "Hello" }]) + + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toEqual([ + { type: "text", text: "Test response" }, + { + type: "usage", + inputTokens: 10, + outputTokens: 5, + cacheWriteTokens: 2, + cacheReadTokens: 3, + totalCost: 0.005, + }, + ]) + }) + + it("forwards task and mode metadata as request headers", async () => { + const handler = new ZooGatewayHandler(mockOptions) + + await handler.createMessage("prompt", [], { taskId: "task-123", mode: "code" }).next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.any(Object), + expect.objectContaining({ + headers: { + "X-Zoo-Task-ID": "task-123", + "X-Zoo-Mode": "code", + }, + }), + ) + }) + + it("uses custom temperature when provided", async () => { + const handler = new ZooGatewayHandler({ + ...mockOptions, + modelTemperature: 0.5, + }) + + await handler.createMessage("prompt", [{ role: "user", content: "Hi" }]).next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.5, + }), + expect.any(Object), + ) + }) + + it("uses the default temperature when none is provided", async () => { + const handler = new ZooGatewayHandler(mockOptions) + + await handler.createMessage("prompt", [{ role: "user", content: "Hi" }]).next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: ZOO_GATEWAY_DEFAULT_TEMPERATURE, + }), + expect.any(Object), + ) + }) + + it("adds cache breakpoints for supported models", async () => { + const { addCacheBreakpoints } = await import("../../transform/caching/vercel-ai-gateway") + const handler = new ZooGatewayHandler({ + ...mockOptions, + zooGatewayModelId: "anthropic/claude-3.5-haiku", + }) 
+ + await handler.createMessage("prompt", [{ role: "user", content: "Hi" }]).next() + + expect(addCacheBreakpoints).toHaveBeenCalled() + }) + + it("yields tool_call_partial chunks when streaming tool calls", async () => { + mockCreate.mockImplementation(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_123", + function: { name: "test_tool", arguments: '{"arg1":' }, + }, + ], + }, + index: 0, + }, + ], + } + }, + })) + + const handler = new ZooGatewayHandler(mockOptions) + const chunks = [] + for await (const chunk of handler.createMessage("prompt", [])) { + chunks.push(chunk) + } + + expect(chunks).toEqual([ + { + type: "tool_call_partial", + index: 0, + id: "call_123", + name: "test_tool", + arguments: '{"arg1":', + }, + ]) + }) + }) + + describe("completePrompt", () => { + beforeEach(() => { + mockCreate.mockImplementation(async () => ({ + choices: [{ message: { role: "assistant", content: "Test completion response" } }], + })) + }) + + it("returns completion text from the gateway", async () => { + const handler = new ZooGatewayHandler(mockOptions) + + const result = await handler.completePrompt("Complete this: Hello") + + expect(result).toBe("Test completion response") + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "anthropic/claude-sonnet-4", + messages: [{ role: "user", content: "Complete this: Hello" }], + stream: false, + temperature: ZOO_GATEWAY_DEFAULT_TEMPERATURE, + max_completion_tokens: 64000, + }), + ) + }) + + it("wraps errors with a Zoo Gateway prefix", async () => { + const handler = new ZooGatewayHandler(mockOptions) + mockCreate.mockImplementation(() => { + throw new Error("upstream failure") + }) + + await expect(handler.completePrompt("Test")).rejects.toThrow( + "Zoo Gateway completion error: upstream failure", + ) + }) + + it("returns an empty string when the model returns no content", async () => { + const handler = 
new ZooGatewayHandler(mockOptions) + mockCreate.mockImplementation(async () => ({ + choices: [{ message: { role: "assistant", content: null } }], + })) + + await expect(handler.completePrompt("Test")).resolves.toBe("") + }) + }) +}) diff --git a/src/api/providers/fetchers/__tests__/zoo-gateway.spec.ts b/src/api/providers/fetchers/__tests__/zoo-gateway.spec.ts new file mode 100644 index 0000000000..8fc2690216 --- /dev/null +++ b/src/api/providers/fetchers/__tests__/zoo-gateway.spec.ts @@ -0,0 +1,150 @@ +// npx vitest run src/api/providers/fetchers/__tests__/zoo-gateway.spec.ts + +import axios from "axios" + +import { getZooGatewayModels, parseZooGatewayModel } from "../zoo-gateway" + +vitest.mock("axios") +const mockedAxios = axios as any + +describe("Zoo Gateway Fetchers", () => { + beforeEach(() => { + vitest.clearAllMocks() + }) + + describe("getZooGatewayModels", () => { + const baseUrl = "https://example.test/api/gateway/v1" + const token = "zoo_ext_test_token" + + const mockResponse = { + data: { + object: "list", + data: [ + { + id: "anthropic/claude-sonnet-4", + object: "model", + created: 1640995200, + owned_by: "anthropic", + name: "Claude Sonnet 4", + description: "Sonnet 4", + context_window: 200000, + max_tokens: 64000, + type: "language", + pricing: { + input: "3.00", + output: "15.00", + input_cache_write: "3.75", + input_cache_read: "0.30", + }, + }, + { + id: "image/dall-e-3", + object: "model", + created: 1640995200, + owned_by: "openai", + name: "DALL-E 3", + description: "Image", + context_window: 4000, + max_tokens: 1000, + type: "image", + pricing: { input: "40.00", output: "0.00" }, + }, + ], + }, + } + + it("forwards the bearer token and timeout, filters non-language models", async () => { + mockedAxios.get.mockResolvedValueOnce(mockResponse) + + const models = await getZooGatewayModels({ + zooGatewayBaseUrl: baseUrl, + zooSessionToken: token, + } as any) + + expect(mockedAxios.get).toHaveBeenCalledWith( + `${baseUrl}/models`, + 
expect.objectContaining({ + headers: expect.objectContaining({ Authorization: `Bearer ${token}` }), + timeout: expect.any(Number), + }), + ) + expect(Object.keys(models)).toHaveLength(1) + expect(models["anthropic/claude-sonnet-4"]).toBeDefined() + }) + + it("omits the Authorization header when no token is provided", async () => { + mockedAxios.get.mockResolvedValueOnce(mockResponse) + + await getZooGatewayModels({ zooGatewayBaseUrl: baseUrl } as any) + + const call = mockedAxios.get.mock.calls[0] + expect(call[1].headers.Authorization).toBeUndefined() + }) + + it("returns {} and never leaks the error object when the request fails", async () => { + const consoleErrorSpy = vitest.spyOn(console, "error").mockImplementation(() => {}) + const failure: any = new Error("Network error") + // Simulate axios attaching the request config (which contains the bearer token). + failure.config = { headers: { Authorization: "Bearer should-never-be-logged" } } + failure.code = "ECONNRESET" + failure.response = { status: 502, statusText: "Bad Gateway" } + mockedAxios.get.mockRejectedValueOnce(failure) + + const models = await getZooGatewayModels({ + zooGatewayBaseUrl: baseUrl, + zooSessionToken: token, + } as any) + + expect(models).toEqual({}) + const logged = consoleErrorSpy.mock.calls.map((args) => String(args[0])).join("\n") + expect(logged).toContain("status=502") + expect(logged).toContain("code=ECONNRESET") + expect(logged).not.toContain("should-never-be-logged") + expect(logged).not.toContain("Authorization") + consoleErrorSpy.mockRestore() + }) + + it("returns {} on a structurally broken response instead of throwing", async () => { + const consoleErrorSpy = vitest.spyOn(console, "error").mockImplementation(() => {}) + mockedAxios.get.mockResolvedValueOnce({ data: { unexpected: true } }) + + const models = await getZooGatewayModels({ + zooGatewayBaseUrl: baseUrl, + zooSessionToken: token, + } as any) + + expect(models).toEqual({}) + expect(consoleErrorSpy).toHaveBeenCalled() 
+ consoleErrorSpy.mockRestore() + }) + }) + + describe("parseZooGatewayModel", () => { + it("delegates to the vercel-ai-gateway parser", () => { + const result = parseZooGatewayModel({ + id: "anthropic/claude-sonnet-4", + model: { + id: "anthropic/claude-sonnet-4", + object: "model", + created: 0, + owned_by: "anthropic", + name: "Claude Sonnet 4", + description: "Sonnet", + context_window: 200000, + max_tokens: 64000, + type: "language", + pricing: { + input: "3.00", + output: "15.00", + input_cache_write: "3.75", + input_cache_read: "0.30", + }, + } as any, + }) + + expect(result.contextWindow).toBe(200000) + expect(result.maxTokens).toBe(64000) + expect(result.supportsPromptCache).toBe(true) + }) + }) +}) diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts index ee4ea5bfe6..5936150d68 100644 --- a/src/api/providers/fetchers/modelCache.ts +++ b/src/api/providers/fetchers/modelCache.ts @@ -26,6 +26,7 @@ import { getOllamaModels } from "./ollama" import { getLMStudioModels } from "./lmstudio" import { getPoeModels } from "./poe" import { getDeepSeekModels } from "./deepseek" +import { getZooGatewayModels } from "./zoo-gateway" const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 }) @@ -92,6 +93,9 @@ async function fetchModelsFromProvider(options: GetModelsOptions): Promise => { const { provider } = options - let models = getModelsFromCache(provider) + // Always fetch fresh to prevent serving stale models from different auth contexts. + const shouldSkipCache = provider === "zoo-gateway" + + let models = shouldSkipCache ? 
undefined : getModelsFromCache(provider) if (models) { return models @@ -128,13 +135,14 @@ export const getModels = async (options: GetModelsOptions): Promise // Only cache non-empty results to prevent persisting failed API responses // Empty results could indicate API failure rather than "no models exist" - if (modelCount > 0) { + // Zoo Gateway models are user-specific - skip caching entirely + if (modelCount > 0 && !shouldSkipCache) { memoryCache.set(provider, models) await writeModels(provider, models).catch((err) => console.error(`[MODEL_CACHE] Error writing ${provider} models to file cache:`, err), ) - } else { + } else if (modelCount === 0) { TelemetryService.instance.captureEvent(TelemetryEventName.MODEL_CACHE_EMPTY_RESPONSE, { provider, context: "getModels", @@ -163,12 +171,22 @@ export const getModels = async (options: GetModelsOptions): Promise export const refreshModels = async (options: GetModelsOptions): Promise => { const { provider } = options - // Check if there's already an in-flight refresh for this provider + // Zoo Gateway models are user-specific (auth-scoped). Mirror the bypass in + // getModels() so we never persist one user's model list and serve it to a + // different authenticated user from cache. + const shouldSkipCache = provider === "zoo-gateway" + + // Check if there's already an in-flight refresh for this provider. // This prevents race conditions where multiple concurrent refreshes might - // overwrite each other's results - const existingRequest = inFlightRefresh.get(provider) - if (existingRequest) { - return existingRequest + // overwrite each other's results. Skip de-duplication for auth-scoped + // providers because two concurrent calls may carry different tokens + // (e.g., after a sign-out/sign-in within the same session) and we must + // not return the first caller's results to the second caller. 
+ if (!shouldSkipCache) { + const existingRequest = inFlightRefresh.get(provider) + if (existingRequest) { + return existingRequest + } } // Create the refresh promise and track it @@ -179,7 +197,7 @@ export const refreshModels = async (options: GetModelsOptions): Promise - console.error(`[refreshModels] Error writing ${provider} models to disk:`, err), - ) + await writeModels(provider, models).catch((err) => + console.error(`[refreshModels] Error writing ${provider} models to disk:`, err), + ) + } return models } catch (error) { - // Log the error for debugging, then return existing cache if available (graceful degradation) + // Log the error for debugging, then return existing cache if available (graceful degradation). + // For auth-scoped providers (zoo-gateway) we MUST NOT return cached models from a prior + // session, since they could belong to a different user — return empty instead. console.error(`[refreshModels] Failed to refresh ${provider} models:`, error) + if (shouldSkipCache) { + return {} + } return getModelsFromCache(provider) || {} } finally { // Always clean up the in-flight tracking - inFlightRefresh.delete(provider) + if (!shouldSkipCache) { + inFlightRefresh.delete(provider) + } } })() - // Track the in-flight request - inFlightRefresh.set(provider, refreshPromise) + // Track the in-flight request (auth-scoped providers are excluded; see above). 
+ if (!shouldSkipCache) { + inFlightRefresh.set(provider, refreshPromise) + } return refreshPromise } diff --git a/src/api/providers/fetchers/zoo-gateway.ts b/src/api/providers/fetchers/zoo-gateway.ts new file mode 100644 index 0000000000..8b6922d602 --- /dev/null +++ b/src/api/providers/fetchers/zoo-gateway.ts @@ -0,0 +1,126 @@ +import axios from "axios" + +import type { ModelInfo } from "@roo-code/types" + +import type { ApiHandlerOptions } from "../../../shared/api" +import { getZooCodeBaseUrl } from "../../../services/zoo-code-auth" + +// Reuse the same schemas and parsing logic from vercel-ai-gateway since the API format is identical +import { type VercelAiGatewayModel, parseVercelAiGatewayModel } from "./vercel-ai-gateway" + +import { z } from "zod" + +/** + * ZooGatewayPricing (same format as Vercel AI Gateway) + */ + +const zooGatewayPricingSchema = z.object({ + input: z.string().optional(), + output: z.string().optional(), + input_cache_write: z.string().optional(), + input_cache_read: z.string().optional(), + image: z.string().optional(), +}) + +/** + * ZooGatewayModel (same format as Vercel AI Gateway) + */ + +const zooGatewayModelSchema = z.object({ + id: z.string(), + object: z.string(), + created: z.number(), + owned_by: z.string(), + name: z.string(), + description: z.string(), + context_window: z.number(), + max_tokens: z.number(), + type: z.string(), + pricing: zooGatewayPricingSchema, +}) + +/** + * ZooGatewayModelsResponse + */ + +const zooGatewayModelsResponseSchema = z.object({ + object: z.string(), + data: z.array(zooGatewayModelSchema), +}) + +type ZooGatewayModelsResponse = z.infer + +// Bound model discovery so a network stall can't hang provider initialization paths. +const MODEL_DISCOVERY_TIMEOUT_MS = 15_000 + +/** + * getZooGatewayModels + * + * Fetches models from the Zoo Gateway API. Requires authentication via the zoo_ext_ token. 
+ */ + +export async function getZooGatewayModels(options?: ApiHandlerOptions): Promise> { + const models: Record = {} + const baseURL = options?.zooGatewayBaseUrl ?? `${getZooCodeBaseUrl()}/api/gateway/v1` + + // Build headers - Zoo Gateway requires authentication via the zoo_ext_ session token + const headers: Record = {} + if (options?.zooSessionToken) { + headers["Authorization"] = `Bearer ${options.zooSessionToken}` + } + + try { + const response = await axios.get(`${baseURL}/models`, { + headers, + timeout: MODEL_DISCOVERY_TIMEOUT_MS, + }) + const result = zooGatewayModelsResponseSchema.safeParse(response.data) + + // Fall back to the raw response only when it looks structurally sound; otherwise return + // an empty list rather than crashing on `response.data.data` being undefined. + const data = result.success ? result.data.data : Array.isArray(response.data?.data) ? response.data.data : [] + + if (!result.success) { + console.error(`Zoo Gateway models response is invalid ${JSON.stringify(result.error.format())}`) + } + + for (const model of data) { + const { id } = model + + // Only include language models for chat inference. + // Embedding models are statically defined in embeddingModels.ts. + if (model.type !== "language") { + continue + } + + // Parse model using the same logic as Vercel AI Gateway since formats are identical + models[id] = parseZooGatewayModel({ id, model: model as VercelAiGatewayModel }) + } + } catch (error) { + // Log only safe fields; never serialize the full error object because it + // includes request config/headers which carry the bearer session token. + const err = error as { + message?: string + name?: string + code?: string + response?: { status?: number; statusText?: string } + } + console.error( + `Error fetching Zoo Gateway models: name=${err.name ?? "Error"} code=${err.code ?? "unknown"} status=${err.response?.status ?? "unknown"} ${err.response?.statusText ?? ""} message=${err.message ?? 
"unknown error"}`, + ) + } + + return models +} + +/** + * parseZooGatewayModel + * + * Parses a Zoo Gateway model into ModelInfo format. + * Zoo Gateway returns the same format as Vercel AI Gateway, so we can reuse the parsing logic. + */ + +export const parseZooGatewayModel = ({ id, model }: { id: string; model: VercelAiGatewayModel }): ModelInfo => { + // Reuse the parsing logic from vercel-ai-gateway + return parseVercelAiGatewayModel({ id, model }) +} diff --git a/src/api/providers/index.ts b/src/api/providers/index.ts index 416cef1c47..98a235948a 100644 --- a/src/api/providers/index.ts +++ b/src/api/providers/index.ts @@ -25,6 +25,7 @@ export { XAIHandler } from "./xai" export { ZAiHandler } from "./zai" export { FireworksHandler } from "./fireworks" export { VercelAiGatewayHandler } from "./vercel-ai-gateway" +export { ZooGatewayHandler } from "./zoo-gateway" export { MiniMaxHandler } from "./minimax" export { MimoHandler } from "./mimo" export { BasetenHandler } from "./baseten" diff --git a/src/api/providers/zoo-gateway.ts b/src/api/providers/zoo-gateway.ts new file mode 100644 index 0000000000..ab0b632c9a --- /dev/null +++ b/src/api/providers/zoo-gateway.ts @@ -0,0 +1,167 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" + +import { + zooGatewayDefaultModelId, + zooGatewayDefaultModelInfo, + ZOO_GATEWAY_DEFAULT_TEMPERATURE, + VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS, +} from "@roo-code/types" + +import { ApiHandlerOptions } from "../../shared/api" +import { getZooCodeBaseUrl } from "../../services/zoo-code-auth" +import { Package } from "../../shared/package" + +import { ApiStream } from "../transform/stream" +import { convertToOpenAiMessages } from "../transform/openai-format" +import { addCacheBreakpoints } from "../transform/caching/vercel-ai-gateway" + +import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" +import { RouterProvider } from "./router-provider" + +// Extend OpenAI's 
CompletionUsage to include Zoo Gateway specific fields (same as Vercel AI Gateway)
+interface ZooGatewayUsage extends OpenAI.CompletionUsage {
+	cache_creation_input_tokens?: number
+	cost?: number
+}
+
+/**
+ * API handler that sends chat completions through the Zoo Gateway using the
+ * OpenAI client provided by RouterProvider.
+ *
+ * The Zoo session token is required at construction time and is passed to the
+ * parent as the API key.
+ */
+export class ZooGatewayHandler extends RouterProvider implements SingleCompletionHandler {
+	/**
+	 * @param options Handler options; reads `zooGatewayBaseUrl`, `zooSessionToken`,
+	 *   `zooGatewayModelId` and `openAiHeaders`.
+	 * @throws Error when `options.zooSessionToken` is missing or empty.
+	 */
+	constructor(options: ApiHandlerOptions) {
+		// `||` rather than `??`: an empty-string override (e.g. a cleared settings field)
+		// must fall back to the default gateway URL instead of being forwarded as the
+		// client's base URL.
+		const baseURL = options.zooGatewayBaseUrl || `${getZooCodeBaseUrl()}/api/gateway/v1`
+
+		// Fail fast with a clear message instead of waiting for a 401.
+		// The token is set automatically by handleZooCodeCallback() after the user
+		// authenticates via the "Sign in with Zoo Code" flow in the extension.
+		if (!options.zooSessionToken) {
+			throw new Error("Zoo Gateway requires authentication. Please sign in to Zoo Code first.")
+		}
+
+		// Merge Zoo-specific enrichment headers into openAiHeaders so they flow through
+		// the parent's single OpenAI client; `this.client` is declared readonly on
+		// RouterProvider and is never reassigned here. Caller-supplied openAiHeaders are
+		// spread last, so they win over the Zoo defaults on key collisions.
+		// Per-request headers (task id / mode) are set in createMessage below.
+		super({
+			options: {
+				...options,
+				openAiHeaders: {
+					"X-Zoo-Editor": "vscode",
+					"X-Zoo-Extension-Version": Package.version,
+					...(options.openAiHeaders || {}),
+				},
+			},
+			name: "zoo-gateway",
+			baseURL,
+			apiKey: options.zooSessionToken,
+			modelId: options.zooGatewayModelId,
+			defaultModelId: zooGatewayDefaultModelId,
+			defaultModelInfo: zooGatewayDefaultModelInfo,
+		})
+	}
+
+	/**
+	 * Streams a chat completion, yielding `text`, `tool_call_partial` and `usage` chunks.
+	 *
+	 * @param systemPrompt Sent as the leading `system` message.
+	 * @param messages Anthropic-format history, converted with `convertToOpenAiMessages`.
+	 * @param metadata Optional; supplies `taskId` / `mode` (sent as the `X-Zoo-Task-ID` /
+	 *   `X-Zoo-Mode` request headers), `tools`, `tool_choice` and `parallelToolCalls`.
+	 */
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const { id: modelId, info } = await this.fetchModel()
+
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// Apply prompt caching for models that support it
+		// Zoo Gateway serves the same models as Vercel AI Gateway, so caching support is identical
+		if (VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS.has(modelId) && info.supportsPromptCache) {
+			addCacheBreakpoints(systemPrompt, openAiMessages)
+		}
+
+		// Build request headers with enrichment metadata; each header is only added
+		// when the corresponding metadata field is set.
+		const requestHeaders: Record<string, string> = {}
+		if (metadata?.taskId) {
+			requestHeaders["X-Zoo-Task-ID"] = metadata.taskId
+		}
+		if (metadata?.mode) {
+			requestHeaders["X-Zoo-Mode"] = metadata.mode
+		}
+
+		const body: OpenAI.Chat.ChatCompletionCreateParams = {
+			model: modelId,
+			messages: openAiMessages,
+			// Temperature is omitted entirely for models that do not support it.
+			temperature: this.supportsTemperature(modelId)
+				? (this.options.modelTemperature ?? ZOO_GATEWAY_DEFAULT_TEMPERATURE)
+				: undefined,
+			max_completion_tokens: info.maxTokens,
+			stream: true,
+			stream_options: { include_usage: true },
+			tools: this.convertToolsForOpenAI(metadata?.tools),
+			tool_choice: metadata?.tool_choice,
+			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+		}
+
+		const completion = await this.client.chat.completions.create(body, {
+			headers: requestHeaders,
+		})
+
+		for await (const chunk of completion) {
+			const delta = chunk.choices[0]?.delta
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+
+			// Emit raw tool call chunks - NativeToolCallParser handles state management
+			if (delta?.tool_calls) {
+				for (const toolCall of delta.tool_calls) {
+					yield {
+						type: "tool_call_partial",
+						index: toolCall.index,
+						id: toolCall.id,
+						name: toolCall.function?.name,
+						arguments: toolCall.function?.arguments,
+					}
+				}
+			}
+
+			if (chunk.usage) {
+				// Read the usage payload through the extended shape to reach the
+				// gateway-specific `cache_creation_input_tokens` and `cost` fields.
+				const usage = chunk.usage as ZooGatewayUsage
+				yield {
+					type: "usage",
+					inputTokens: usage.prompt_tokens || 0,
+					outputTokens: usage.completion_tokens || 0,
+					// Zero/missing cache counts are reported as undefined rather than 0.
+					cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
+					cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || undefined,
+					totalCost: usage.cost ?? 0,
+				}
+			}
+		}
+	}
+
+	/**
+	 * Runs a single non-streaming completion with `prompt` as the only user message.
+	 *
+	 * @returns The first choice's message content, or `""` when it is absent.
+	 * @throws Error prefixed with "Zoo Gateway completion error: " when the completion
+	 *   request throws an `Error`; any other thrown value is rethrown unchanged.
+	 */
+	async completePrompt(prompt: string): Promise<string> {
+		const { id: modelId, info } = await this.fetchModel()
+
+		try {
+			const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
+				model: modelId,
+				messages: [{ role: "user", content: prompt }],
+				stream: false,
+			}
+
+			if (this.supportsTemperature(modelId)) {
+				requestOptions.temperature = this.options.modelTemperature ?? ZOO_GATEWAY_DEFAULT_TEMPERATURE
+			}
+
+			requestOptions.max_completion_tokens = info.maxTokens
+
+			const response = await this.client.chat.completions.create(requestOptions)
+			return response.choices[0]?.message.content || ""
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`Zoo Gateway completion error: ${error.message}`)
+			}
+			throw error
+		}
+	}
+}
diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index be9d705684..3a4858edad 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -2477,6 +2477,7 @@ describe("ClineProvider - Router Models", () => { ollama: {}, lmstudio: {}, "vercel-ai-gateway": mockModels, + "zoo-gateway": {}, poe: {}, deepseek: {}, }, @@ -2523,6 +2524,7 @@ describe("ClineProvider - Router Models", () => { lmstudio: {}, litellm: {}, "vercel-ai-gateway": mockModels, + "zoo-gateway": {}, poe: {}, deepseek: {}, }, @@ -2618,6 +2620,7 @@ describe("ClineProvider - Router Models", () => { ollama: {}, lmstudio: {}, "vercel-ai-gateway": mockModels, + "zoo-gateway": {}, poe: {}, deepseek: {}, }, diff --git a/src/core/webview/__tests__/webviewMessageHandler.spec.ts b/src/core/webview/__tests__/webviewMessageHandler.spec.ts index 17e0caebb0..32e0f9b48c 100644 --- a/src/core/webview/__tests__/webviewMessageHandler.spec.ts +++ b/src/core/webview/__tests__/webviewMessageHandler.spec.ts @@ -369,6 +369,7 @@ describe("webviewMessageHandler - requestRouterModels", () => { ollama: {}, lmstudio: {}, "vercel-ai-gateway": mockModels, + "zoo-gateway": {}, poe: {}, deepseek: {}, }, @@ -455,6 +456,7 @@ describe("webviewMessageHandler - requestRouterModels", () => { ollama: {}, lmstudio: {}, "vercel-ai-gateway": mockModels, + "zoo-gateway": {}, poe: {}, deepseek: {}, }, @@ -510,6 +512,7 @@ describe("webviewMessageHandler - requestRouterModels", () => { ollama: {}, lmstudio: {}, "vercel-ai-gateway": mockModels, + "zoo-gateway":
{}, poe: {}, deepseek: {}, }, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 429de051b8..10141c1c87 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -922,6 +922,7 @@ export const webviewMessageHandler = async ( : { openrouter: {}, "vercel-ai-gateway": {}, + "zoo-gateway": {}, litellm: {}, requesty: {}, unbound: {}, diff --git a/src/shared/api.ts b/src/shared/api.ts index a6f31855ca..678156a183 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -171,6 +171,7 @@ type CommonFetchParams = { const dynamicProviderExtras = { openrouter: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type "vercel-ai-gateway": {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type + "zoo-gateway": {} as { apiKey?: string; baseUrl?: string }, litellm: {} as { apiKey: string; baseUrl: string }, requesty: {} as { apiKey?: string; baseUrl?: string }, unbound: {} as { apiKey?: string }, diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts index c4f3040084..bab13b7e9e 100644 --- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts +++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts @@ -346,6 +346,15 @@ function getSelectedModel({ const info = routerModels["vercel-ai-gateway"]?.[id] return { id, info } } + case "zoo-gateway": { + const id = getValidatedModelId( + apiConfiguration.zooGatewayModelId, + routerModels["zoo-gateway"], + defaultModelId, + ) + const info = routerModels["zoo-gateway"]?.[id] + return { id, info } + } // case "anthropic": // case "fake-ai": default: { diff --git a/webview-ui/src/utils/__tests__/validate.spec.ts b/webview-ui/src/utils/__tests__/validate.spec.ts index 7d6152a03d..0416151f8e 100644 --- a/webview-ui/src/utils/__tests__/validate.spec.ts +++ b/webview-ui/src/utils/__tests__/validate.spec.ts @@ -44,6 +44,7 @@ 
describe("Model Validation Functions", () => { ollama: {}, lmstudio: {}, "vercel-ai-gateway": {}, + "zoo-gateway": {}, poe: {}, deepseek: {}, }