diff --git a/extensions/cli/src/stream/streamChatResponse.test.ts b/extensions/cli/src/stream/streamChatResponse.test.ts index 131132e3f64..a7b746e4787 100644 --- a/extensions/cli/src/stream/streamChatResponse.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.test.ts @@ -256,7 +256,7 @@ describe("processStreamingResponse - content preservation", () => { expect(result.finalContent).toBe("Hello world!"); }); - it("routes gpt-5 models through responsesStream and preserves streaming tool updates", async () => { + it("routes gpt-5 models through chatCompletionStream and preserves streaming tool updates", async () => { const gpt5Chunks: ChatCompletionChunk[] = [ { id: "resp_gpt5", @@ -348,13 +348,13 @@ describe("processStreamingResponse - content preservation", () => { ]; const responsesStream = vi.fn().mockImplementation(async function* () { + throw new Error("responsesStream should stay adapter-owned in CLI"); + }); + const chatCompletionStream = vi.fn().mockImplementation(async function* () { for (const chunk of gpt5Chunks) { yield chunk; } }); - const chatCompletionStream = vi.fn().mockImplementation(async function* () { - throw new Error("chatCompletionStream should not be used for gpt-5"); - }); mockLlmApi = { responsesStream, @@ -374,8 +374,8 @@ describe("processStreamingResponse - content preservation", () => { systemMessage: "You are a helpful assistant.", }); - expect(responsesStream).toHaveBeenCalledTimes(1); - expect(chatCompletionStream).not.toHaveBeenCalled(); + expect(chatCompletionStream).toHaveBeenCalledTimes(1); + expect(responsesStream).not.toHaveBeenCalled(); expect(result.content).toBe("Analyzing repository…"); expect(result.toolCalls).toHaveLength(1); expect(result.toolCalls[0]).toMatchObject({ diff --git a/extensions/cli/src/util/exponentialBackoff.test.ts b/extensions/cli/src/util/exponentialBackoff.test.ts index d8365fb64b7..32bca7fd902 100644 --- a/extensions/cli/src/util/exponentialBackoff.test.ts +++ b/extensions/cli/src/util/exponentialBackoff.test.ts @@ -1,6 +1,10 @@ +import { BaseLlmApi } from "@continuedev/openai-adapters"; import { vi } from "vitest"; -import { ExponentialBackoffOptions } from "./exponentialBackoff.js"; +import { + chatCompletionStreamWithBackoff, + ExponentialBackoffOptions, +} from "./exponentialBackoff.js"; // Since the functions are not exported, we need to recreate them for testing function isRetryableError(error: any): boolean { @@ -69,6 +73,43 @@ function calculateDelay( } describe("exponentialBackoff utilities", () => { + describe("chatCompletionStreamWithBackoff", () => { + it("delegates response-capable model routing to chatCompletionStream", async () => { + const abortController = new AbortController(); + const chatCompletionStream = vi + .fn() + .mockImplementation(async function* () { + yield { choices: [{ delta: { content: "chat path" } }] }; + }); + const responsesStream = vi.fn().mockImplementation(async function* () { + yield { choices: [{ delta: { content: "responses path" } }] }; + }); + const llmApi = { + chatCompletionStream, + responsesStream, + } as unknown as BaseLlmApi; + + const stream = await chatCompletionStreamWithBackoff( + llmApi, + { + model: "gpt-5", + messages: [{ role: "user", content: "hello" }], + stream: true, + }, + abortController.signal, + ); + + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + + expect(chatCompletionStream).toHaveBeenCalledTimes(1); + expect(responsesStream).not.toHaveBeenCalled(); + expect(chunks[0].choices[0].delta.content).toBe("chat path"); + }); + }); + describe("isRetryableError", () => { it("should return true for network connection errors", () => { const error = { code: "ECONNRESET" }; diff --git a/extensions/cli/src/util/exponentialBackoff.ts b/extensions/cli/src/util/exponentialBackoff.ts index 8f3004ae9bb..40dc2fefebd 100644 --- a/extensions/cli/src/util/exponentialBackoff.ts +++ b/extensions/cli/src/util/exponentialBackoff.ts @@ -1,4 +1,4 @@ -import { BaseLlmApi, isResponsesModel } from "@continuedev/openai-adapters"; +import { BaseLlmApi } from "@continuedev/openai-adapters"; import type { ChatCompletionCreateParamsStreaming } from "openai/resources.mjs"; import { error, warn } from "../logging.js"; @@ -182,14 +182,6 @@ export async function chatCompletionStreamWithBackoff( throw new Error("Request aborted"); } - const useResponses = - typeof llmApi.responsesStream === "function" && - isResponsesModel(params.model); - - if (useResponses) { - return llmApi.responsesStream!(params, abortSignal); - } - return llmApi.chatCompletionStream(params, abortSignal); } catch (err: any) { lastError = err;