continuedev · lyonsno · May 29, 2026
@@ -256,7 +256,7 @@ describe("processStreamingResponse - content preservation", () => {
     expect(result.finalContent).toBe("Hello world!");
   });
 
-  it("routes gpt-5 models through responsesStream and preserves streaming tool updates", async () => {
+  it("routes gpt-5 models through chatCompletionStream and preserves streaming tool updates", async () => {
     const gpt5Chunks: ChatCompletionChunk[] = [
       {
         id: "resp_gpt5",
@@ -348,13 +348,13 @@ describe("processStreamingResponse - content preservation", () => {
     ];
 
     const responsesStream = vi.fn().mockImplementation(async function* () {
+      throw new Error("responsesStream should stay adapter-owned in CLI");
+    });
+    const chatCompletionStream = vi.fn().mockImplementation(async function* () {
       for (const chunk of gpt5Chunks) {
         yield chunk;
       }
     });
-    const chatCompletionStream = vi.fn().mockImplementation(async function* () {
-      throw new Error("chatCompletionStream should not be used for gpt-5");
-    });
 
     mockLlmApi = {
       responsesStream,
@@ -374,8 +374,8 @@ describe("processStreamingResponse - content preservation", () => {
       systemMessage: "You are a helpful assistant.",
     });
 
-    expect(responsesStream).toHaveBeenCalledTimes(1);
-    expect(chatCompletionStream).not.toHaveBeenCalled();
+    expect(chatCompletionStream).toHaveBeenCalledTimes(1);
+    expect(responsesStream).not.toHaveBeenCalled();
     expect(result.content).toBe("Analyzing repository…");
     expect(result.toolCalls).toHaveLength(1);
     expect(result.toolCalls[0]).toMatchObject({

@@ -1,6 +1,10 @@
+import { BaseLlmApi } from "@continuedev/openai-adapters";
 import { vi } from "vitest";
 
-import { ExponentialBackoffOptions } from "./exponentialBackoff.js";
+import {
+  chatCompletionStreamWithBackoff,
+  ExponentialBackoffOptions,
+} from "./exponentialBackoff.js";
 
 // Since the functions are not exported, we need to recreate them for testing
 function isRetryableError(error: any): boolean {
@@ -69,6 +73,43 @@ function calculateDelay(
 }
 
 describe("exponentialBackoff utilities", () => {
+  describe("chatCompletionStreamWithBackoff", () => { // exercises the wrapper imported from ./exponentialBackoff.js
+    it("delegates response-capable model routing to chatCompletionStream", async () => {
+      const abortController = new AbortController(); // never aborted here; only supplies the signal argument
+      const chatCompletionStream = vi
+        .fn()
+        .mockImplementation(async function* () {
+          yield { choices: [{ delta: { content: "chat path" } }] }; // marker chunk identifying this path
+        });
+      const responsesStream = vi.fn().mockImplementation(async function* () {
+        yield { choices: [{ delta: { content: "responses path" } }] }; // would surface only if this stub were invoked
+      });
+      const llmApi = { // stubs just these two methods; the double cast below bypasses type checking against BaseLlmApi
+        chatCompletionStream,
+        responsesStream,
+      } as unknown as BaseLlmApi;
+      // Call the wrapper with model "gpt-5", the "response-capable" model this test's title refers to.
+      const stream = await chatCompletionStreamWithBackoff(
+        llmApi,
+        {
+          model: "gpt-5",
+          messages: [{ role: "user", content: "hello" }],
+          stream: true,
+        },
+        abortController.signal,
+      );
+      // Drain the returned async iterable so the yielded chunks can be inspected.
+      const chunks = [];
+      for await (const chunk of stream) {
+        chunks.push(chunk);
+      }
+      // Exactly one chatCompletionStream call, no responsesStream call, and the first chunk carries the chat-path marker.
+      expect(chatCompletionStream).toHaveBeenCalledTimes(1);
+      expect(responsesStream).not.toHaveBeenCalled();
+      expect(chunks[0].choices[0].delta.content).toBe("chat path");
+    });
+  });
+
   describe("isRetryableError", () => {
     it("should return true for network connection errors", () => {
       const error = { code: "ECONNRESET" };

@@ -1,4 +1,4 @@
-import { BaseLlmApi, isResponsesModel } from "@continuedev/openai-adapters";
+import { BaseLlmApi } from "@continuedev/openai-adapters";
 import type { ChatCompletionCreateParamsStreaming } from "openai/resources.mjs";
 
 import { error, warn } from "../logging.js";
@@ -182,14 +182,6 @@ export async function chatCompletionStreamWithBackoff(
         throw new Error("Request aborted");
       }
 
-      const useResponses =
-        typeof llmApi.responsesStream === "function" &&
-        isResponsesModel(params.model);
-
-      if (useResponses) {
-        return llmApi.responsesStream!(params, abortSignal);
-      }
-
       return llmApi.chatCompletionStream(params, abortSignal);
     } catch (err: any) {
       lastError = err;