From 1bd7e6a38c3f0f58a9b6971ba31d8c888d9f975d Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Tue, 19 May 2026 16:24:40 +0800
Subject: [PATCH 1/8] perf: reuse OpenAI client instance and add connection
 warmup

Cache the OpenAI client at module level keyed by (apiKey, baseURL)
to avoid creating a fresh HTTP connection pool on every LLM turn.
The client is a stateless fetch wrapper so sharing across calls is
safe.  Model, thinking-mode and other settings are still read fresh
from config files each time.

Also add a mount-time warmup effect that eagerly creates the client
so the TCP+TLS connection is established while the user composes
their first prompt.
---
 .gitignore     |  1 +
 src/ui/App.tsx | 41 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 11b67ce..dd972a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ dist/
 .vscode/
 *.tgz
 *.log
+scripts/
diff --git a/src/ui/App.tsx b/src/ui/App.tsx
index 582abaf..e82e5f3 100644
--- a/src/ui/App.tsx
+++ b/src/ui/App.tsx
@@ -162,6 +162,13 @@ export function App({ projectRoot, initialPrompt, onRestart }: AppProps): React.
     void refreshSkills();
   }, [refreshSessionsList, refreshSkills]);
 
+  // Eagerly create the OpenAI client on mount so the TCP+TLS connection
+  // warmup (fire-and-forget inside createOpenAIClient) starts before the
+  // user sends their first prompt.
+  useEffect(() => {
+    createOpenAIClient(projectRoot);
+  }, [projectRoot]);
+
   useLayoutEffect(() => {
     const settings = resolveCurrentSettings(projectRoot);
     void sessionManager.initMcpServers(settings.mcpServers);
@@ -721,6 +728,13 @@ export function resolveCurrentSettings(projectRoot: string = process.cwd()): Res
   );
 }
 
+// Module-level cache for the OpenAI client instance.  The client itself is
+// a stateless fetch wrapper, so it is safe to share across calls as long as
+// the apiKey + baseURL stay the same.  Model, thinking-mode and other
+// settings are always read fresh from the project / user config files.
+let _cachedOpenAI: OpenAI | null = null;
+let _cachedOpenAIKey = "";
+
 export function createOpenAIClient(projectRoot: string = process.cwd()): {
   client: OpenAI | null;
   model: string;
@@ -749,12 +763,35 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
     };
   }
 
-  const client = new OpenAI({
+  const cacheKey = `${settings.apiKey}::${settings.baseURL}`;
+  if (_cachedOpenAI && _cachedOpenAIKey === cacheKey) {
+    return {
+      client: _cachedOpenAI,
+      model: settings.model,
+      baseURL: settings.baseURL,
+      thinkingEnabled: settings.thinkingEnabled,
+      reasoningEffort: settings.reasoningEffort,
+      debugLogEnabled: settings.debugLogEnabled,
+      notify: settings.notify,
+      webSearchTool: settings.webSearchTool,
+      env: settings.env,
+      machineId: getMachineId(),
+    };
+  }
+
+  _cachedOpenAI = new OpenAI({
     apiKey: settings.apiKey,
     baseURL: settings.baseURL || undefined,
   });
+  _cachedOpenAIKey = cacheKey;
+
+  // Fire-and-forget warmup: pre-establish TCP+TLS connection to the API
+  // server while the user is composing their first prompt.  Errors are
+  // silently ignored — the real request will retry on its own if needed.
+  void _cachedOpenAI.models.list().catch(() => {});
+
   return {
-    client,
+    client: _cachedOpenAI,
     model: settings.model,
     baseURL: settings.baseURL,
     thinkingEnabled: settings.thinkingEnabled,

From 2e5b2ed2a4eed8c463546385ecbf374002a8d6c6 Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Tue, 19 May 2026 16:54:30 +0800
Subject: [PATCH 2/8] perf: replace undici fetch with custom https.Agent for
 long keepAlive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The default undici-based global fetch only keeps connections alive for
4 seconds, which is too short for a CLI where the user may spend
10–30 seconds reading output before typing the next prompt.

Add a custom fetch implementation backed by node:https.Agent with
keepAlive: true and keepAliveMsecs set to 60 seconds.  The custom
fetch is passed to the OpenAI SDK constructor so every LLM API request
benefits from persistent connections across conversational turns.

Also handles streaming request bodies (ReadableStream) for SDK
features like file uploads.
---
 src/ui/App.tsx | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/src/ui/App.tsx b/src/ui/App.tsx
index e82e5f3..1cdd855 100644
--- a/src/ui/App.tsx
+++ b/src/ui/App.tsx
@@ -2,6 +2,7 @@ import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useSta
 import { Box, Static, Text, useApp, useStdout, useWindowSize } from "ink";
 import chalk from "chalk";
 import * as fs from "fs";
+import https from "node:https";
 import * as os from "os";
 import * as path from "path";
 import OpenAI from "openai";
@@ -728,6 +729,99 @@ export function resolveCurrentSettings(projectRoot: string = process.cwd()): Res
   );
 }
 
+// Custom fetch implementation that uses node:https.Agent with a configurable
+// keepAlive timeout.  The default undici-based global fetch only keeps
+// connections alive for 4 seconds, which is too short for a CLI where the
+// user may spend 10–30 seconds reading output before typing the next prompt.
+// With this custom Agent we get full control over idle connection lifetime.
+const KEEP_ALIVE_MSEC = 60_000; // 1 minute
+
+function createCustomFetch(keepAliveMsecs: number = KEEP_ALIVE_MSEC) {
+  const agent = new https.Agent({ keepAlive: true, keepAliveMsecs });
+
+  return async function customFetch(url: string | URL | Request, init?: RequestInit): Promise<Response> {
+    const urlObj = typeof url === "string" ? new URL(url) : url instanceof URL ? url : new URL(url.url);
+    const { method = "GET", headers = {}, body: reqBody, signal } = init ?? {};
+
+    // Normalize Headers to a plain Record
+    const plainHeaders: Record<string, string> = {};
+    if (headers instanceof Headers) {
+      for (const [k, v] of headers) plainHeaders[k] = v;
+    } else if (Array.isArray(headers)) {
+      for (const [k, v] of headers) plainHeaders[k] = v;
+    } else {
+      Object.assign(plainHeaders, headers);
+    }
+
+    const port = urlObj.port ? Number(urlObj.port) : 443;
+
+    return new Promise((resolve, reject) => {
+      const req = https.request(
+        {
+          hostname: urlObj.hostname,
+          port,
+          path: urlObj.pathname + urlObj.search,
+          method,
+          headers: plainHeaders,
+          agent,
+          signal: signal ?? undefined,
+        },
+        (res) => {
+          const resHeaders = new Headers();
+          for (const [k, v] of Object.entries(res.headers)) {
+            if (v) (Array.isArray(v) ? v : [v]).forEach((val) => resHeaders.append(k, val));
+          }
+
+          const body = new ReadableStream({
+            start(controller) {
+              res.on("data", (chunk: Buffer) => controller.enqueue(new Uint8Array(chunk)));
+              res.on("end", () => controller.close());
+              res.on("error", (err) => controller.error(err));
+            },
+            cancel() {
+              res.destroy();
+            },
+          });
+
+          resolve(
+            new Response(body, {
+              status: res.statusCode,
+              statusText: res.statusMessage,
+              headers: resHeaders,
+            })
+          );
+        }
+      );
+
+      req.on("error", reject);
+
+      if (reqBody) {
+        if (typeof reqBody === "string" || reqBody instanceof Uint8Array || ArrayBuffer.isView(reqBody)) {
+          req.write(reqBody as Parameters<typeof req.write>[0]);
+        } else if (reqBody instanceof ReadableStream) {
+          // Pipe streaming request body (used for file uploads by the SDK)
+          const reader = (reqBody as ReadableStream<Uint8Array>).getReader();
+          (async () => {
+            try {
+              while (true) {
+                const { done, value } = await reader.read();
+                if (done) break;
+                if (value) req.write(value);
+              }
+              req.end();
+            } catch (err) {
+              req.destroy(err instanceof Error ? err : new Error(String(err)));
+            }
+          })();
+          return; // req.end() is called inside the async IIFE
+        }
+      }
+
+      req.end();
+    });
+  };
+}
+
 // Module-level cache for the OpenAI client instance.  The client itself is
 // a stateless fetch wrapper, so it is safe to share across calls as long as
 // the apiKey + baseURL stay the same.  Model, thinking-mode and other
@@ -782,6 +876,7 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
   _cachedOpenAI = new OpenAI({
     apiKey: settings.apiKey,
     baseURL: settings.baseURL || undefined,
+    fetch: createCustomFetch(),
   });
   _cachedOpenAIKey = cacheKey;
 

From 6f8d2e228d853f8014741c8108f6936e7d037c82 Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Tue, 19 May 2026 17:04:09 +0800
Subject: [PATCH 3/8] refactor: replace custom fetch wrapper with undici Agent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the undici npm package's Agent with keepAliveTimeout: 60_000
(60 s) instead of the 90-line custom https.Agent-based fetch wrapper.
The goal is the same but the code is much simpler — just pass
undiciFetch with a configured Agent dispatcher to the OpenAI SDK.
---
 src/ui/App.tsx | 103 +++++--------------------------------------------
 1 file changed, 9 insertions(+), 94 deletions(-)

diff --git a/src/ui/App.tsx b/src/ui/App.tsx
index 1cdd855..42397a5 100644
--- a/src/ui/App.tsx
+++ b/src/ui/App.tsx
@@ -2,10 +2,10 @@ import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useSta
 import { Box, Static, Text, useApp, useStdout, useWindowSize } from "ink";
 import chalk from "chalk";
 import * as fs from "fs";
-import https from "node:https";
 import * as os from "os";
 import * as path from "path";
 import OpenAI from "openai";
+import { Agent, fetch as undiciFetch } from "undici";
 import {
   type LlmStreamProgress,
   type MessageMeta,
@@ -729,98 +729,12 @@ export function resolveCurrentSettings(projectRoot: string = process.cwd()): Res
   );
 }
 
-// Custom fetch implementation that uses node:https.Agent with a configurable
-// keepAlive timeout.  The default undici-based global fetch only keeps
-// connections alive for 4 seconds, which is too short for a CLI where the
-// user may spend 10–30 seconds reading output before typing the next prompt.
-// With this custom Agent we get full control over idle connection lifetime.
-const KEEP_ALIVE_MSEC = 60_000; // 1 minute
-
-function createCustomFetch(keepAliveMsecs: number = KEEP_ALIVE_MSEC) {
-  const agent = new https.Agent({ keepAlive: true, keepAliveMsecs });
-
-  return async function customFetch(url: string | URL | Request, init?: RequestInit): Promise<Response> {
-    const urlObj = typeof url === "string" ? new URL(url) : url instanceof URL ? url : new URL(url.url);
-    const { method = "GET", headers = {}, body: reqBody, signal } = init ?? {};
-
-    // Normalize Headers to a plain Record
-    const plainHeaders: Record<string, string> = {};
-    if (headers instanceof Headers) {
-      for (const [k, v] of headers) plainHeaders[k] = v;
-    } else if (Array.isArray(headers)) {
-      for (const [k, v] of headers) plainHeaders[k] = v;
-    } else {
-      Object.assign(plainHeaders, headers);
-    }
-
-    const port = urlObj.port ? Number(urlObj.port) : 443;
-
-    return new Promise((resolve, reject) => {
-      const req = https.request(
-        {
-          hostname: urlObj.hostname,
-          port,
-          path: urlObj.pathname + urlObj.search,
-          method,
-          headers: plainHeaders,
-          agent,
-          signal: signal ?? undefined,
-        },
-        (res) => {
-          const resHeaders = new Headers();
-          for (const [k, v] of Object.entries(res.headers)) {
-            if (v) (Array.isArray(v) ? v : [v]).forEach((val) => resHeaders.append(k, val));
-          }
-
-          const body = new ReadableStream({
-            start(controller) {
-              res.on("data", (chunk: Buffer) => controller.enqueue(new Uint8Array(chunk)));
-              res.on("end", () => controller.close());
-              res.on("error", (err) => controller.error(err));
-            },
-            cancel() {
-              res.destroy();
-            },
-          });
-
-          resolve(
-            new Response(body, {
-              status: res.statusCode,
-              statusText: res.statusMessage,
-              headers: resHeaders,
-            })
-          );
-        }
-      );
-
-      req.on("error", reject);
-
-      if (reqBody) {
-        if (typeof reqBody === "string" || reqBody instanceof Uint8Array || ArrayBuffer.isView(reqBody)) {
-          req.write(reqBody as Parameters<typeof req.write>[0]);
-        } else if (reqBody instanceof ReadableStream) {
-          // Pipe streaming request body (used for file uploads by the SDK)
-          const reader = (reqBody as ReadableStream<Uint8Array>).getReader();
-          (async () => {
-            try {
-              while (true) {
-                const { done, value } = await reader.read();
-                if (done) break;
-                if (value) req.write(value);
-              }
-              req.end();
-            } catch (err) {
-              req.destroy(err instanceof Error ? err : new Error(String(err)));
-            }
-          })();
-          return; // req.end() is called inside the async IIFE
-        }
-      }
-
-      req.end();
-    });
-  };
-}
+// Custom undici Agent with a 60-second keepAlive timeout.  The default
+// global fetch (undici) only keeps connections alive for 4 seconds, which
+// is too short for a CLI where the user may spend 10–30 seconds reading
+// output between prompts.  By passing a dedicated Agent to undiciFetch we
+// keep connections reusable for a full minute after the last request.
+const _keepAliveAgent = new Agent({ keepAliveTimeout: 60_000 });
 
 // Module-level cache for the OpenAI client instance.  The client itself is
 // a stateless fetch wrapper, so it is safe to share across calls as long as
@@ -876,7 +790,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
   _cachedOpenAI = new OpenAI({
     apiKey: settings.apiKey,
     baseURL: settings.baseURL || undefined,
-    fetch: createCustomFetch(),
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    fetch: (url: any, init: any) => undiciFetch(url, { ...init, dispatcher: _keepAliveAgent }),
   });
   _cachedOpenAIKey = cacheKey;
 

From 255226a3c9bdd7254dd8b5728a0e1bff7de28707 Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Tue, 19 May 2026 18:33:10 +0800
Subject: [PATCH 4/8] chore: add undici devDependency for custom keepAlive
 Agent

Required by the custom fetch wrapper, which replaces undici's default
global dispatcher (4 s keep-alive) with a custom Agent (60 s).
---
 package-lock.json | 13 ++++++++++++-
 package.json      |  3 ++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 800d75a..0b43587 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -38,7 +38,8 @@
         "prettier": "^3.8.3",
         "tsx": "^4.21.0",
         "typescript": "^6.0.3",
-        "typescript-eslint": "^8.59.2"
+        "typescript-eslint": "^8.59.2",
+        "undici": "^8.3.0"
       },
       "engines": {
         "node": ">=22"
@@ -4096,6 +4097,16 @@
         "typescript": ">=4.8.4 <6.1.0"
       }
     },
+    "node_modules/undici": {
+      "version": "8.3.0",
+      "resolved": "https://registry.npmmirror.com/undici/-/undici-8.3.0.tgz",
+      "integrity": "sha512-TkUDgb6tl7KOGZ+7e8E3d2FYgUQgF6z5YypqjWmixVQSQERFcVrVg0ySADm2LVLRh5ljAaHTCR5Fmz3Q34rB7Q==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=22.19.0"
+      }
+    },
     "node_modules/undici-types": {
       "version": "7.19.2",
       "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.19.2.tgz",
diff --git a/package.json b/package.json
index c438d68..b805d18 100644
--- a/package.json
+++ b/package.json
@@ -65,6 +65,7 @@
     "prettier": "^3.8.3",
     "tsx": "^4.21.0",
     "typescript": "^6.0.3",
-    "typescript-eslint": "^8.59.2"
+    "typescript-eslint": "^8.59.2",
+    "undici": "^8.3.0"
   }
 }

From 5b74c00db5bf16e1519c6aaafb233c4c2b78bf1a Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Tue, 19 May 2026 18:33:59 +0800
Subject: [PATCH 5/8] fix: move undici from devDependencies to dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

undici is imported at runtime in App.tsx for the custom keepAlive
Agent.  When bundled with --packages=external, end users need the
package installed — it cannot be a devDependency.
---
 package-lock.json | 5 ++---
 package.json      | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 0b43587..7d68f74 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -18,6 +18,7 @@
         "ink-gradient": "^4.0.0",
         "openai": "^6.35.0",
         "react": "^19.2.5",
+        "undici": "^8.3.0",
         "zod": "^4.4.3"
       },
       "bin": {
@@ -38,8 +39,7 @@
         "prettier": "^3.8.3",
         "tsx": "^4.21.0",
         "typescript": "^6.0.3",
-        "typescript-eslint": "^8.59.2",
-        "undici": "^8.3.0"
+        "typescript-eslint": "^8.59.2"
       },
       "engines": {
         "node": ">=22"
@@ -4101,7 +4101,6 @@
       "version": "8.3.0",
       "resolved": "https://registry.npmmirror.com/undici/-/undici-8.3.0.tgz",
       "integrity": "sha512-TkUDgb6tl7KOGZ+7e8E3d2FYgUQgF6z5YypqjWmixVQSQERFcVrVg0ySADm2LVLRh5ljAaHTCR5Fmz3Q34rB7Q==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=22.19.0"
diff --git a/package.json b/package.json
index b805d18..6d58864 100644
--- a/package.json
+++ b/package.json
@@ -48,6 +48,7 @@
     "ink-gradient": "^4.0.0",
     "openai": "^6.35.0",
     "react": "^19.2.5",
+    "undici": "^8.3.0",
     "zod": "^4.4.3"
   },
   "devDependencies": {
@@ -65,7 +66,6 @@
     "prettier": "^3.8.3",
     "tsx": "^4.21.0",
     "typescript": "^6.0.3",
-    "typescript-eslint": "^8.59.2",
-    "undici": "^8.3.0"
+    "typescript-eslint": "^8.59.2"
   }
 }

From db78e2b1756e2e9e2f9eea008e30e2f4638e0856 Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Tue, 19 May 2026 18:47:23 +0800
Subject: [PATCH 6/8] fix: downgrade undici to v7 for Node 20 compatibility

undici v8 requires Node >=22.19.0, but the CI matrix includes Node 20,
which the project intentionally supports.  v7 works on Node >=20.18.1.
---
 package-lock.json | 10 +++++-----
 package.json      |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 7d68f74..82db9af 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -18,7 +18,7 @@
         "ink-gradient": "^4.0.0",
         "openai": "^6.35.0",
         "react": "^19.2.5",
-        "undici": "^8.3.0",
+        "undici": "^7.25.0",
         "zod": "^4.4.3"
       },
       "bin": {
@@ -4098,12 +4098,12 @@
       }
     },
     "node_modules/undici": {
-      "version": "8.3.0",
-      "resolved": "https://registry.npmmirror.com/undici/-/undici-8.3.0.tgz",
-      "integrity": "sha512-TkUDgb6tl7KOGZ+7e8E3d2FYgUQgF6z5YypqjWmixVQSQERFcVrVg0ySADm2LVLRh5ljAaHTCR5Fmz3Q34rB7Q==",
+      "version": "7.25.0",
+      "resolved": "https://registry.npmmirror.com/undici/-/undici-7.25.0.tgz",
+      "integrity": "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==",
       "license": "MIT",
       "engines": {
-        "node": ">=22.19.0"
+        "node": ">=20.18.1"
       }
     },
     "node_modules/undici-types": {
diff --git a/package.json b/package.json
index 6d58864..b2826c2 100644
--- a/package.json
+++ b/package.json
@@ -48,7 +48,7 @@
     "ink-gradient": "^4.0.0",
     "openai": "^6.35.0",
     "react": "^19.2.5",
-    "undici": "^8.3.0",
+    "undici": "^7.25.0",
     "zod": "^4.4.3"
   },
   "devDependencies": {

From 87d52ade53833a18118e23a595f511a4823b0a6c Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Tue, 19 May 2026 19:00:28 +0800
Subject: [PATCH 7/8] fix: add 3s timeout to warmup request to prevent exit
 hang

Codex review found that the fire-and-forget warmup models.list()
had no timeout.  The OpenAI client defaults to a 10-minute timeout,
so an unreachable API could keep the Node process alive long after
the user exits.
---
 src/ui/App.tsx | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/ui/App.tsx b/src/ui/App.tsx
index 42397a5..515d5e6 100644
--- a/src/ui/App.tsx
+++ b/src/ui/App.tsx
@@ -796,9 +796,17 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
   _cachedOpenAIKey = cacheKey;
 
   // Fire-and-forget warmup: pre-establish TCP+TLS connection to the API
-  // server while the user is composing their first prompt.  Errors are
-  // silently ignored — the real request will retry on its own if needed.
-  void _cachedOpenAI.models.list().catch(() => {});
+  // server while the user is composing their first prompt.  Bounded by a
+  // short timeout so a slow / unreachable API never blocks process exit.
+  void (async () => {
+    const ac = new AbortController();
+    const timer = setTimeout(() => ac.abort(), 3000);
+    try {
+      await _cachedOpenAI.models.list({ signal: ac.signal }).catch(() => {});
+    } finally {
+      clearTimeout(timer);
+    }
+  })();
 
   return {
     client: _cachedOpenAI,

From b2544b831252c5d58f15cbfa6b5c7c04e1a1aa8f Mon Sep 17 00:00:00 2001
From: lellansin <lellansin@gmail.com>
Date: Wed, 20 May 2026 17:36:41 +0800
Subject: [PATCH 8/8] perf: reuse OpenAI client and add undici keep-alive Agent
 with connection warmup

Extract OpenAI client creation logic into src/common/openai-client.ts:
- Custom undici Agent with 60s keepAlive timeout (default is 4s)
- Module-level client instance cache (reuse across calls)
- Fire-and-forget connection warmup on first creation (3s timeout)
- getMachineId() helper

App.tsx now simply imports and re-exports createOpenAIClient from
the new common module, keeping UI concerns separate from HTTP/client
lifecycle management.
---
 package-lock.json           |  10 +++
 package.json                |   1 +
 src/common/openai-client.ts | 117 ++++++++++++++++++++++++++++++++++++
 src/ui/App.tsx              |  73 +++-------------------
 src/ui/index.ts             |   2 +-
 5 files changed, 138 insertions(+), 65 deletions(-)
 create mode 100644 src/common/openai-client.ts

diff --git a/package-lock.json b/package-lock.json
index 17a77ca..cdb85de 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -18,6 +18,7 @@
         "ink-gradient": "^4.0.0",
         "openai": "^6.35.0",
         "react": "^19.2.5",
+        "undici": "^7.25.0",
         "zod": "^4.4.3"
       },
       "bin": {
@@ -4096,6 +4097,15 @@
         "typescript": ">=4.8.4 <6.1.0"
       }
     },
+    "node_modules/undici": {
+      "version": "7.25.0",
+      "resolved": "https://registry.npmmirror.com/undici/-/undici-7.25.0.tgz",
+      "integrity": "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=20.18.1"
+      }
+    },
     "node_modules/undici-types": {
       "version": "7.19.2",
       "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.19.2.tgz",
diff --git a/package.json b/package.json
index b72fd96..bf8d167 100644
--- a/package.json
+++ b/package.json
@@ -48,6 +48,7 @@
     "ink-gradient": "^4.0.0",
     "openai": "^6.35.0",
     "react": "^19.2.5",
+    "undici": "^7.25.0",
     "zod": "^4.4.3"
   },
   "devDependencies": {
diff --git a/src/common/openai-client.ts b/src/common/openai-client.ts
new file mode 100644
index 0000000..7f9634c
--- /dev/null
+++ b/src/common/openai-client.ts
@@ -0,0 +1,117 @@
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+import OpenAI from "openai";
+import { Agent, fetch as undiciFetch } from "undici";
+import { resolveCurrentSettings } from "../ui/App";
+
+// Custom undici Agent with a 60-second keepAlive timeout.  The default
+// global fetch (undici) only keeps connections alive for 4 seconds, which
+// is too short for a CLI where the user may spend 10–30 seconds reading
+// output between prompts.  By passing a dedicated Agent to undiciFetch we
+// keep connections reusable for a full minute after the last request.
+const keepAliveAgent = new Agent({ keepAliveTimeout: 60_000 });
+
+// Module-level cache for the OpenAI client instance.  The client itself is
+// a stateless fetch wrapper, so it is safe to share across calls as long as
+// the apiKey + baseURL stay the same.  Model, thinking-mode and other
+// settings are always read fresh from the project / user config files.
+let cachedOpenAI: OpenAI | null = null;
+let cachedOpenAIKey = "";
+
+export function createOpenAIClient(projectRoot: string = process.cwd()): {
+  client: OpenAI | null;
+  model: string;
+  baseURL: string;
+  thinkingEnabled: boolean;
+  reasoningEffort: "high" | "max";
+  debugLogEnabled: boolean;
+  notify?: string;
+  webSearchTool?: string;
+  env: Record<string, string>;
+  machineId?: string;
+} {
+  const settings = resolveCurrentSettings(projectRoot);
+  if (!settings.apiKey) {
+    return {
+      client: null,
+      model: settings.model,
+      baseURL: settings.baseURL,
+      thinkingEnabled: settings.thinkingEnabled,
+      reasoningEffort: settings.reasoningEffort,
+      debugLogEnabled: settings.debugLogEnabled,
+      notify: settings.notify,
+      webSearchTool: settings.webSearchTool,
+      env: settings.env,
+      machineId: getMachineId(),
+    };
+  }
+
+  const cacheKey = `${settings.apiKey}::${settings.baseURL}`;
+  if (cachedOpenAI && cachedOpenAIKey === cacheKey) {
+    return {
+      client: cachedOpenAI,
+      model: settings.model,
+      baseURL: settings.baseURL,
+      thinkingEnabled: settings.thinkingEnabled,
+      reasoningEffort: settings.reasoningEffort,
+      debugLogEnabled: settings.debugLogEnabled,
+      notify: settings.notify,
+      webSearchTool: settings.webSearchTool,
+      env: settings.env,
+      machineId: getMachineId(),
+    };
+  }
+
+  cachedOpenAI = new OpenAI({
+    apiKey: settings.apiKey,
+    baseURL: settings.baseURL || undefined,
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    fetch: (url: any, init: any) => undiciFetch(url, { ...init, dispatcher: keepAliveAgent }),
+  });
+  cachedOpenAIKey = cacheKey;
+
+  // Fire-and-forget warmup: pre-establish TCP+TLS connection to the API
+  // server while the user is composing their first prompt.  Bounded by a
+  // short timeout so a slow / unreachable API never blocks process exit.
+  void (async () => {
+    const ac = new AbortController();
+    const timer = setTimeout(() => ac.abort(), 3000);
+    try {
+      await cachedOpenAI.models.list({ signal: ac.signal }).catch(() => {});
+    } finally {
+      clearTimeout(timer);
+    }
+  })();
+
+  return {
+    client: cachedOpenAI,
+    model: settings.model,
+    baseURL: settings.baseURL,
+    thinkingEnabled: settings.thinkingEnabled,
+    reasoningEffort: settings.reasoningEffort,
+    debugLogEnabled: settings.debugLogEnabled,
+    notify: settings.notify,
+    webSearchTool: settings.webSearchTool,
+    env: settings.env,
+    machineId: getMachineId(),
+  };
+}
+
+function getMachineId(): string | undefined {
+  try {
+    const idPath = path.join(os.homedir(), ".deepcode", "machine-id");
+    if (fs.existsSync(idPath)) {
+      const raw = fs.readFileSync(idPath, "utf8").trim();
+      if (raw) {
+        return raw;
+      }
+    }
+    const generated = `${os.hostname()}-${Math.random().toString(36).slice(2)}-${Date.now()}`;
+    fs.mkdirSync(path.dirname(idPath), { recursive: true });
+    fs.writeFileSync(idPath, generated, "utf8");
+    return generated;
+  } catch {
+    return undefined;
+  }
+}
diff --git a/src/ui/App.tsx b/src/ui/App.tsx
index 75d6689..5419a2a 100644
--- a/src/ui/App.tsx
+++ b/src/ui/App.tsx
@@ -4,7 +4,7 @@ import chalk from "chalk";
 import * as fs from "fs";
 import * as os from "os";
 import * as path from "path";
-import OpenAI from "openai";
+import { createOpenAIClient } from "../common/openai-client";
 import {
   type LlmStreamProgress,
   type MessageMeta,
@@ -166,6 +166,13 @@ export function App({ projectRoot, initialPrompt, onRestart }: AppProps): React.
     void refreshSkills();
   }, [refreshSessionsList, refreshSkills]);
 
+  // Eagerly create the OpenAI client on mount so the TCP+TLS connection
+  // warmup (fire-and-forget inside createOpenAIClient) starts before the
+  // user sends their first prompt.
+  useEffect(() => {
+    createOpenAIClient(projectRoot);
+  }, [projectRoot]);
+
   useLayoutEffect(() => {
     const settings = resolveCurrentSettings(projectRoot);
     void sessionManager.initMcpServers(settings.mcpServers);
@@ -838,69 +845,7 @@ export function resolveCurrentSettings(projectRoot: string = process.cwd()): Res
   );
 }
 
-export function createOpenAIClient(projectRoot: string = process.cwd()): {
-  client: OpenAI | null;
-  model: string;
-  baseURL: string;
-  thinkingEnabled: boolean;
-  reasoningEffort: "high" | "max";
-  debugLogEnabled: boolean;
-  notify?: string;
-  webSearchTool?: string;
-  env: Record<string, string>;
-  machineId?: string;
-} {
-  const settings = resolveCurrentSettings(projectRoot);
-  if (!settings.apiKey) {
-    return {
-      client: null,
-      model: settings.model,
-      baseURL: settings.baseURL,
-      thinkingEnabled: settings.thinkingEnabled,
-      reasoningEffort: settings.reasoningEffort,
-      debugLogEnabled: settings.debugLogEnabled,
-      notify: settings.notify,
-      webSearchTool: settings.webSearchTool,
-      env: settings.env,
-      machineId: getMachineId(),
-    };
-  }
-
-  const client = new OpenAI({
-    apiKey: settings.apiKey,
-    baseURL: settings.baseURL || undefined,
-  });
-  return {
-    client,
-    model: settings.model,
-    baseURL: settings.baseURL,
-    thinkingEnabled: settings.thinkingEnabled,
-    reasoningEffort: settings.reasoningEffort,
-    debugLogEnabled: settings.debugLogEnabled,
-    notify: settings.notify,
-    webSearchTool: settings.webSearchTool,
-    env: settings.env,
-    machineId: getMachineId(),
-  };
-}
-
-function getMachineId(): string | undefined {
-  try {
-    const idPath = path.join(os.homedir(), ".deepcode", "machine-id");
-    if (fs.existsSync(idPath)) {
-      const raw = fs.readFileSync(idPath, "utf8").trim();
-      if (raw) {
-        return raw;
-      }
-    }
-    const generated = `${os.hostname()}-${Math.random().toString(36).slice(2)}-${Date.now()}`;
-    fs.mkdirSync(path.dirname(idPath), { recursive: true });
-    fs.writeFileSync(idPath, generated, "utf8");
-    return generated;
-  } catch {
-    return undefined;
-  }
-}
+export { createOpenAIClient } from "../common/openai-client";
 
 function getUserSettingsPath(): string {
   return path.join(os.homedir(), ".deepcode", "settings.json");
diff --git a/src/ui/index.ts b/src/ui/index.ts
index 26e7eaa..d899d4b 100644
--- a/src/ui/index.ts
+++ b/src/ui/index.ts
@@ -11,9 +11,9 @@ export {
   writeProjectSettings,
   writeModelConfigSelection,
   resolveCurrentSettings,
-  createOpenAIClient,
   buildPromptDraftFromSessionMessage,
 } from "./App";
+export { createOpenAIClient } from "../common/openai-client";
 export { default as AppContainer } from "./AppContainer";
 export { AskUserQuestionPrompt } from "./AskUserQuestionPrompt";
 export { MessageView } from "./components";