diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
index d89843404e..80cfacdea4 100644
--- a/.agents/types/agent-definition.ts
+++ b/.agents/types/agent-definition.ts
@@ -109,6 +109,25 @@ export interface AgentDefinition {
       audio?: number | string
       request?: number | string
     }
+    /**
+     * Override the upstream LLM endpoint with an OpenAI-compatible base URL.
+     * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter
+     * and go directly to `${baseUrl}/chat/completions`.
+     *
+     * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible
+     * providers. The other providerOptions keys (order, allow_fallbacks, etc.)
+     * are OpenRouter-specific and ignored when `baseUrl` is set.
+     *
+     * Falls back to env var CODEBUFF_BASE_URL when unset.
+     * Example: "http://localhost:11434/v1"
+     */
+    baseUrl?: string
+    /**
+     * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset.
+     * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes
+     * (Ollama, LM Studio) ignore the value entirely.
+     */
+    apiKey?: string
   }
 
   // ============================================================================
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
index 030de3a14f..8847a98ecb 100644
--- a/agents/types/agent-definition.ts
+++ b/agents/types/agent-definition.ts
@@ -109,6 +109,25 @@ export interface AgentDefinition {
       audio?: number | string
       request?: number | string
     }
+    /**
+     * Override the upstream LLM endpoint with an OpenAI-compatible base URL.
+     * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter
+     * and go directly to `${baseUrl}/chat/completions`.
+     *
+     * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible
+     * providers. The other providerOptions keys (order, allow_fallbacks, etc.)
+     * are OpenRouter-specific and ignored when `baseUrl` is set.
+     *
+     * Falls back to env var CODEBUFF_BASE_URL when unset.
+     * Example: "http://localhost:11434/v1"
+     */
+    baseUrl?: string
+    /**
+     * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset.
+     * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes
+     * (Ollama, LM Studio) ignore the value entirely.
+     */
+    apiKey?: string
   }
 
   // ============================================================================
diff --git a/cli/src/commands/__tests__/local-provider.test.ts b/cli/src/commands/__tests__/local-provider.test.ts
new file mode 100644
index 0000000000..2f9580c720
--- /dev/null
+++ b/cli/src/commands/__tests__/local-provider.test.ts
@@ -0,0 +1,344 @@
+import { describe, expect, test, beforeEach, afterEach } from 'bun:test'
+
+import {
+  applyLocalAction,
+  DEFAULT_LOCAL_BASE_URL,
+  getActiveLocalBaseUrl,
+  getActiveLocalModel,
+  parseLocalArgs,
+} from '../local-provider'
+
+describe('parseLocalArgs — basic shapes', () => { // keyword subcommands and rejection of unknown input
+  test('empty args → status', () => {
+    expect(parseLocalArgs('').kind).toBe('status')
+    expect(parseLocalArgs('   ').kind).toBe('status')
+    expect(parseLocalArgs('\t\n').kind).toBe('status')
+  })
+
+  test('"status" → status', () => {
+    expect(parseLocalArgs('status').kind).toBe('status')
+    expect(parseLocalArgs('  status  ').kind).toBe('status')
+    expect(parseLocalArgs('STATUS').kind).toBe('status') // the subcommand is lower-cased before matching
+  })
+
+  test('"list" / "models" → list', () => {
+    expect(parseLocalArgs('list').kind).toBe('list')
+    expect(parseLocalArgs('models').kind).toBe('list')
+  })
+
+  test('"off" → disable', () => {
+    expect(parseLocalArgs('off').kind).toBe('disable')
+    expect(parseLocalArgs('disable').kind).toBe('disable')
+  })
+
+  test('"off" with stray args → invalid', () => {
+    const r = parseLocalArgs('off http://oops')
+    expect(r.kind).toBe('invalid')
+  })
+
+  test('unknown subcommand → invalid', () => {
+    const r = parseLocalArgs('foobar') // no ':' in the token, so the bare-model shortcut does not apply
+    expect(r.kind).toBe('invalid')
+    if (r.kind === 'invalid') expect(r.reason).toContain('Unknown')
+  })
+})
+
+describe('parseLocalArgs — enable shapes', () => { // `on` / `enable` / `set` plus the bare URL and bare model shortcuts
+  test('"on" → enable with default URL, no model', () => {
+    const r = parseLocalArgs('on')
+    expect(r.kind).toBe('enable')
+    if (r.kind === 'enable') {
+      expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL)
+      expect(r.model).toBeUndefined()
+    }
+  })
+
+  test('"on <url>" → enable with URL only', () => {
+    const r = parseLocalArgs('on http://localhost:1234/v1')
+    expect(r.kind).toBe('enable')
+    if (r.kind === 'enable') {
+      expect(r.baseUrl).toBe('http://localhost:1234/v1')
+      expect(r.model).toBeUndefined()
+    }
+  })
+
+  test('"on <model>" (model only, no URL) → enable with default URL + model', () => {
+    const r = parseLocalArgs('on llama3.1:8b') // a lone token without "://" is taken as the model name
+    expect(r.kind).toBe('enable')
+    if (r.kind === 'enable') {
+      expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL)
+      expect(r.model).toBe('llama3.1:8b')
+    }
+  })
+
+  test('"on <url> <model>" → both set', () => {
+    const r = parseLocalArgs('on http://localhost:1234/v1 llama3.1:8b')
+    expect(r.kind).toBe('enable')
+    if (r.kind === 'enable') {
+      expect(r.baseUrl).toBe('http://localhost:1234/v1')
+      expect(r.model).toBe('llama3.1:8b')
+    }
+  })
+
+  test('"enable <url>" and "set <model>" aliases work', () => {
+    const a = parseLocalArgs('enable http://x:1/v1')
+    expect(a.kind).toBe('enable')
+    const b = parseLocalArgs('set gemma4:e2b')
+    expect(b.kind).toBe('enable')
+    if (b.kind === 'enable') expect(b.model).toBe('gemma4:e2b')
+  })
+
+  test('bare URL → enable', () => {
+    const r = parseLocalArgs('http://localhost:11434/v1')
+    expect(r.kind).toBe('enable')
+    if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:11434/v1')
+  })
+
+  test('bare model tag → enable with default URL + model', () => {
+    const r = parseLocalArgs('llama3.1:8b') // the bare shortcut requires a ':' in the token
+    expect(r.kind).toBe('enable')
+    if (r.kind === 'enable') {
+      expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL)
+      expect(r.model).toBe('llama3.1:8b')
+    }
+  })
+
+  test('non-http URL → invalid', () => {
+    const r = parseLocalArgs('on ftp://localhost')
+    expect(r.kind).toBe('invalid')
+  })
+
+  test('malformed URL → invalid', () => {
+    const r = parseLocalArgs('on http://') // has an http:// prefix but no host
+    expect(r.kind).toBe('invalid')
+  })
+
+  test('https URL accepted', () => {
+    const r = parseLocalArgs('on https://my-vm.example.com:8080/v1 llama3.1:8b')
+    expect(r.kind).toBe('enable')
+    if (r.kind === 'enable')
+      expect(r.baseUrl).toBe('https://my-vm.example.com:8080/v1')
+  })
+})
+
+describe('parseLocalArgs — model subcommand', () => { // `/local model <name>` and its clear keywords
+  test('"model <name>" → set-model', () => {
+    const r = parseLocalArgs('model llama3.1:8b')
+    expect(r.kind).toBe('set-model')
+    if (r.kind === 'set-model') expect(r.model).toBe('llama3.1:8b')
+  })
+
+  test('"model clear" / "model off" / "model none" → clear-model', () => {
+    expect(parseLocalArgs('model clear').kind).toBe('clear-model')
+    expect(parseLocalArgs('model off').kind).toBe('clear-model')
+    expect(parseLocalArgs('model none').kind).toBe('clear-model')
+  })
+
+  test('"model" without name → invalid', () => {
+    const r = parseLocalArgs('model')
+    expect(r.kind).toBe('invalid')
+  })
+
+  test('"model <flag>" → invalid', () => {
+    const r = parseLocalArgs('model --x') // a leading '-' is rejected as a model name
+    expect(r.kind).toBe('invalid')
+  })
+})
+
+describe('applyLocalAction (side effects on process.env)', () => {
+  let originalBaseUrl: string | undefined
+  let originalApiKey: string | undefined
+  let originalModel: string | undefined
+
+  beforeEach(() => { // snapshot the three env vars, then start every test with all of them unset
+    originalBaseUrl = process.env.CODEBUFF_BASE_URL
+    originalApiKey = process.env.CODEBUFF_PROVIDER_API_KEY
+    originalModel = process.env.CODEBUFF_PROVIDER_MODEL
+    delete process.env.CODEBUFF_BASE_URL
+    delete process.env.CODEBUFF_PROVIDER_API_KEY
+    delete process.env.CODEBUFF_PROVIDER_MODEL
+  })
+
+  afterEach(() => { // restore the snapshot; a variable that was unset before is deleted again
+    if (originalBaseUrl === undefined) delete process.env.CODEBUFF_BASE_URL
+    else process.env.CODEBUFF_BASE_URL = originalBaseUrl
+    if (originalApiKey === undefined)
+      delete process.env.CODEBUFF_PROVIDER_API_KEY
+    else process.env.CODEBUFF_PROVIDER_API_KEY = originalApiKey
+    if (originalModel === undefined) delete process.env.CODEBUFF_PROVIDER_MODEL
+    else process.env.CODEBUFF_PROVIDER_MODEL = originalModel
+  })
+
+  test('enable without model sets baseUrl, clears any previous model override', async () => {
+    process.env.CODEBUFF_PROVIDER_MODEL = 'stale-model'
+    const msg = await applyLocalAction({
+      kind: 'enable',
+      baseUrl: 'http://localhost:11434/v1',
+    })
+    expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1')
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined()
+    expect(msg).toContain('ON')
+    expect(msg).toContain('No model override')
+    expect(msg).toContain('llama3.1:8b') // the hint text uses this tag as its example model
+  })
+
+  test('enable with model sets both env vars', async () => {
+    const msg = await applyLocalAction({
+      kind: 'enable',
+      baseUrl: 'http://localhost:11434/v1',
+      model: 'llama3.1:8b',
+    })
+    expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1')
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b')
+    expect(msg).toContain('Model override: llama3.1:8b')
+  })
+
+  test('set-model when local is OFF → error', async () => {
+    const msg = await applyLocalAction({
+      kind: 'set-model',
+      model: 'llama3.1:8b',
+    })
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined()
+    expect(msg).toContain('OFF')
+  })
+
+  test('set-model when local is ON → updates model', async () => {
+    process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1'
+    const msg = await applyLocalAction({
+      kind: 'set-model',
+      model: 'llama3.1:8b',
+    })
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b')
+    expect(msg).toContain('Model override: llama3.1:8b')
+  })
+
+  test('clear-model removes only the model, keeps baseUrl', async () => {
+    process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1'
+    process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b'
+    const msg = await applyLocalAction({ kind: 'clear-model' })
+    expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1')
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined()
+    expect(msg).toContain('cleared')
+  })
+
+  test('clear-model when none set is friendly', async () => {
+    const msg = await applyLocalAction({ kind: 'clear-model' })
+    expect(msg).toContain('No model override')
+  })
+
+  test('disable clears baseUrl, apiKey, and model', async () => {
+    process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1'
+    process.env.CODEBUFF_PROVIDER_API_KEY = 'ollama'
+    process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b'
+    const msg = await applyLocalAction({ kind: 'disable' })
+    expect(process.env.CODEBUFF_BASE_URL).toBeUndefined()
+    expect(process.env.CODEBUFF_PROVIDER_API_KEY).toBeUndefined()
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined()
+    expect(msg).toContain('OFF')
+    expect(msg).toContain('llama3.1:8b') // the message names the override that was cleared
+  })
+
+  test('disable when already off → idempotent', async () => {
+    const msg = await applyLocalAction({ kind: 'disable' })
+    expect(msg).toContain('already OFF')
+  })
+
+  test('status when off mentions /local list and shows usage', async () => {
+    const msg = await applyLocalAction({ kind: 'status' })
+    expect(msg).toContain('OFF')
+    expect(msg).toContain('/local list')
+  })
+
+  test('status when on with model shows both URL and model', async () => {
+    process.env.CODEBUFF_BASE_URL = 'http://localhost:1234/v1'
+    process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b'
+    const msg = await applyLocalAction({ kind: 'status' })
+    expect(msg).toContain('ON')
+    expect(msg).toContain('http://localhost:1234/v1')
+    expect(msg).toContain('llama3.1:8b')
+  })
+
+  test('status when on without model warns about no model override', async () => {
+    process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1'
+    const msg = await applyLocalAction({ kind: 'status' })
+    expect(msg).toContain('ON')
+    expect(msg).toContain('(none')
+  })
+
+  test('invalid returns reason prefixed', async () => {
+    const msg = await applyLocalAction({
+      kind: 'invalid',
+      reason: 'something wrong',
+    })
+    expect(msg).toContain('something wrong')
+  })
+
+  test('list when off returns error', async () => { // returns before any network request is made
+    const msg = await applyLocalAction({ kind: 'list' })
+    expect(msg).toContain('OFF')
+  })
+})
+
+describe('parseLocalArgs + applyLocalAction end-to-end', () => {
+  // `/local off` deletes the API key as well as the URL and model, so all
+  // three env vars are snapshotted before and restored after every test.
+  const ENV_KEYS = ['CODEBUFF_BASE_URL', 'CODEBUFF_PROVIDER_API_KEY', 'CODEBUFF_PROVIDER_MODEL']
+  const saved: Record<string, string | undefined> = {}
+
+  beforeEach(() => {
+    for (const key of ENV_KEYS) saved[key] = process.env[key]
+    for (const key of ENV_KEYS) delete process.env[key]
+  })
+
+  afterEach(() => {
+    for (const [key, value] of Object.entries(saved)) {
+      if (value === undefined) delete process.env[key]
+      else process.env[key] = value
+    }
+  })
+
+  test('user types `/local on llama3.1:8b` → URL default + model set', async () => {
+    await applyLocalAction(parseLocalArgs('on llama3.1:8b'))
+    expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL)
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b')
+  })
+
+  test('user types `/local llama3.1:8b` (no `on`) → same effect', async () => {
+    await applyLocalAction(parseLocalArgs('llama3.1:8b'))
+    expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL)
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b')
+  })
+
+  test('user types `/local on http://x/v1 llama3.1:8b` → both set', async () => {
+    await applyLocalAction(parseLocalArgs('on http://x.example.com:9999/v1 llama3.1:8b'))
+    expect(process.env.CODEBUFF_BASE_URL).toBe('http://x.example.com:9999/v1')
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b')
+  })
+
+  test('user types `/local model llama3.1:8b` after `/local on` → model added', async () => {
+    await applyLocalAction(parseLocalArgs('on'))
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined()
+    await applyLocalAction(parseLocalArgs('model llama3.1:8b'))
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b')
+  })
+
+  test('user types `/local off` → both cleared', async () => {
+    await applyLocalAction(parseLocalArgs('on llama3.1:8b'))
+    await applyLocalAction(parseLocalArgs('off'))
+    expect(process.env.CODEBUFF_BASE_URL).toBeUndefined()
+    expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined()
+  })
+
+  test('mutations are visible via getter functions', async () => {
+    await applyLocalAction(parseLocalArgs('on llama3.1:8b'))
+    expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL)
+    expect(getActiveLocalModel()).toBe('llama3.1:8b')
+  })
+
+  test('re-enabling without model clears previous model override', async () => {
+    await applyLocalAction(parseLocalArgs('on llama3.1:8b'))
+    await applyLocalAction(parseLocalArgs('on'))
+    expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL)
+    expect(getActiveLocalModel()).toBeUndefined()
+  })
+})
diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts
index 0eda49607e..9afa024028 100644
--- a/cli/src/commands/command-registry.ts
+++ b/cli/src/commands/command-registry.ts
@@ -5,6 +5,7 @@ import { handleAdsEnable, handleAdsDisable } from './ads'
 import { handleHelpCommand } from './help'
 import { handleImageCommand } from './image'
 import { handleInitializationFlowLocally } from './init'
+import { applyLocalAction, parseLocalArgs } from './local-provider'
 import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders'
 import { runBashCommand } from './router'
 import { handleUsageCommand } from './usage'
@@ -392,6 +393,18 @@ const ALL_COMMANDS: CommandDefinition[] = [
       clearInput(params)
     },
   }),
+  defineCommandWithArgs({
+    name: 'local',
+    handler: async (params, args) => {
+      const userText = params.inputValue.trim()
+      params.setMessages((prev) => [...prev, getUserMessage(userText)]) // append the typed command to the message list
+      params.saveToHistory(userText)
+      clearInput(params)
+      // Parse the arguments, apply the action, then append the returned text as a system message.
+      const message = await applyLocalAction(parseLocalArgs(args))
+      params.setMessages((prev) => [...prev, getSystemMessage(message)])
+    },
+  }),
   // Mode commands generated from AGENT_MODES (excluded in Freebuff)
   ...(IS_FREEBUFF ? [] : AGENT_MODES).map((mode) =>
     defineCommandWithArgs({
diff --git a/cli/src/commands/local-provider.ts b/cli/src/commands/local-provider.ts
new file mode 100644
index 0000000000..85a3a65143
--- /dev/null
+++ b/cli/src/commands/local-provider.ts
@@ -0,0 +1,401 @@
+/**
+ * /local slash command — runtime toggle for the custom OpenAI-compatible
+ * provider (Ollama, LM Studio, self-hosted).
+ *
+ * Mutates process.env at runtime. The SDK reads these env vars lazily on
+ * every promptAiSdkStream call, so changes take effect immediately for the
+ * next request without needing to rebuild the CodebuffClient.
+ *
+ * Subcommands:
+ *   /local                          — show current status
+ *   /local on                       — enable with default Ollama URL (clears any model override)
+ *   /local on <model>               — enable with default URL + model override
+ *   /local on <url>                 — enable with custom URL (clears any model override)
+ *   /local on <url> <model>         — enable with URL + model override
+ *   /local set <model>              — alias for `/local on <model>`
+ *   /local model <model>            — set model override only (URL must already be set)
+ *   /local model clear              — clear the model override
+ *   /local off                      — disable, return to Codebuff backend
+ *   /local status                   — same as `/local`
+ *   /local list                     — query the local provider for available models
+ *
+ * Agent-level providerOptions.baseUrl always wins; /local only affects agents
+ * that don't set their own baseUrl. Same for the model override — agents with
+ * an explicit providerOptions.baseUrl use their own declared model.
+ */
+
+import {
+  PROVIDER_API_KEY_ENV_VAR,
+  PROVIDER_BASE_URL_ENV_VAR,
+  PROVIDER_MODEL_ENV_VAR,
+} from '@codebuff/common/constants/custom-provider'
+
+/** Default URL used by `/local on` when the user doesn't specify one. */
+export const DEFAULT_LOCAL_BASE_URL = 'http://localhost:11434/v1'
+/** Parsed form of a `/local` invocation; `kind` is the discriminant. */
+export type LocalCommandAction =
+  | { kind: 'status' } // report the current state; changes nothing
+  | { kind: 'enable'; baseUrl: string; model?: string } // omitting `model` clears any existing override
+  | { kind: 'set-model'; model: string } // refused while the provider is OFF
+  | { kind: 'clear-model' }
+  | { kind: 'list' } // query the provider for its available models
+  | { kind: 'disable' } // removes the URL, API key and model env vars
+  | { kind: 'invalid'; reason: string } // `reason` is shown to the user
+
+function looksLikeUrl(token: string): boolean {
+  // True for any token containing "://" — the caller validates the actual scheme.
+  return token.indexOf('://') !== -1
+}
+
+function isLikelyModelTag(token: string): boolean {
+  // Accepts Ollama-style names with an optional tag ("llama3.1:8b", "gemma4:e2b",
+  // "qwen2.5"); empty tokens, URL-shaped tokens and flags are turned away.
+  return !(!token || looksLikeUrl(token) || token.startsWith('-'))
+}
+
+/**
+ * Parse the argument string typed after `/local` into a LocalCommandAction.
+ * Pure function — no side effects; exported for unit testing.
+ *
+ * Subcommand keywords (including `clear` / `off` / `none` after `model`) are
+ * matched case-insensitively; URLs and model names keep the user's casing.
+ *
+ * @param rawArgs Everything after `/local`; surrounding whitespace is ignored.
+ * @returns The parsed action, or `{ kind: 'invalid', reason }` for bad input.
+ */
+export function parseLocalArgs(rawArgs: string): LocalCommandAction {
+  const trimmed = rawArgs.trim()
+
+  // No args → show status
+  if (!trimmed) return { kind: 'status' }
+
+  const tokens = trimmed.split(/\s+/)
+  const subcommand = tokens[0]
+  const sub = subcommand.toLowerCase()
+  const rest = tokens.slice(1)
+
+  if (sub === 'status') return { kind: 'status' }
+
+  if (sub === 'list' || sub === 'models') return { kind: 'list' }
+
+  if (sub === 'off' || sub === 'disable') {
+    if (rest.length > 0) {
+      return {
+        kind: 'invalid',
+        reason: `\`/local ${sub}\` does not take arguments. Got: "${rest.join(' ')}"`,
+      }
+    }
+    return { kind: 'disable' }
+  }
+
+  if (sub === 'model') {
+    if (rest.length === 0) {
+      return {
+        kind: 'invalid',
+        reason: 'Usage: `/local model <model>` or `/local model clear`',
+      }
+    }
+    const value = rest.join(' ')
+    if (['clear', 'off', 'none'].includes(value.toLowerCase())) {
+      return { kind: 'clear-model' }
+    }
+    if (!isLikelyModelTag(rest[0])) {
+      return {
+        kind: 'invalid',
+        reason: `Invalid model name: "${value}". Expected something like "llama3.1:8b".`,
+      }
+    }
+    return { kind: 'set-model', model: value }
+  }
+
+  if (sub === 'on' || sub === 'enable' || sub === 'set') {
+    return parseEnable(rest)
+  }
+
+  // Looks like a bare URL or bare model (e.g. `/local http://...` or `/local llama3.1:8b`)?
+  if (looksLikeUrl(subcommand)) {
+    return parseEnable([subcommand, ...rest])
+  }
+  // Bare model shortcut: must contain `:` so we don't silently accept typos
+  // like `/local foobar`. Use `/local on <name>` for tagless models.
+  if (subcommand.includes(':') && isLikelyModelTag(subcommand) && rest.length === 0) {
+    return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL, model: subcommand }
+  }
+
+  return {
+    kind: 'invalid',
+    reason: `Unknown /local subcommand: "${subcommand}". Try: on, off, model, status, list.`,
+  }
+}
+
+/**
+ * Parse the tokens after `/local on` / `/local enable` / `/local set`.
+ * Supports four shapes:
+ *   (empty)        → default URL, no model override
+ *   <url>          → URL, no model override
+ *   <model>        → default URL + model
+ *   <url> <model>  → URL + model
+ * With two or more tokens the first must be a URL; the remaining tokens are
+ * joined with single spaces to form the model name.
+ */
+function parseEnable(tokens: string[]): LocalCommandAction {
+  if (tokens.length === 0) return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL }
+
+  if (tokens.length === 1) {
+    const t = tokens[0]
+    // URL-shaped tokens go through URL validation regardless of scheme.
+    if (looksLikeUrl(t)) {
+      const v = validateBaseUrl(t)
+      if (!v.ok) return { kind: 'invalid', reason: v.reason }
+      return { kind: 'enable', baseUrl: v.url }
+    }
+    if (isLikelyModelTag(t)) {
+      return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL, model: t }
+    }
+    return { kind: 'invalid', reason: `Could not interpret "${t}" as URL or model name.` }
+  }
+
+  // Two or more tokens. Only `<url> <model>` is accepted; a model cannot come first.
+  const [first, ...rest] = tokens
+  if (!looksLikeUrl(first)) {
+    return {
+      kind: 'invalid',
+      reason: `With multiple arguments the first must be a URL. Got: "${first}". Usage: /local on [url] [model]`,
+    }
+  }
+  const v = validateBaseUrl(first)
+  if (!v.ok) return { kind: 'invalid', reason: v.reason }
+  const modelToken = rest.join(' ')
+  if (!isLikelyModelTag(rest[0])) {
+    return {
+      kind: 'invalid',
+      reason: `Invalid model name: "${modelToken}".`,
+    }
+  }
+  return { kind: 'enable', baseUrl: v.url, model: modelToken }
+}
+
+/**
+ * Check that `raw` is a parseable http(s) URL. The scheme is matched
+ * case-insensitively; an accepted URL is returned exactly as it was typed.
+ */
+function validateBaseUrl(
+  raw: string,
+): { ok: true; url: string } | { ok: false; reason: string } {
+  if (!raw.trim()) return { ok: false, reason: 'URL is required.' }
+  if (!/^https?:\/\//i.test(raw)) {
+    return {
+      ok: false,
+      reason: `URL must start with http:// or https://. Got: "${raw}"`,
+    }
+  }
+  try {
+    // eslint-disable-next-line no-new -- built only to validate; throws on malformed input
+    new URL(raw)
+  } catch {
+    return { ok: false, reason: `Invalid URL: "${raw}"` }
+  }
+  return { ok: true, url: raw }
+}
+
+/** Current custom-provider base URL, or undefined while local mode is off.
+ *  Looked up in process.env on every call, so values exported in the shell
+ *  and values set through /local are both reflected. */
+export function getActiveLocalBaseUrl(): string | undefined {
+  const { [PROVIDER_BASE_URL_ENV_VAR]: baseUrl } = process.env
+  return baseUrl
+}
+
+/** Read the model override from process.env; undefined when no override is set. */
+export function getActiveLocalModel(): string | undefined {
+  return process.env[PROVIDER_MODEL_ENV_VAR]
+}
+
+/**
+ * Apply a parsed `/local` action to process.env and return the user-facing message.
+ * `status`, `list` and `invalid` leave the environment untouched; every env write
+ * or delete for the other kinds happens in this function, which keeps it testable.
+ * The function is async only because `list` hits the network.
+ */
+export async function applyLocalAction(
+  action: LocalCommandAction,
+): Promise<string> {
+  if (action.kind === 'invalid') {
+    return `❌ ${action.reason}`
+  }
+
+  if (action.kind === 'status') {
+    return formatStatus()
+  }
+
+  if (action.kind === 'list') {
+    return listModels()
+  }
+
+  if (action.kind === 'enable') {
+    process.env[PROVIDER_BASE_URL_ENV_VAR] = action.baseUrl
+    if (action.model) {
+      process.env[PROVIDER_MODEL_ENV_VAR] = action.model
+    } else {
+      // Important: an `enable` without an explicit model clears any previous
+      // model override, so an old setting doesn't silently apply to a new URL.
+      delete process.env[PROVIDER_MODEL_ENV_VAR]
+    }
+    const lines = [
+      'Local provider: ON',
+      `  URL: ${action.baseUrl}`,
+    ]
+    if (action.model) {
+      lines.push(`  Model override: ${action.model}`)
+      lines.push('')
+      lines.push(
+        `Agents that would otherwise use a cloud model will use \`${action.model}\` instead.`,
+      )
+    } else {
+      lines.push('')
+      lines.push('⚠️  No model override set. Cloud models (e.g.')
+      lines.push('  `anthropic/claude-opus-4-7`) will not exist on the local provider.')
+      lines.push('  Run `/local model <name>` (e.g. `/local model llama3.1:8b`)')
+      lines.push('  or `/local list` to see available models.')
+    }
+    lines.push('')
+    lines.push('Note: agents with their own `providerOptions.baseUrl` still win.')
+    lines.push('Disable with: /local off')
+    return lines.join('\n')
+  }
+
+  if (action.kind === 'set-model') {
+    if (!getActiveLocalBaseUrl()) { // a model override is only accepted while a base URL is active
+      return [
+        '❌ Local provider is OFF. Enable it first with `/local on` before setting a model.',
+      ].join('\n')
+    }
+    process.env[PROVIDER_MODEL_ENV_VAR] = action.model
+    return [
+      `Model override: ${action.model}`,
+      '',
+      `Local provider remains ON at ${getActiveLocalBaseUrl()}.`,
+      `Agents will use \`${action.model}\` for inference.`,
+    ].join('\n')
+  }
+
+  if (action.kind === 'clear-model') {
+    const wasSet = getActiveLocalModel() // read before deleting so the message can name it
+    delete process.env[PROVIDER_MODEL_ENV_VAR]
+    if (!wasSet) {
+      return 'No model override was set. No change.'
+    }
+    return [
+      `Model override cleared (was: ${wasSet}).`,
+      '',
+      'Warning: without an override, the agent\'s declared cloud model will be sent',
+      'to the local provider — likely a "model not found" error. Either set a new',
+      'model with `/local model <name>` or turn local mode off with `/local off`.',
+    ].join('\n')
+  }
+
+  // action.kind === 'disable'
+  const wasBaseUrl = getActiveLocalBaseUrl() // captured before the deletes below, for the message
+  const wasModel = getActiveLocalModel()
+  delete process.env[PROVIDER_BASE_URL_ENV_VAR]
+  delete process.env[PROVIDER_API_KEY_ENV_VAR] // the API key is removed too, whoever set it
+  delete process.env[PROVIDER_MODEL_ENV_VAR]
+  if (!wasBaseUrl && !wasModel) { // a previously set API key alone still reports "already OFF"
+    return 'Local provider was already OFF. No change.'
+  }
+  const lines = ['Local provider: OFF']
+  if (wasBaseUrl) lines.push(`  Previously: ${wasBaseUrl}`)
+  if (wasModel) lines.push(`  Cleared model override: ${wasModel}`)
+  lines.push('')
+  lines.push('Routing returns to the Codebuff backend.')
+  return lines.join('\n')
+}
+
+/** Render the `/local status` text from the env-backed URL and model getters. */
+function formatStatus(): string {
+  const activeUrl = getActiveLocalBaseUrl()
+  if (!activeUrl) {
+    // OFF: explain the default routing and how to switch local mode on.
+    return [
+      'Local provider: OFF',
+      '',
+      'All agents (without per-agent providerOptions.baseUrl) go through the Codebuff backend.',
+      '',
+      `Enable with: /local on <model>   (e.g. /local on llama3.1:8b)`,
+      `             /local on           (uses ${DEFAULT_LOCAL_BASE_URL}, no model override)`,
+      `Discover available local models: /local list`,
+    ].join('\n')
+  }
+  const activeModel = getActiveLocalModel()
+  const modelLine = activeModel
+    ? `  Model override: ${activeModel}`
+    : '  Model override: (none — agent\'s declared model will be sent as-is)'
+  return [
+    'Local provider: ON',
+    `  URL: ${activeUrl}`,
+    modelLine,
+    '',
+    'Agents without their own providerOptions.baseUrl will use this endpoint.',
+    'Commands: /local model <name>, /local off, /local list',
+  ].join('\n')
+}
+
+/**
+ * List the models reported by the provider's Ollama-style `/api/tags` endpoint.
+ * Best-effort: aborted after 3 s; request failures come back as a friendly message.
+ */
+async function listModels(): Promise<string> {
+  const baseUrl = getActiveLocalBaseUrl()
+  if (!baseUrl) {
+    return [
+      '❌ Local provider is OFF. Run `/local on <url>` first, then `/local list`.',
+    ].join('\n')
+  }
+  // /api/tags lives at the root of the Ollama server, not under /v1.
+  // Strip trailing slashes and then a trailing /v1 if present, then append /api/tags.
+  const root = baseUrl.replace(/\/+$/, '').replace(/\/v1$/, '')
+  const tagsUrl = `${root}/api/tags`
+
+  const controller = new AbortController()
+  const timeout = setTimeout(() => controller.abort(), 3000) // abort the request after 3 seconds
+  try {
+    const res = await fetch(tagsUrl, { signal: controller.signal })
+    if (!res.ok) {
+      return [
+        `Could not list models at ${tagsUrl} (HTTP ${res.status}).`,
+        '',
+        'Note: this only works for Ollama-compatible providers.',
+        'For LM Studio or others, set the model manually with `/local model <name>`.',
+      ].join('\n')
+    }
+    const body = (await res.json()) as { models?: Array<{ name?: string }> }
+    const names = (body.models ?? [])
+      .map((m) => m.name)
+      .filter((n): n is string => typeof n === 'string') // entries without a string name are dropped
+    if (names.length === 0) {
+      return [
+        `Local provider has no models loaded.`,
+        '',
+        'Try `ollama pull llama3.1:8b` (or any tag of your choice) and run `/local list` again.',
+      ].join('\n')
+    }
+    const active = getActiveLocalModel()
+    const lines = [`Available models at ${root}:`]
+    for (const name of names) {
+      const marker = name === active ? '  ▶ ' : '    ' // arrow marks the current model override
+      lines.push(`${marker}${name}`)
+    }
+    lines.push('')
+    lines.push(`Use \`/local model <name>\` to pick one.`)
+    return lines.join('\n')
+  } catch (e) { // anything thrown above lands here: network error, timeout abort, or a non-JSON body
+    return [
+      `Could not reach ${tagsUrl}.`,
+      '',
+      'Check that the provider is running and the URL is correct.',
+      'For non-Ollama providers, set the model manually with `/local model <name>`.',
+    ].join('\n')
+  } finally {
+    clearTimeout(timeout)
+  }
+}
diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts
index 14d71abecd..57e9b2668c 100644
--- a/cli/src/data/slash-commands.ts
+++ b/cli/src/data/slash-commands.ts
@@ -163,6 +163,11 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [
     description: 'Attach an image file (or Ctrl+V to paste from clipboard)',
     aliases: ['img', 'attach'],
   },
+  {
+    id: 'local',
+    label: 'local',
+    description: 'Toggle local LLM provider (Ollama/LM Studio). Usage: /local [on <model>|off|model <name>|list|status]',
+  },
   ...MODE_COMMANDS,
   // {
   //   id: 'publish',
diff --git a/common/src/constants/custom-provider.ts b/common/src/constants/custom-provider.ts
new file mode 100644
index 0000000000..5b59394287
--- /dev/null
+++ b/common/src/constants/custom-provider.ts
@@ -0,0 +1,17 @@
+/** Env var that overrides the upstream LLM endpoint with an OpenAI-compatible base URL.
+ *  Lower precedence than per-agent providerOptions.baseUrl and the CodebuffClient option. */
+export const PROVIDER_BASE_URL_ENV_VAR = 'CODEBUFF_BASE_URL'
+
+/** Env var providing the API key for the endpoint set by PROVIDER_BASE_URL_ENV_VAR.
+ *  Most local runtimes (Ollama, LM Studio) ignore the key entirely. */
+export const PROVIDER_API_KEY_ENV_VAR = 'CODEBUFF_PROVIDER_API_KEY'
+
+/** Env var overriding the agent's declared model when a custom provider is active.
+ *  Used by `/local on <model>` to substitute the cloud model (e.g.
+ *  `anthropic/claude-opus-4-7`) with a model the local provider actually has
+ *  (e.g. `llama3.1:8b`).
+ *
+ *  Only takes effect when PROVIDER_BASE_URL_ENV_VAR is set AND the agent
+ *  itself doesn't declare its own `providerOptions.baseUrl` — agents with an
+ *  explicit baseUrl are assumed to declare a matching model. */
+export const PROVIDER_MODEL_ENV_VAR = 'CODEBUFF_PROVIDER_MODEL'
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index 030de3a14f..8847a98ecb 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -109,6 +109,25 @@ export interface AgentDefinition {
       audio?: number | string
       request?: number | string
     }
+    /**
+     * Override the upstream LLM endpoint with an OpenAI-compatible base URL.
+     * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter
+     * and go directly to `${baseUrl}/chat/completions`.
+     *
+     * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible
+     * providers. The other providerOptions keys (order, allow_fallbacks, etc.)
+     * are OpenRouter-specific and ignored when `baseUrl` is set.
+     *
+     * Falls back to env var CODEBUFF_BASE_URL when unset.
+     * Example: "http://localhost:11434/v1"
+     */
+    baseUrl?: string
+    /**
+     * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset.
+     * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes
+     * (Ollama, LM Studio) ignore the value entirely.
+     */
+    apiKey?: string
   }
 
   // ============================================================================
diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts
index 0d89ba7ede..6cb0a77842 100644
--- a/common/src/types/agent-template.ts
+++ b/common/src/types/agent-template.ts
@@ -37,6 +37,12 @@ export type OpenRouterReasoningOptions = {
     }
 )
 
+/**
+ * OpenRouter provider-routing options, plus optional fields to override the
+ * upstream endpoint with an OpenAI-compatible base URL (e.g. Ollama, LM Studio,
+ * self-hosted). The routing fields below are OpenRouter-specific and are
+ * ignored when `baseUrl` is set.
+ */
 export type OpenRouterProviderRoutingOptions = {
   /**
    * List of provider slugs to try in order (e.g. ["anthropic", "openai"])
@@ -90,6 +96,20 @@ export type OpenRouterProviderRoutingOptions = {
     audio?: number | string
     request?: number | string
   }
+  /**
+   * Override the upstream LLM endpoint with an OpenAI-compatible base URL.
+   * When set, this agent's calls bypass the Codebuff backend / OpenRouter
+   * and go directly to `${baseUrl}/chat/completions`.
+   *
+   * Other keys above (order, allow_fallbacks, ...) are OpenRouter-specific and
+   * are ignored when baseUrl is set. Falls back to env var CODEBUFF_BASE_URL.
+   */
+  baseUrl?: string
+  /**
+   * API key for the endpoint set in `baseUrl`. Ignored if baseUrl is unset.
+   * Falls back to env var CODEBUFF_PROVIDER_API_KEY.
+   */
+  apiKey?: string
 }
 
 export type OpenRouterProviderOptions = {
diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts
index 11c5a5ba0c..853ef54b42 100644
--- a/common/src/types/contracts/llm.ts
+++ b/common/src/types/contracts/llm.ts
@@ -56,6 +56,9 @@ export type PromptAiSdkStreamFn = (
     includeCacheControl?: boolean
     cacheDebugCorrelation?: string
     agentProviderOptions?: OpenRouterProviderRoutingOptions
+    /** Fallback custom-provider config injected by the SDK Client.
+     *  Lower precedence than an agent's own providerOptions.baseUrl. */
+    clientCustomProvider?: { baseUrl?: string; apiKey?: string }
     /** List of agents that can be spawned - used to transform agent tool calls */
     spawnableAgents?: string[]
     /** Map of locally available agent templates - used to transform agent tool calls */
diff --git a/common/src/types/dynamic-agent-template.ts b/common/src/types/dynamic-agent-template.ts
index d0a4097305..b9762566d1 100644
--- a/common/src/types/dynamic-agent-template.ts
+++ b/common/src/types/dynamic-agent-template.ts
@@ -167,6 +167,8 @@ export const DynamicAgentDefinitionSchema = z.object({
           request: z.union([z.number(), z.string()]).optional(),
         })
         .optional(),
+      baseUrl: z.string().url().optional(),
+      apiKey: z.string().optional(),
     })
     .optional(),
 
diff --git a/sdk/src/env.ts b/sdk/src/env.ts
index 033e3f245d..b49f88b6fa 100644
--- a/sdk/src/env.ts
+++ b/sdk/src/env.ts
@@ -7,6 +7,11 @@
 
 import { BYOK_OPENROUTER_ENV_VAR } from '@codebuff/common/constants/byok'
 import { CHATGPT_OAUTH_TOKEN_ENV_VAR } from '@codebuff/common/constants/chatgpt-oauth'
+import {
+  PROVIDER_API_KEY_ENV_VAR,
+  PROVIDER_BASE_URL_ENV_VAR,
+  PROVIDER_MODEL_ENV_VAR,
+} from '@codebuff/common/constants/custom-provider'
 import { API_KEY_ENV_VAR } from '@codebuff/common/constants/paths'
 import { getBaseEnv } from '@codebuff/common/env-process'
 
@@ -48,3 +53,28 @@ export const getByokOpenrouterApiKeyFromEnv = (): string | undefined => {
 export const getChatGptOAuthTokenFromEnv = (): string | undefined => {
   return process.env[CHATGPT_OAUTH_TOKEN_ENV_VAR]
 }
+
+/**
+ * Get the custom upstream provider base URL from environment.
+ * Used when an agent's providerOptions.baseUrl is unset and no CodebuffClient option overrides it.
+ */
+export const getCustomProviderBaseUrlFromEnv = (): string | undefined => {
+  return process.env[PROVIDER_BASE_URL_ENV_VAR]
+}
+
+/**
+ * Get the custom upstream provider API key from environment.
+ * Paired with getCustomProviderBaseUrlFromEnv.
+ */
+export const getCustomProviderApiKeyFromEnv = (): string | undefined => {
+  return process.env[PROVIDER_API_KEY_ENV_VAR]
+}
+
+/**
+ * Get the override model name from environment. When the custom provider is
+ * active, this value replaces the agent's declared model.
+ * Returns undefined if unset.
+ */
+export const getCustomProviderModelFromEnv = (): string | undefined => {
+  return process.env[PROVIDER_MODEL_ENV_VAR]
+}
diff --git a/sdk/src/impl/__tests__/model-provider-custom.test.ts b/sdk/src/impl/__tests__/model-provider-custom.test.ts
new file mode 100644
index 0000000000..0ff4336355
--- /dev/null
+++ b/sdk/src/impl/__tests__/model-provider-custom.test.ts
@@ -0,0 +1,94 @@
+import { describe, expect, test, afterEach, mock } from 'bun:test'
+
+describe('getModelForRequest with customProvider', () => {
+  afterEach(() => {
+    mock.restore()
+  })
+
+  test('returns isCustomProvider: true when customProvider.baseUrl is set', async () => {
+    const { getModelForRequest } = await import('../model-provider')
+
+    const result = await getModelForRequest({
+      apiKey: 'cb-test-key',
+      model: 'gemma2:9b',
+      customProvider: { baseUrl: 'http://localhost:11434/v1', apiKey: 'ollama' },
+    })
+
+    expect(result.isCustomProvider).toBe(true)
+    expect(result.isChatGptOAuth).toBe(false)
+    expect(result.model).toBeDefined()
+    expect((result.model as any).modelId).toBe('gemma2:9b')
+  })
+
+  test('does not return isCustomProvider when baseUrl is missing', async () => {
+    const { getModelForRequest } = await import('../model-provider')
+
+    const result = await getModelForRequest({
+      apiKey: 'cb-test-key',
+      model: 'anthropic/claude-sonnet-4',
+    })
+
+    expect(result.isCustomProvider).toBe(false)
+  })
+
+  test('customProvider takes precedence over ChatGPT OAuth eligibility', async () => {
+    const { getModelForRequest } = await import('../model-provider')
+
+    const result = await getModelForRequest({
+      apiKey: 'cb-test-key',
+      model: 'openai/gpt-5.3',
+      customProvider: { baseUrl: 'http://localhost:11434/v1' },
+    })
+
+    expect(result.isCustomProvider).toBe(true)
+    expect(result.isChatGptOAuth).toBe(false)
+  })
+
+  test('trims trailing slash from baseUrl (constructs cleanly)', async () => {
+    const { getModelForRequest } = await import('../model-provider')
+
+    const result = await getModelForRequest({
+      apiKey: 'cb-test-key',
+      model: 'gemma2:9b',
+      customProvider: { baseUrl: 'http://localhost:11434/v1/' },
+    })
+
+    expect(result.isCustomProvider).toBe(true)
+  })
+
+  test('omitting apiKey is allowed', async () => {
+    const { getModelForRequest } = await import('../model-provider')
+
+    const result = await getModelForRequest({
+      apiKey: 'cb-test-key',
+      model: 'gemma2:9b',
+      customProvider: { baseUrl: 'http://localhost:11434/v1' },
+    })
+
+    expect(result.isCustomProvider).toBe(true)
+  })
+
+  test('customProvider arg drives selection regardless of env (precedence contract)', async () => {
+    // Contract: getModelForRequest receives the *resolved* customProvider; the
+    // caller (promptAiSdkStream) applies the agent > client > env ladder first.
+    // `finally` restores the env so a failing assertion cannot leak it.
+    process.env.CODEBUFF_BASE_URL = 'http://from-env:11434/v1'
+    process.env.CODEBUFF_PROVIDER_API_KEY = 'env-key'
+    try {
+      const { getModelForRequest } = await import('../model-provider')
+      const result = await getModelForRequest({
+        apiKey: 'cb-key',
+        model: 'gemma2:9b',
+        customProvider: {
+          baseUrl: 'http://from-agent:11434/v1',
+          apiKey: 'agent-key',
+        },
+      })
+      expect(result.isCustomProvider).toBe(true)
+      expect(result.model).toBeDefined()
+    } finally {
+      delete process.env.CODEBUFF_BASE_URL
+      delete process.env.CODEBUFF_PROVIDER_API_KEY
+    }
+  })
+})
diff --git a/sdk/src/impl/agent-runtime.ts b/sdk/src/impl/agent-runtime.ts
index 17858d8196..d7d077bf76 100644
--- a/sdk/src/impl/agent-runtime.ts
+++ b/sdk/src/impl/agent-runtime.ts
@@ -29,6 +29,8 @@ export function getAgentRuntimeImpl(
     logger?: Logger
     apiKey: string
     clientEnv?: ClientEnv
+    /** Default custom provider used for runs that don't set one per-agent. */
+    clientCustomProvider?: { baseUrl?: string; apiKey?: string }
   } & Pick<
     AgentRuntimeScopedDeps,
     | 'handleStepsLogChunk'
@@ -44,6 +46,7 @@ export function getAgentRuntimeImpl(
     logger,
     apiKey,
     clientEnv = clientEnvDefault,
+    clientCustomProvider,
     handleStepsLogChunk,
     requestToolCall,
     requestMcpToolData,
@@ -87,7 +90,10 @@ export function getAgentRuntimeImpl(
       }),
 
     // LLM
-    promptAiSdkStream,
+    promptAiSdkStream: clientCustomProvider
+      ? (streamParams) =>
+          promptAiSdkStream({ ...streamParams, clientCustomProvider })
+      : promptAiSdkStream,
     promptAiSdk,
     promptAiSdkStructured,
 
diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts
index 60bb678bb1..60283e1745 100644
--- a/sdk/src/impl/llm.ts
+++ b/sdk/src/impl/llm.ts
@@ -23,6 +23,11 @@ import {
   markChatGptOAuthRateLimited,
 } from './model-provider'
 import { refreshChatGptOAuthToken } from '../credentials'
+import {
+  getCustomProviderApiKeyFromEnv,
+  getCustomProviderBaseUrlFromEnv,
+  getCustomProviderModelFromEnv,
+} from '../env'
 import { getErrorStatusCode } from '../error-utils'
 
 import type { ModelRequestParams } from './model-provider'
@@ -130,6 +135,71 @@ type OpenRouterUsageAccounting = {
   }
 }
 
+/**
+ * Retry count for direct calls to a custom OpenAI-compatible provider.
+ * One retry absorbs brief model-load stalls on first call. We deliberately
+ * don't retry more — local failures are usually deterministic (provider down,
+ * wrong URL, model not pulled) and extra retries only make errors slower.
+ */
+const CUSTOM_PROVIDER_MAX_RETRIES = 1
+
+/**
+ * Turn a raw error from a custom OpenAI-compatible endpoint into a friendly,
+ * actionable message. Connection failures (provider down, wrong URL) are
+ * checked first, then model-not-found; anything else is returned unchanged.
+ *
+ * @returns The rewritten message, or `args.rawMessage` when nothing matched.
+ */
+function buildCustomProviderError(args: {
+  baseUrl: string
+  model: string
+  rawMessage: string
+  rawCode?: string
+}): string {
+  const lower = args.rawMessage.toLowerCase()
+  const codeLower = (args.rawCode ?? '').toLowerCase()
+  const connectionMarkers = [
+    'econnrefused', 'connectionrefused', 'connection refused', 'fetch failed',
+    'unable to connect', 'etimedout', 'enotfound', 'socket hang up',
+  ]
+  const isConnectionError =
+    connectionMarkers.some((marker) => lower.includes(marker)) ||
+    /^(connectionrefused|econnrefused|enotfound|etimedout)$/.test(codeLower)
+  // Ollama says `model "<name>" not found`, so "not found" plus the requested
+  // id also counts. Empty ids are skipped since includes('') is always true.
+  const mentionsModel =
+    args.model !== '' && lower.includes(args.model.toLowerCase())
+  const isModelNotFound =
+    lower.includes('model not found') ||
+    lower.includes('does not exist') ||
+    (mentionsModel && (lower.includes('not found') || lower.includes('404')))
+
+  if (isConnectionError) {
+    return [
+      `Cannot reach LLM provider at ${args.baseUrl}.`,
+      ``,
+      `Check:`,
+      `  • Is the provider running? (e.g. \`ollama serve\` or LM Studio's Local Server)`,
+      `  • Is the URL correct? Currently configured: ${args.baseUrl}`,
+      `  • Is the model '${args.model}' loaded? (e.g. \`ollama list\`)`,
+      ``,
+      `Original error: ${args.rawMessage}`,
+    ].join('\n')
+  }
+  if (isModelNotFound) {
+    return [
+      `Model '${args.model}' not found at ${args.baseUrl}.`,
+      ``,
+      `Check:`,
+      `  • Pull the model first: \`ollama pull ${args.model}\``,
+      `  • Verify the exact tag with \`ollama list\``,
+      ``,
+      `Original error: ${args.rawMessage}`,
+    ].join('\n')
+  }
+  return args.rawMessage
+}
+
 /**
  * Check if an error is an OAuth rate limit error that should trigger fallback.
  */
@@ -303,13 +373,55 @@ export async function* promptAiSdkStream(
     return promptAborted('User cancelled input')
   }
 
+  // Resolve custom-provider precedence: agent > client option > env.
+  // First non-empty baseUrl wins; its apiKey comes along to avoid mixing
+  // credentials with the wrong endpoint.
+  const customSources = [
+    params.agentProviderOptions,
+    params.clientCustomProvider,
+    {
+      baseUrl: getCustomProviderBaseUrlFromEnv(),
+      apiKey: getCustomProviderApiKeyFromEnv(),
+    },
+  ]
+  const winningSource = customSources.find((s) => s?.baseUrl)
+  const resolvedBaseUrl = winningSource?.baseUrl
+  const resolvedApiKey = winningSource?.apiKey
+
+  // Model override: substitute the agent's declared model with the env-configured
+  // local model when the custom provider is active. Skipped when an agent
+  // explicitly sets its own providerOptions.baseUrl — that agent is assumed to
+  // have declared a matching model. See PROVIDER_MODEL_ENV_VAR JSDoc.
+  const agentBaseUrl = params.agentProviderOptions?.baseUrl
+  const envModelOverride =
+    resolvedBaseUrl && !agentBaseUrl
+      ? getCustomProviderModelFromEnv()
+      : undefined
+  const effectiveModel = envModelOverride ?? params.model
+
+  // Surface the substitution so users can confirm in logs that their /local
+  // model override is actually being applied to outbound requests.
+  if (envModelOverride && envModelOverride !== params.model) {
+    logger.info(
+      {
+        requestedModel: params.model,
+        effectiveModel,
+        baseUrl: resolvedBaseUrl,
+      },
+      'Custom provider active: substituting agent model with /local override',
+    )
+  }
+
   const modelParams: ModelRequestParams = {
     apiKey: params.apiKey,
-    model: params.model,
+    model: effectiveModel,
     skipChatGptOAuth: params.skipChatGptOAuth,
     costMode: params.costMode,
+    ...(resolvedBaseUrl
+      ? { customProvider: { baseUrl: resolvedBaseUrl, apiKey: resolvedApiKey } }
+      : {}),
   }
-  const { model: aiSDKModel, isChatGptOAuth } =
+  const { model: aiSDKModel, isChatGptOAuth, isCustomProvider } =
     await getModelForRequest(modelParams)
 
   if (isChatGptOAuth) {
@@ -329,9 +441,13 @@ export async function* promptAiSdkStream(
     prompt: undefined,
     model: aiSDKModel,
     messages: convertCbToModelMessages(params),
-    ...(isChatGptOAuth && { maxRetries: 0 }),
-    // For ChatGPT OAuth direct, don't send codebuff metadata/provider options to OpenAI
-    ...(isChatGptOAuth
+    // ChatGPT OAuth: no retries (we fall back to Codebuff on first failure).
+    // Custom provider: see CUSTOM_PROVIDER_MAX_RETRIES.
+    ...(isChatGptOAuth ? { maxRetries: 0 } : {}),
+    ...(isCustomProvider ? { maxRetries: CUSTOM_PROVIDER_MAX_RETRIES } : {}),
+    // Direct routes (ChatGPT OAuth, custom provider): skip codebuff_metadata
+    // and OpenRouter routing keys — neither belongs in those request bodies.
+    ...(isChatGptOAuth || isCustomProvider
       ? {}
       : {
         providerOptions: getProviderOptions({
@@ -458,7 +574,32 @@ export async function* promptAiSdkStream(
   // Track if we've yielded any content - if so, we can't safely fall back
   let hasYieldedContent = false
 
-  for await (const chunkValue of response.fullStream) {
+  // Custom-provider streams: a connection refusal at request init throws from
+  // the iterator before any error chunk exists. Recognised failures get a
+  // friendly message; everything else (aborts, ...) is rethrown unchanged.
+  const stream = isCustomProvider && resolvedBaseUrl
+    ? (async function* () {
+        try {
+          yield* response.fullStream
+        } catch (e) {
+          const rawMessage = e instanceof Error ? e.message : String(e)
+          const rawCode =
+            e && typeof e === 'object' && 'code' in e
+              ? String((e as { code?: unknown }).code ?? '')
+              : undefined
+          const friendly = buildCustomProviderError({
+            baseUrl: resolvedBaseUrl,
+            model: effectiveModel,
+            rawMessage,
+            rawCode,
+          })
+          if (friendly === rawMessage) throw e
+          throw Object.assign(new Error(friendly), { cause: e })
+        }
+      })()
+    : response.fullStream
+
+  for await (const chunkValue of stream) {
     if (chunkValue.type !== 'text-delta') {
       const flushed = stopSequenceHandler.flush()
       if (flushed) {
@@ -603,6 +744,25 @@ export async function* promptAiSdkStream(
         'Error in AI SDK stream',
       )
 
+      // Custom-provider failures: swap in a friendly, actionable message when
+      // the error is recognised; otherwise fall through and throw it untouched.
+      if (isCustomProvider && resolvedBaseUrl) {
+        const err: unknown = chunkValue.error
+        const rawCode =
+          err && typeof err === 'object' && 'code' in err
+            ? String((err as { code?: unknown }).code ?? '')
+            : undefined
+        const friendly = buildCustomProviderError({
+          baseUrl: resolvedBaseUrl,
+          model: effectiveModel,
+          rawMessage: errorMessage,
+          rawCode,
+        })
+        if (friendly !== errorMessage) {
+          throw Object.assign(new Error(friendly), { cause: err })
+        }
+      }
+
       // For all other errors, throw them -- they are fatal.
       throw chunkValue.error
     }
diff --git a/sdk/src/impl/model-provider.ts b/sdk/src/impl/model-provider.ts
index 83e016c611..03329aa8c1 100644
--- a/sdk/src/impl/model-provider.ts
+++ b/sdk/src/impl/model-provider.ts
@@ -86,6 +86,8 @@ export interface ModelRequestParams {
   skipChatGptOAuth?: boolean
   /** Cost mode (e.g. 'free') — affects fallback behavior for OAuth routes */
   costMode?: string
+  /** When set, route this request directly to the OpenAI-compatible endpoint and bypass Codebuff/OAuth. */
+  customProvider?: { baseUrl: string; apiKey?: string }
 }
 
 /**
@@ -96,6 +98,8 @@ export interface ModelResult {
   model: LanguageModel
   /** Whether this model uses ChatGPT OAuth direct (affects cost tracking) */
   isChatGptOAuth: boolean
+  /** Whether this model uses a custom OpenAI-compatible endpoint (affects cost tracking + metadata) */
+  isCustomProvider: boolean
 }
 
 // Usage accounting type for OpenRouter/Codebuff backend responses
@@ -115,7 +119,21 @@ type OpenRouterUsageAccounting = {
  * This function is async because it may need to refresh the OAuth token.
  */
 export async function getModelForRequest(params: ModelRequestParams): Promise<ModelResult> {
-  const { apiKey, model, skipChatGptOAuth, costMode } = params
+  const { apiKey, model, skipChatGptOAuth, costMode, customProvider } = params
+
+  // 1) Custom OpenAI-compatible endpoint wins — explicit per-agent / client / env override.
+  //    Bypasses Codebuff backend AND ChatGPT OAuth.
+  if (customProvider?.baseUrl) {
+    return {
+      model: createCustomProviderModel({
+        model,
+        baseUrl: customProvider.baseUrl,
+        apiKey: customProvider.apiKey,
+      }),
+      isChatGptOAuth: false,
+      isCustomProvider: true,
+    }
+  }
 
   // Check if we should use ChatGPT OAuth direct
   // Only attempt for allowlisted models; non-allowlisted models silently fall through to backend.
@@ -140,6 +158,7 @@ export async function getModelForRequest(params: ModelRequestParams): Promise<Mo
         return {
           model: createOpenAIOAuthModel(model, chatGptOAuthCredentials.accessToken),
           isChatGptOAuth: true,
+          isCustomProvider: false,
         }
       }
 
@@ -156,6 +175,7 @@ export async function getModelForRequest(params: ModelRequestParams): Promise<Mo
   return {
     model: createCodebuffBackendModel(apiKey, model),
     isChatGptOAuth: false,
+    isCustomProvider: false,
   }
 }
 
@@ -256,3 +276,35 @@ function createCodebuffBackendModel(
     supportsStructuredOutputs: true,
   })
 }
+
+/**
+ * Create an OpenAI-compatible model for a user-supplied base URL (Ollama,
+ * LM Studio, self-hosted). Trailing slashes on `baseUrl` are stripped, and a
+ * missing or empty `apiKey` is replaced by a placeholder Bearer value.
+ * No metadata extractor — direct calls don't flow through Codebuff's usage
+ * accounting. No codebuff_metadata is sent (handled by the caller).
+ */
+function createCustomProviderModel(params: {
+  model: string
+  baseUrl: string
+  apiKey?: string
+}): LanguageModel {
+  const { model, baseUrl, apiKey } = params
+  const trimmedBase = baseUrl.replace(/\/+$/, '')
+
+  return new OpenAICompatibleChatLanguageModel(model, {
+    provider: 'custom',
+    url: ({ path: endpoint }) => `${trimmedBase}${endpoint}`,
+    headers: () => ({
+      // Most local runtimes (Ollama, LM Studio) ignore the Authorization header
+      // entirely. Some servers reject an empty Bearer value, so a missing OR
+      // empty key becomes a placeholder; the Codebuff key is never sent here.
+      Authorization: `Bearer ${apiKey || 'unused'}`,
+      'Content-Type': 'application/json',
+      'user-agent': `ai-sdk/openai-compatible/${VERSION}/codebuff-custom-provider`,
+    }),
+    fetch: undefined,
+    includeUsage: undefined,
+    supportsStructuredOutputs: true,
+  })
+}
diff --git a/sdk/src/run.ts b/sdk/src/run.ts
index f5794a7def..36944ed0e9 100644
--- a/sdk/src/run.ts
+++ b/sdk/src/run.ts
@@ -85,6 +85,16 @@ export type CodebuffClientOptions = {
   maxAgentSteps?: number
   env?: Record<string, string>
 
+  /**
+   * Default custom OpenAI-compatible provider base URL for runs that don't set
+   * one per-agent. Used for local models (Ollama, LM Studio) or self-hosted
+   * endpoints. Lower precedence than an agent's own providerOptions.baseUrl;
+   * higher precedence than the CODEBUFF_BASE_URL env var.
+   */
+  providerBaseUrl?: string
+  /** Default API key paired with providerBaseUrl. Ignored if providerBaseUrl is unset. */
+  providerApiKey?: string
+
   handleEvent?: (event: PrintModeEvent) => void | Promise<void>
   handleStreamChunk?: (
     chunk:
@@ -198,6 +208,8 @@ async function runOnce({
   agentDefinitions,
   maxAgentSteps = MAX_AGENT_STEPS_DEFAULT,
   env,
+  providerBaseUrl,
+  providerApiKey,
 
   handleEvent,
   handleStreamChunk,
@@ -376,6 +388,9 @@ async function runOnce({
   const agentRuntimeImpl = getAgentRuntimeImpl({
     logger,
     apiKey,
+    clientCustomProvider: providerBaseUrl
+      ? { baseUrl: providerBaseUrl, apiKey: providerApiKey }
+      : undefined,
     handleStepsLogChunk: () => {
       // Does nothing for now
     },