diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts index d89843404e..80cfacdea4 100644 --- a/.agents/types/agent-definition.ts +++ b/.agents/types/agent-definition.ts @@ -109,6 +109,25 @@ export interface AgentDefinition { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible + * providers. The other providerOptions keys (order, allow_fallbacks, etc.) + * are OpenRouter-specific and ignored when `baseUrl` is set. + * + * Falls back to env var CODEBUFF_BASE_URL when unset. + * Example: "http://localhost:11434/v1" + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes + * (Ollama, LM Studio) ignore the value entirely. + */ + apiKey?: string } // ============================================================================ diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index 030de3a14f..8847a98ecb 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -109,6 +109,25 @@ export interface AgentDefinition { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible + * providers. The other providerOptions keys (order, allow_fallbacks, etc.) + * are OpenRouter-specific and ignored when `baseUrl` is set. + * + * Falls back to env var CODEBUFF_BASE_URL when unset. 
+ * Example: "http://localhost:11434/v1" + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes + * (Ollama, LM Studio) ignore the value entirely. + */ + apiKey?: string } // ============================================================================ diff --git a/cli/src/commands/__tests__/local-provider.test.ts b/cli/src/commands/__tests__/local-provider.test.ts new file mode 100644 index 0000000000..2f9580c720 --- /dev/null +++ b/cli/src/commands/__tests__/local-provider.test.ts @@ -0,0 +1,344 @@ +import { describe, expect, test, beforeEach, afterEach } from 'bun:test' + +import { + applyLocalAction, + DEFAULT_LOCAL_BASE_URL, + getActiveLocalBaseUrl, + getActiveLocalModel, + parseLocalArgs, +} from '../local-provider' + +describe('parseLocalArgs — basic shapes', () => { + test('empty args → status', () => { + expect(parseLocalArgs('').kind).toBe('status') + expect(parseLocalArgs(' ').kind).toBe('status') + expect(parseLocalArgs('\t\n').kind).toBe('status') + }) + + test('"status" → status', () => { + expect(parseLocalArgs('status').kind).toBe('status') + expect(parseLocalArgs(' status ').kind).toBe('status') + expect(parseLocalArgs('STATUS').kind).toBe('status') + }) + + test('"list" / "models" → list', () => { + expect(parseLocalArgs('list').kind).toBe('list') + expect(parseLocalArgs('models').kind).toBe('list') + }) + + test('"off" → disable', () => { + expect(parseLocalArgs('off').kind).toBe('disable') + expect(parseLocalArgs('disable').kind).toBe('disable') + }) + + test('"off" with stray args → invalid', () => { + const r = parseLocalArgs('off http://oops') + expect(r.kind).toBe('invalid') + }) + + test('unknown subcommand → invalid', () => { + const r = parseLocalArgs('foobar') + expect(r.kind).toBe('invalid') + if (r.kind === 'invalid') expect(r.reason).toContain('Unknown') + }) +}) + +describe('parseLocalArgs — enable shapes', () 
=> { + test('"on" → enable with default URL, no model', () => { + const r = parseLocalArgs('on') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + expect(r.model).toBeUndefined() + } + }) + + test('"on " → enable with URL only', () => { + const r = parseLocalArgs('on http://localhost:1234/v1') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe('http://localhost:1234/v1') + expect(r.model).toBeUndefined() + } + }) + + test('"on " (model only, no URL) → enable with default URL + model', () => { + const r = parseLocalArgs('on llama3.1:8b') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + expect(r.model).toBe('llama3.1:8b') + } + }) + + test('"on " → both set', () => { + const r = parseLocalArgs('on http://localhost:1234/v1 llama3.1:8b') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe('http://localhost:1234/v1') + expect(r.model).toBe('llama3.1:8b') + } + }) + + test('"enable " and "set " aliases work', () => { + const a = parseLocalArgs('enable http://x:1/v1') + expect(a.kind).toBe('enable') + const b = parseLocalArgs('set gemma4:e2b') + expect(b.kind).toBe('enable') + if (b.kind === 'enable') expect(b.model).toBe('gemma4:e2b') + }) + + test('bare URL → enable', () => { + const r = parseLocalArgs('http://localhost:11434/v1') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') expect(r.baseUrl).toBe('http://localhost:11434/v1') + }) + + test('bare model tag → enable with default URL + model', () => { + const r = parseLocalArgs('llama3.1:8b') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') { + expect(r.baseUrl).toBe(DEFAULT_LOCAL_BASE_URL) + expect(r.model).toBe('llama3.1:8b') + } + }) + + test('non-http URL → invalid', () => { + const r = parseLocalArgs('on ftp://localhost') + expect(r.kind).toBe('invalid') + }) + + test('malformed URL → invalid', () => { + 
const r = parseLocalArgs('on http://') + expect(r.kind).toBe('invalid') + }) + + test('https URL accepted', () => { + const r = parseLocalArgs('on https://my-vm.example.com:8080/v1 llama3.1:8b') + expect(r.kind).toBe('enable') + if (r.kind === 'enable') + expect(r.baseUrl).toBe('https://my-vm.example.com:8080/v1') + }) +}) + +describe('parseLocalArgs — model subcommand', () => { + test('"model " → set-model', () => { + const r = parseLocalArgs('model llama3.1:8b') + expect(r.kind).toBe('set-model') + if (r.kind === 'set-model') expect(r.model).toBe('llama3.1:8b') + }) + + test('"model clear" / "model off" / "model none" → clear-model', () => { + expect(parseLocalArgs('model clear').kind).toBe('clear-model') + expect(parseLocalArgs('model off').kind).toBe('clear-model') + expect(parseLocalArgs('model none').kind).toBe('clear-model') + }) + + test('"model" without name → invalid', () => { + const r = parseLocalArgs('model') + expect(r.kind).toBe('invalid') + }) + + test('"model " → invalid', () => { + const r = parseLocalArgs('model --x') + expect(r.kind).toBe('invalid') + }) +}) + +describe('applyLocalAction (side effects on process.env)', () => { + let originalBaseUrl: string | undefined + let originalApiKey: string | undefined + let originalModel: string | undefined + + beforeEach(() => { + originalBaseUrl = process.env.CODEBUFF_BASE_URL + originalApiKey = process.env.CODEBUFF_PROVIDER_API_KEY + originalModel = process.env.CODEBUFF_PROVIDER_MODEL + delete process.env.CODEBUFF_BASE_URL + delete process.env.CODEBUFF_PROVIDER_API_KEY + delete process.env.CODEBUFF_PROVIDER_MODEL + }) + + afterEach(() => { + if (originalBaseUrl === undefined) delete process.env.CODEBUFF_BASE_URL + else process.env.CODEBUFF_BASE_URL = originalBaseUrl + if (originalApiKey === undefined) + delete process.env.CODEBUFF_PROVIDER_API_KEY + else process.env.CODEBUFF_PROVIDER_API_KEY = originalApiKey + if (originalModel === undefined) delete process.env.CODEBUFF_PROVIDER_MODEL + else 
process.env.CODEBUFF_PROVIDER_MODEL = originalModel + }) + + test('enable without model sets baseUrl, clears any previous model override', async () => { + process.env.CODEBUFF_PROVIDER_MODEL = 'stale-model' + const msg = await applyLocalAction({ + kind: 'enable', + baseUrl: 'http://localhost:11434/v1', + }) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() + expect(msg).toContain('ON') + expect(msg).toContain('No model override') + expect(msg).toContain('llama3.1:8b') + }) + + test('enable with model sets both env vars', async () => { + const msg = await applyLocalAction({ + kind: 'enable', + baseUrl: 'http://localhost:11434/v1', + model: 'llama3.1:8b', + }) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') + expect(msg).toContain('Model override: llama3.1:8b') + }) + + test('set-model when local is OFF → error', async () => { + const msg = await applyLocalAction({ + kind: 'set-model', + model: 'llama3.1:8b', + }) + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() + expect(msg).toContain('OFF') + }) + + test('set-model when local is ON → updates model', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + const msg = await applyLocalAction({ + kind: 'set-model', + model: 'llama3.1:8b', + }) + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') + expect(msg).toContain('Model override: llama3.1:8b') + }) + + test('clear-model removes only the model, keeps baseUrl', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b' + const msg = await applyLocalAction({ kind: 'clear-model' }) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://localhost:11434/v1') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() + expect(msg).toContain('cleared') + }) + + test('clear-model 
when none set is friendly', async () => { + const msg = await applyLocalAction({ kind: 'clear-model' }) + expect(msg).toContain('No model override') + }) + + test('disable clears baseUrl, apiKey, and model', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + process.env.CODEBUFF_PROVIDER_API_KEY = 'ollama' + process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b' + const msg = await applyLocalAction({ kind: 'disable' }) + expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_API_KEY).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() + expect(msg).toContain('OFF') + expect(msg).toContain('llama3.1:8b') + }) + + test('disable when already off → idempotent', async () => { + const msg = await applyLocalAction({ kind: 'disable' }) + expect(msg).toContain('already OFF') + }) + + test('status when off mentions /local list and shows usage', async () => { + const msg = await applyLocalAction({ kind: 'status' }) + expect(msg).toContain('OFF') + expect(msg).toContain('/local list') + }) + + test('status when on with model shows both URL and model', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:1234/v1' + process.env.CODEBUFF_PROVIDER_MODEL = 'llama3.1:8b' + const msg = await applyLocalAction({ kind: 'status' }) + expect(msg).toContain('ON') + expect(msg).toContain('http://localhost:1234/v1') + expect(msg).toContain('llama3.1:8b') + }) + + test('status when on without model warns about no model override', async () => { + process.env.CODEBUFF_BASE_URL = 'http://localhost:11434/v1' + const msg = await applyLocalAction({ kind: 'status' }) + expect(msg).toContain('ON') + expect(msg).toContain('(none') + }) + + test('invalid returns reason prefixed', async () => { + const msg = await applyLocalAction({ + kind: 'invalid', + reason: 'something wrong', + }) + expect(msg).toContain('something wrong') + }) + + test('list when off returns error', async () => { + const msg = await 
applyLocalAction({ kind: 'list' }) + expect(msg).toContain('OFF') + }) +}) + +describe('parseLocalArgs + applyLocalAction end-to-end', () => { + let originalBaseUrl: string | undefined + let originalModel: string | undefined + + beforeEach(() => { + originalBaseUrl = process.env.CODEBUFF_BASE_URL + originalModel = process.env.CODEBUFF_PROVIDER_MODEL + delete process.env.CODEBUFF_BASE_URL + delete process.env.CODEBUFF_PROVIDER_MODEL + }) + + afterEach(() => { + if (originalBaseUrl === undefined) delete process.env.CODEBUFF_BASE_URL + else process.env.CODEBUFF_BASE_URL = originalBaseUrl + if (originalModel === undefined) delete process.env.CODEBUFF_PROVIDER_MODEL + else process.env.CODEBUFF_PROVIDER_MODEL = originalModel + }) + + test('user types `/local on llama3.1:8b` → URL default + model set', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL) + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') + }) + + test('user types `/local llama3.1:8b` (no `on`) → same effect', async () => { + await applyLocalAction(parseLocalArgs('llama3.1:8b')) + expect(process.env.CODEBUFF_BASE_URL).toBe(DEFAULT_LOCAL_BASE_URL) + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') + }) + + test('user types `/local on http://x/v1 llama3.1:8b` → both set', async () => { + await applyLocalAction(parseLocalArgs('on http://x.example.com:9999/v1 llama3.1:8b')) + expect(process.env.CODEBUFF_BASE_URL).toBe('http://x.example.com:9999/v1') + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') + }) + + test('user types `/local model llama3.1:8b` after `/local on` → model added', async () => { + await applyLocalAction(parseLocalArgs('on')) + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() + await applyLocalAction(parseLocalArgs('model llama3.1:8b')) + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBe('llama3.1:8b') + }) + + test('user types `/local off` → both 
cleared', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + await applyLocalAction(parseLocalArgs('off')) + expect(process.env.CODEBUFF_BASE_URL).toBeUndefined() + expect(process.env.CODEBUFF_PROVIDER_MODEL).toBeUndefined() + }) + + test('mutations are visible via getter functions', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL) + expect(getActiveLocalModel()).toBe('llama3.1:8b') + }) + + test('re-enabling without model clears previous model override', async () => { + await applyLocalAction(parseLocalArgs('on llama3.1:8b')) + await applyLocalAction(parseLocalArgs('on')) + expect(getActiveLocalBaseUrl()).toBe(DEFAULT_LOCAL_BASE_URL) + expect(getActiveLocalModel()).toBeUndefined() + }) +}) diff --git a/cli/src/commands/command-registry.ts b/cli/src/commands/command-registry.ts index 0eda49607e..9afa024028 100644 --- a/cli/src/commands/command-registry.ts +++ b/cli/src/commands/command-registry.ts @@ -5,6 +5,7 @@ import { handleAdsEnable, handleAdsDisable } from './ads' import { handleHelpCommand } from './help' import { handleImageCommand } from './image' import { handleInitializationFlowLocally } from './init' +import { applyLocalAction, parseLocalArgs } from './local-provider' import { buildInterviewPrompt, buildPlanPrompt, buildReviewPromptFromArgs } from './prompt-builders' import { runBashCommand } from './router' import { handleUsageCommand } from './usage' @@ -392,6 +393,18 @@ const ALL_COMMANDS: CommandDefinition[] = [ clearInput(params) }, }), + defineCommandWithArgs({ + name: 'local', + handler: async (params, args) => { + const userText = params.inputValue.trim() + params.setMessages((prev) => [...prev, getUserMessage(userText)]) + params.saveToHistory(userText) + clearInput(params) + + const message = await applyLocalAction(parseLocalArgs(args)) + params.setMessages((prev) => [...prev, getSystemMessage(message)]) + }, + }), // Mode commands 
generated from AGENT_MODES (excluded in Freebuff) ...(IS_FREEBUFF ? [] : AGENT_MODES).map((mode) => defineCommandWithArgs({ diff --git a/cli/src/commands/local-provider.ts b/cli/src/commands/local-provider.ts new file mode 100644 index 0000000000..85a3a65143 --- /dev/null +++ b/cli/src/commands/local-provider.ts @@ -0,0 +1,401 @@ +/** + * /local slash command — runtime toggle for the custom OpenAI-compatible + * provider (Ollama, LM Studio, self-hosted). + * + * Mutates process.env at runtime. The SDK reads these env vars lazily on + * every promptAiSdkStream call, so changes take effect immediately for the + * next request without needing to rebuild the CodebuffClient. + * + * Subcommands: + * /local — show current status + * /local on — enable with default Ollama URL (model unchanged) + * /local on — enable with default URL + model override + * /local on — enable with custom URL (model unchanged) + * /local on — enable with URL + model override + * /local set — alias for `/local on ` + * /local model — set model override only (URL must already be set) + * /local model clear — clear the model override + * /local off — disable, return to Codebuff backend + * /local status — same as `/local` + * /local list — query the local provider for available models + * + * Agent-level providerOptions.baseUrl always wins; /local only affects agents + * that don't set their own baseUrl. Same for the model override — agents with + * an explicit providerOptions.baseUrl use their own declared model. + */ + +import { + PROVIDER_API_KEY_ENV_VAR, + PROVIDER_BASE_URL_ENV_VAR, + PROVIDER_MODEL_ENV_VAR, +} from '@codebuff/common/constants/custom-provider' + +/** Default URL used by `/local on` when the user doesn't specify one. 
*/ +export const DEFAULT_LOCAL_BASE_URL = 'http://localhost:11434/v1' + +export type LocalCommandAction = + | { kind: 'status' } + | { kind: 'enable'; baseUrl: string; model?: string } + | { kind: 'set-model'; model: string } + | { kind: 'clear-model' } + | { kind: 'list' } + | { kind: 'disable' } + | { kind: 'invalid'; reason: string } + +function looksLikeUrl(token: string): boolean { + // Anything with a scheme separator — caller validates the actual scheme. + return token.includes('://') +} + +function isLikelyModelTag(token: string): boolean { + // Ollama-style tags: name[:tag], e.g. "llama3.1:8b", "gemma4:e2b", "qwen2.5". + // Reject URL-shaped tokens and flags. + return Boolean(token) && !looksLikeUrl(token) && !token.startsWith('-') +} + +/** + * Parse the args passed to `/local`. Pure function — no side effects. + * Exported for unit testing. + */ +export function parseLocalArgs(rawArgs: string): LocalCommandAction { + const trimmed = rawArgs.trim() + + // No args → show status + if (!trimmed) { + return { kind: 'status' } + } + + const tokens = trimmed.split(/\s+/) + const subcommand = tokens[0] + const sub = subcommand.toLowerCase() + const rest = tokens.slice(1) + + if (sub === 'status') { + return { kind: 'status' } + } + + if (sub === 'list' || sub === 'models') { + return { kind: 'list' } + } + + if (sub === 'off' || sub === 'disable') { + if (rest.length > 0) { + return { + kind: 'invalid', + reason: `\`/local ${sub}\` does not take arguments. Got: "${rest.join(' ')}"`, + } + } + return { kind: 'disable' } + } + + if (sub === 'model') { + if (rest.length === 0) { + return { + kind: 'invalid', + reason: 'Usage: `/local model ` or `/local model clear`', + } + } + const value = rest.join(' ') + if (value === 'clear' || value === 'off' || value === 'none') { + return { kind: 'clear-model' } + } + if (!isLikelyModelTag(rest[0])) { + return { + kind: 'invalid', + reason: `Invalid model name: "${value}". 
Expected something like "llama3.1:8b".`, + } + } + return { kind: 'set-model', model: value } + } + + if (sub === 'on' || sub === 'enable' || sub === 'set') { + return parseEnable(rest) + } + + // Looks like a bare URL or bare model (e.g. `/local http://...` or `/local llama3.1:8b`)? + if (looksLikeUrl(subcommand)) { + return parseEnable([subcommand, ...rest]) + } + // Bare model shortcut: must contain `:` so we don't silently accept typos + // like `/local foobar`. Use `/local on ` for tagless models. + if (subcommand.includes(':') && isLikelyModelTag(subcommand) && rest.length === 0) { + return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL, model: subcommand } + } + + return { + kind: 'invalid', + reason: `Unknown /local subcommand: "${subcommand}". Try: on, off, model, status, list.`, + } +} + +/** + * Parse the tokens after `/local on` / `/local enable` / `/local set`. + * Supports four shapes: + * (empty) → default URL, no model override + * → URL, no model override + * → default URL + model + * → URL + model + */ +function parseEnable(tokens: string[]): LocalCommandAction { + if (tokens.length === 0) { + return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL } + } + + if (tokens.length === 1) { + const t = tokens[0] + // URL-shaped tokens go through URL validation regardless of scheme. + if (looksLikeUrl(t)) { + const v = validateBaseUrl(t) + if (!v.ok) return { kind: 'invalid', reason: v.reason } + return { kind: 'enable', baseUrl: v.url } + } + if (isLikelyModelTag(t)) { + return { kind: 'enable', baseUrl: DEFAULT_LOCAL_BASE_URL, model: t } + } + return { kind: 'invalid', reason: `Could not interpret "${t}" as URL or model name.` } + } + + // Two or more tokens. Pattern: first is URL, rest joined is model. + const [first, ...rest] = tokens + if (!looksLikeUrl(first)) { + return { + kind: 'invalid', + reason: `Expected URL or model. Got: "${first}". 
Usage: /local on [url] [model]`, + } + } + const v = validateBaseUrl(first) + if (!v.ok) return { kind: 'invalid', reason: v.reason } + const modelToken = rest.join(' ') + if (!isLikelyModelTag(rest[0])) { + return { + kind: 'invalid', + reason: `Invalid model name: "${modelToken}".`, + } + } + return { kind: 'enable', baseUrl: v.url, model: modelToken } +} + +function validateBaseUrl( + raw: string, +): + | { ok: true; url: string } + | { ok: false; reason: string } { + if (!raw || !raw.trim()) { + return { ok: false, reason: 'URL is required.' } + } + if (!raw.startsWith('http://') && !raw.startsWith('https://')) { + return { + ok: false, + reason: `URL must start with http:// or https://. Got: "${raw}"`, + } + } + try { + // eslint-disable-next-line no-new + new URL(raw) + } catch { + return { ok: false, reason: `Invalid URL: "${raw}"` } + } + return { ok: true, url: raw } +} + +/** + * Read the currently-active local provider URL (or undefined if disabled). + * Reads from process.env so it reflects both shell-set values and /local-set values. + */ +export function getActiveLocalBaseUrl(): string | undefined { + return process.env[PROVIDER_BASE_URL_ENV_VAR] +} + +/** Read the currently-active local model override (or undefined). */ +export function getActiveLocalModel(): string | undefined { + return process.env[PROVIDER_MODEL_ENV_VAR] +} + +/** + * Apply an action to process.env. Returns a user-facing message describing what happened. + * Side effects are isolated to this function for testability. + * + * Note: `list` is async because it hits the network. Other actions are sync. 
+ */ +export async function applyLocalAction( + action: LocalCommandAction, +): Promise { + if (action.kind === 'invalid') { + return `❌ ${action.reason}` + } + + if (action.kind === 'status') { + return formatStatus() + } + + if (action.kind === 'list') { + return listModels() + } + + if (action.kind === 'enable') { + process.env[PROVIDER_BASE_URL_ENV_VAR] = action.baseUrl + if (action.model) { + process.env[PROVIDER_MODEL_ENV_VAR] = action.model + } else { + // Important: an `enable` without an explicit model clears any previous + // model override, so an old setting doesn't silently apply to a new URL. + delete process.env[PROVIDER_MODEL_ENV_VAR] + } + const lines = [ + 'Local provider: ON', + ` URL: ${action.baseUrl}`, + ] + if (action.model) { + lines.push(` Model override: ${action.model}`) + lines.push('') + lines.push( + `Agents that would otherwise use a cloud model will use \`${action.model}\` instead.`, + ) + } else { + lines.push('') + lines.push('⚠️ No model override set. Cloud models (e.g.') + lines.push(' `anthropic/claude-opus-4-7`) will not exist on the local provider.') + lines.push(' Run `/local model ` (e.g. `/local model llama3.1:8b`)') + lines.push(' or `/local list` to see available models.') + } + lines.push('') + lines.push('Note: agents with their own `providerOptions.baseUrl` still win.') + lines.push('Disable with: /local off') + return lines.join('\n') + } + + if (action.kind === 'set-model') { + if (!getActiveLocalBaseUrl()) { + return [ + '❌ Local provider is OFF. 
Enable it first with `/local on` before setting a model.', + ].join('\n') + } + process.env[PROVIDER_MODEL_ENV_VAR] = action.model + return [ + `Model override: ${action.model}`, + '', + `Local provider remains ON at ${getActiveLocalBaseUrl()}.`, + `Agents will use \`${action.model}\` for inference.`, + ].join('\n') + } + + if (action.kind === 'clear-model') { + const wasSet = getActiveLocalModel() + delete process.env[PROVIDER_MODEL_ENV_VAR] + if (!wasSet) { + return 'No model override was set. No change.' + } + return [ + `Model override cleared (was: ${wasSet}).`, + '', + 'Warning: without an override, the agent\'s declared cloud model will be sent', + 'to the local provider — likely a "model not found" error. Either set a new', + 'model with `/local model ` or turn local mode off with `/local off`.', + ].join('\n') + } + + // action.kind === 'disable' + const wasBaseUrl = getActiveLocalBaseUrl() + const wasModel = getActiveLocalModel() + delete process.env[PROVIDER_BASE_URL_ENV_VAR] + delete process.env[PROVIDER_API_KEY_ENV_VAR] + delete process.env[PROVIDER_MODEL_ENV_VAR] + if (!wasBaseUrl && !wasModel) { + return 'Local provider was already OFF. No change.' + } + const lines = ['Local provider: OFF'] + if (wasBaseUrl) lines.push(` Previously: ${wasBaseUrl}`) + if (wasModel) lines.push(` Cleared model override: ${wasModel}`) + lines.push('') + lines.push('Routing returns to the Codebuff backend.') + return lines.join('\n') +} + +function formatStatus(): string { + const url = getActiveLocalBaseUrl() + const model = getActiveLocalModel() + if (!url) { + return [ + 'Local provider: OFF', + '', + 'All agents (without per-agent providerOptions.baseUrl) go through the Codebuff backend.', + '', + `Enable with: /local on (e.g. 
/local on llama3.1:8b)`, + ` /local on (uses ${DEFAULT_LOCAL_BASE_URL}, no model override)`, + `Discover available local models: /local list`, + ].join('\n') + } + const lines = [ + 'Local provider: ON', + ` URL: ${url}`, + ] + if (model) lines.push(` Model override: ${model}`) + else + lines.push( + ' Model override: (none — agent\'s declared model will be sent as-is)', + ) + lines.push('') + lines.push('Agents without their own providerOptions.baseUrl will use this endpoint.') + lines.push('Commands: /local model , /local off, /local list') + return lines.join('\n') +} + +/** + * Query the local provider's `/api/tags` endpoint (Ollama-compatible) to list + * available models. Best-effort — short timeout, friendly fallback. + */ +async function listModels(): Promise { + const baseUrl = getActiveLocalBaseUrl() + if (!baseUrl) { + return [ + '❌ Local provider is OFF. Run `/local on ` first, then `/local list`.', + ].join('\n') + } + // /api/tags lives at the root of the Ollama server, not under /v1. + // Strip a trailing /v1 if present, then append /api/tags. + const root = baseUrl.replace(/\/+$/, '').replace(/\/v1$/, '') + const tagsUrl = `${root}/api/tags` + + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), 3000) + try { + const res = await fetch(tagsUrl, { signal: controller.signal }) + if (!res.ok) { + return [ + `Could not list models at ${tagsUrl} (HTTP ${res.status}).`, + '', + 'Note: this only works for Ollama-compatible providers.', + 'For LM Studio or others, set the model manually with `/local model `.', + ].join('\n') + } + const body = (await res.json()) as { models?: Array<{ name?: string }> } + const names = (body.models ?? 
[]) + .map((m) => m.name) + .filter((n): n is string => typeof n === 'string') + if (names.length === 0) { + return [ + `Local provider has no models loaded.`, + '', + 'Try `ollama pull llama3.1:8b` (or any tag of your choice) and run `/local list` again.', + ].join('\n') + } + const active = getActiveLocalModel() + const lines = [`Available models at ${root}:`] + for (const name of names) { + const marker = name === active ? ' ▶ ' : ' ' + lines.push(`${marker}${name}`) + } + lines.push('') + lines.push(`Use \`/local model \` to pick one.`) + return lines.join('\n') + } catch (e) { + return [ + `Could not reach ${tagsUrl}.`, + '', + 'Check that the provider is running and the URL is correct.', + 'For non-Ollama providers, set the model manually with `/local model `.', + ].join('\n') + } finally { + clearTimeout(timeout) + } +} diff --git a/cli/src/data/slash-commands.ts b/cli/src/data/slash-commands.ts index 14d71abecd..57e9b2668c 100644 --- a/cli/src/data/slash-commands.ts +++ b/cli/src/data/slash-commands.ts @@ -163,6 +163,11 @@ const ALL_SLASH_COMMANDS: SlashCommand[] = [ description: 'Attach an image file (or Ctrl+V to paste from clipboard)', aliases: ['img', 'attach'], }, + { + id: 'local', + label: 'local', + description: 'Toggle local LLM provider (Ollama/LM Studio). Usage: /local [on |off|model |list|status]', + }, ...MODE_COMMANDS, // { // id: 'publish', diff --git a/common/src/constants/custom-provider.ts b/common/src/constants/custom-provider.ts new file mode 100644 index 0000000000..5b59394287 --- /dev/null +++ b/common/src/constants/custom-provider.ts @@ -0,0 +1,17 @@ +/** Env var that overrides the upstream LLM endpoint with an OpenAI-compatible base URL. + * Lower precedence than per-agent providerOptions.baseUrl and the CodebuffClient option. */ +export const PROVIDER_BASE_URL_ENV_VAR = 'CODEBUFF_BASE_URL' + +/** Env var providing the API key for the endpoint set by PROVIDER_BASE_URL_ENV_VAR. 
+ * Most local runtimes (Ollama, LM Studio) ignore the key entirely. */ +export const PROVIDER_API_KEY_ENV_VAR = 'CODEBUFF_PROVIDER_API_KEY' + +/** Env var overriding the agent's declared model when a custom provider is active. + * Used by `/local on ` to substitute the cloud model (e.g. + * `anthropic/claude-opus-4-7`) with a model the local provider actually has + * (e.g. `llama3.1:8b`). + * + * Only takes effect when PROVIDER_BASE_URL_ENV_VAR is set AND the agent + * itself doesn't declare its own `providerOptions.baseUrl` — agents with an + * explicit baseUrl are assumed to declare a matching model. */ +export const PROVIDER_MODEL_ENV_VAR = 'CODEBUFF_PROVIDER_MODEL' diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index 030de3a14f..8847a98ecb 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -109,6 +109,25 @@ export interface AgentDefinition { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's LLM calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Use for local models (Ollama, LM Studio) or self-hosted OpenAI-compatible + * providers. The other providerOptions keys (order, allow_fallbacks, etc.) + * are OpenRouter-specific and ignored when `baseUrl` is set. + * + * Falls back to env var CODEBUFF_BASE_URL when unset. + * Example: "http://localhost:11434/v1" + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if `baseUrl` is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. Most local runtimes + * (Ollama, LM Studio) ignore the value entirely. 
+ */ + apiKey?: string } // ============================================================================ diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts index 0d89ba7ede..6cb0a77842 100644 --- a/common/src/types/agent-template.ts +++ b/common/src/types/agent-template.ts @@ -37,6 +37,12 @@ export type OpenRouterReasoningOptions = { } ) +/** + * OpenRouter provider-routing options, plus optional fields to override the + * upstream endpoint with an OpenAI-compatible base URL (e.g. Ollama, LM Studio, + * self-hosted). The routing fields below are OpenRouter-specific and are + * ignored when `baseUrl` is set. + */ export type OpenRouterProviderRoutingOptions = { /** * List of provider slugs to try in order (e.g. ["anthropic", "openai"]) @@ -90,6 +96,20 @@ export type OpenRouterProviderRoutingOptions = { audio?: number | string request?: number | string } + /** + * Override the upstream LLM endpoint with an OpenAI-compatible base URL. + * When set, this agent's calls bypass the Codebuff backend / OpenRouter + * and go directly to `${baseUrl}/chat/completions`. + * + * Other keys above (order, allow_fallbacks, ...) are OpenRouter-specific and + * are ignored when baseUrl is set. Falls back to env var CODEBUFF_BASE_URL. + */ + baseUrl?: string + /** + * API key for the endpoint set in `baseUrl`. Ignored if baseUrl is unset. + * Falls back to env var CODEBUFF_PROVIDER_API_KEY. + */ + apiKey?: string } export type OpenRouterProviderOptions = { diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts index 11c5a5ba0c..853ef54b42 100644 --- a/common/src/types/contracts/llm.ts +++ b/common/src/types/contracts/llm.ts @@ -56,6 +56,9 @@ export type PromptAiSdkStreamFn = ( includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: OpenRouterProviderRoutingOptions + /** Fallback custom-provider config injected by the SDK Client. + * Lower precedence than an agent's own providerOptions.baseUrl. 
*/ + clientCustomProvider?: { baseUrl?: string; apiKey?: string } /** List of agents that can be spawned - used to transform agent tool calls */ spawnableAgents?: string[] /** Map of locally available agent templates - used to transform agent tool calls */ diff --git a/common/src/types/dynamic-agent-template.ts b/common/src/types/dynamic-agent-template.ts index d0a4097305..b9762566d1 100644 --- a/common/src/types/dynamic-agent-template.ts +++ b/common/src/types/dynamic-agent-template.ts @@ -167,6 +167,8 @@ export const DynamicAgentDefinitionSchema = z.object({ request: z.union([z.number(), z.string()]).optional(), }) .optional(), + baseUrl: z.string().url().optional(), + apiKey: z.string().optional(), }) .optional(), diff --git a/sdk/src/env.ts b/sdk/src/env.ts index 033e3f245d..b49f88b6fa 100644 --- a/sdk/src/env.ts +++ b/sdk/src/env.ts @@ -7,6 +7,11 @@ import { BYOK_OPENROUTER_ENV_VAR } from '@codebuff/common/constants/byok' import { CHATGPT_OAUTH_TOKEN_ENV_VAR } from '@codebuff/common/constants/chatgpt-oauth' +import { + PROVIDER_API_KEY_ENV_VAR, + PROVIDER_BASE_URL_ENV_VAR, + PROVIDER_MODEL_ENV_VAR, +} from '@codebuff/common/constants/custom-provider' import { API_KEY_ENV_VAR } from '@codebuff/common/constants/paths' import { getBaseEnv } from '@codebuff/common/env-process' @@ -48,3 +53,28 @@ export const getByokOpenrouterApiKeyFromEnv = (): string | undefined => { export const getChatGptOAuthTokenFromEnv = (): string | undefined => { return process.env[CHATGPT_OAUTH_TOKEN_ENV_VAR] } + +/** + * Get the custom upstream provider base URL from environment. + * Used when an agent's providerOptions.baseUrl is unset and no CodebuffClient option overrides it. + */ +export const getCustomProviderBaseUrlFromEnv = (): string | undefined => { + return process.env[PROVIDER_BASE_URL_ENV_VAR] +} + +/** + * Get the custom upstream provider API key from environment. + * Paired with getCustomProviderBaseUrlFromEnv. 
+ */ +export const getCustomProviderApiKeyFromEnv = (): string | undefined => { + return process.env[PROVIDER_API_KEY_ENV_VAR] +} + +/** + * Get the override model name from environment. When a custom provider is + * active and the agent sets no providerOptions.baseUrl of its own, this value + * replaces the agent's declared model. Returns undefined if unset. + */ +export const getCustomProviderModelFromEnv = (): string | undefined => { + return process.env[PROVIDER_MODEL_ENV_VAR] +} diff --git a/sdk/src/impl/__tests__/model-provider-custom.test.ts b/sdk/src/impl/__tests__/model-provider-custom.test.ts new file mode 100644 index 0000000000..0ff4336355 --- /dev/null +++ b/sdk/src/impl/__tests__/model-provider-custom.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, test, afterEach, mock } from 'bun:test' + +describe('getModelForRequest with customProvider', () => { + afterEach(() => { + mock.restore() + }) + + test('returns isCustomProvider: true when customProvider.baseUrl is set', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'gemma2:9b', + customProvider: { baseUrl: 'http://localhost:11434/v1', apiKey: 'ollama' }, + }) + + expect(result.isCustomProvider).toBe(true) + expect(result.isChatGptOAuth).toBe(false) + expect(result.model).toBeDefined() + expect((result.model as any).modelId).toBe('gemma2:9b') + }) + + test('does not return isCustomProvider when baseUrl is missing', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'anthropic/claude-sonnet-4', + }) + + expect(result.isCustomProvider).toBe(false) + }) + + test('customProvider takes precedence over ChatGPT OAuth eligibility', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'openai/gpt-5.3', + customProvider: { baseUrl: 
'http://localhost:11434/v1' }, + }) + + expect(result.isCustomProvider).toBe(true) + expect(result.isChatGptOAuth).toBe(false) + }) + + test('trims trailing slash from baseUrl (constructs cleanly)', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'gemma2:9b', + customProvider: { baseUrl: 'http://localhost:11434/v1/' }, + }) + + expect(result.isCustomProvider).toBe(true) + }) + + test('omitting apiKey is allowed', async () => { + const { getModelForRequest } = await import('../model-provider') + + const result = await getModelForRequest({ + apiKey: 'cb-test-key', + model: 'gemma2:9b', + customProvider: { baseUrl: 'http://localhost:11434/v1' }, + }) + + expect(result.isCustomProvider).toBe(true) + }) + + test('customProvider arg drives selection regardless of env (precedence contract)', async () => { + // This documents the contract: getModelForRequest receives the *resolved* + // customProvider — the caller (promptAiSdkStream) is responsible for + // applying the agent > client > env precedence ladder before calling. 
+ process.env.CODEBUFF_BASE_URL = 'http://from-env:11434/v1' + process.env.CODEBUFF_PROVIDER_API_KEY = 'env-key' + + const { getModelForRequest } = await import('../model-provider') + const result = await getModelForRequest({ + apiKey: 'cb-key', + model: 'gemma2:9b', + customProvider: { + baseUrl: 'http://from-agent:11434/v1', + apiKey: 'agent-key', + }, + }) + + expect(result.isCustomProvider).toBe(true) + expect(result.model).toBeDefined() + + delete process.env.CODEBUFF_BASE_URL + delete process.env.CODEBUFF_PROVIDER_API_KEY + }) +}) diff --git a/sdk/src/impl/agent-runtime.ts b/sdk/src/impl/agent-runtime.ts index 17858d8196..d7d077bf76 100644 --- a/sdk/src/impl/agent-runtime.ts +++ b/sdk/src/impl/agent-runtime.ts @@ -29,6 +29,8 @@ export function getAgentRuntimeImpl( logger?: Logger apiKey: string clientEnv?: ClientEnv + /** Default custom provider used for runs that don't set one per-agent. */ + clientCustomProvider?: { baseUrl?: string; apiKey?: string } } & Pick< AgentRuntimeScopedDeps, | 'handleStepsLogChunk' @@ -44,6 +46,7 @@ export function getAgentRuntimeImpl( logger, apiKey, clientEnv = clientEnvDefault, + clientCustomProvider, handleStepsLogChunk, requestToolCall, requestMcpToolData, @@ -87,7 +90,10 @@ export function getAgentRuntimeImpl( }), // LLM - promptAiSdkStream, + promptAiSdkStream: clientCustomProvider + ? 
(streamParams) => + promptAiSdkStream({ ...streamParams, clientCustomProvider }) + : promptAiSdkStream, promptAiSdk, promptAiSdkStructured, diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 60bb678bb1..60283e1745 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -23,6 +23,11 @@ import { markChatGptOAuthRateLimited, } from './model-provider' import { refreshChatGptOAuthToken } from '../credentials' +import { + getCustomProviderApiKeyFromEnv, + getCustomProviderBaseUrlFromEnv, + getCustomProviderModelFromEnv, +} from '../env' import { getErrorStatusCode } from '../error-utils' import type { ModelRequestParams } from './model-provider' @@ -130,6 +135,71 @@ type OpenRouterUsageAccounting = { } } +/** + * Retry count for direct calls to a custom OpenAI-compatible provider. + * One retry absorbs brief model-load stalls on first call. We deliberately + * don't retry more — local failures are usually deterministic (provider down, + * wrong URL, model not pulled) and extra retries only make errors slower. + */ +const CUSTOM_PROVIDER_MAX_RETRIES = 1 + +/** + * Wrap raw errors from a custom OpenAI-compatible endpoint in a friendly, + * actionable message. Distinguishes connection failures (provider down, + * wrong URL) from model-not-found errors. + */ +function buildCustomProviderError(args: { + baseUrl: string + model: string + rawMessage: string + rawCode?: string +}): string { + const lower = args.rawMessage.toLowerCase() + const codeLower = (args.rawCode ?? 
'').toLowerCase() + const isConnectionError = + lower.includes('econnrefused') || + lower.includes('connectionrefused') || + lower.includes('connection refused') || + lower.includes('unable to connect') || + lower.includes('fetch failed') || + lower.includes('etimedout') || + lower.includes('enotfound') || + lower.includes('socket hang up') || + codeLower === 'connectionrefused' || + codeLower === 'econnrefused' || + codeLower === 'enotfound' || + codeLower === 'etimedout' + const isModelNotFound = + lower.includes('model not found') || + lower.includes('does not exist') || + (lower.includes('404') && lower.includes(args.model.toLowerCase())) + + if (isConnectionError) { + return [ + `Cannot reach LLM provider at ${args.baseUrl}.`, + ``, + `Check:`, + ` • Is the provider running? (e.g. \`ollama serve\` or LM Studio's Local Server)`, + ` • Is the URL correct? Currently configured: ${args.baseUrl}`, + ` • Is the model '${args.model}' loaded? (e.g. \`ollama list\`)`, + ``, + `Original error: ${args.rawMessage}`, + ].join('\n') + } + if (isModelNotFound) { + return [ + `Model '${args.model}' not found at ${args.baseUrl}.`, + ``, + `Check:`, + ` • Pull the model first: \`ollama pull ${args.model}\``, + ` • Verify the exact tag with \`ollama list\``, + ``, + `Original error: ${args.rawMessage}`, + ].join('\n') + } + return args.rawMessage +} + /** * Check if an error is an OAuth rate limit error that should trigger fallback. */ @@ -303,13 +373,55 @@ export async function* promptAiSdkStream( return promptAborted('User cancelled input') } + // Resolve custom-provider precedence: agent > client option > env. + // First non-empty baseUrl wins; its apiKey comes along to avoid mixing + // credentials with the wrong endpoint. 
+ const customSources = [ + params.agentProviderOptions, + params.clientCustomProvider, + { + baseUrl: getCustomProviderBaseUrlFromEnv(), + apiKey: getCustomProviderApiKeyFromEnv(), + }, + ] + const winningSource = customSources.find((s) => s?.baseUrl) + const resolvedBaseUrl = winningSource?.baseUrl + const resolvedApiKey = winningSource?.apiKey + + // Model override: substitute the agent's declared model with the env-configured + // local model when the custom provider is active. Skipped when an agent + // explicitly sets its own providerOptions.baseUrl — that agent is assumed to + // have declared a matching model. See PROVIDER_MODEL_ENV_VAR JSDoc. + const agentBaseUrl = params.agentProviderOptions?.baseUrl + const envModelOverride = + resolvedBaseUrl && !agentBaseUrl + ? getCustomProviderModelFromEnv() + : undefined + const effectiveModel = envModelOverride ?? params.model + + // Surface the substitution so users can confirm in logs that their /local + // model override is actually being applied to outbound requests. + if (envModelOverride && envModelOverride !== params.model) { + logger.info( + { + requestedModel: params.model, + effectiveModel, + baseUrl: resolvedBaseUrl, + }, + 'Custom provider active: substituting agent model with /local override', + ) + } + const modelParams: ModelRequestParams = { apiKey: params.apiKey, - model: params.model, + model: effectiveModel, skipChatGptOAuth: params.skipChatGptOAuth, costMode: params.costMode, + ...(resolvedBaseUrl + ? 
{ customProvider: { baseUrl: resolvedBaseUrl, apiKey: resolvedApiKey } } + : {}), } - const { model: aiSDKModel, isChatGptOAuth } = + const { model: aiSDKModel, isChatGptOAuth, isCustomProvider } = await getModelForRequest(modelParams) if (isChatGptOAuth) { @@ -329,9 +441,13 @@ export async function* promptAiSdkStream( prompt: undefined, model: aiSDKModel, messages: convertCbToModelMessages(params), - ...(isChatGptOAuth && { maxRetries: 0 }), - // For ChatGPT OAuth direct, don't send codebuff metadata/provider options to OpenAI - ...(isChatGptOAuth + // ChatGPT OAuth: no retries (we fall back to Codebuff on first failure). + // Custom provider: see CUSTOM_PROVIDER_MAX_RETRIES. + ...(isChatGptOAuth ? { maxRetries: 0 } : {}), + ...(isCustomProvider ? { maxRetries: CUSTOM_PROVIDER_MAX_RETRIES } : {}), + // Direct routes (ChatGPT OAuth, custom provider): skip codebuff_metadata + // and OpenRouter routing keys — neither belongs in those request bodies. + ...(isChatGptOAuth || isCustomProvider ? {} : { providerOptions: getProviderOptions({ @@ -458,7 +574,32 @@ export async function* promptAiSdkStream( // Track if we've yielded any content - if so, we can't safely fall back let hasYieldedContent = false - for await (const chunkValue of response.fullStream) { + // For custom-provider streams, a connection refusal at request init throws + // from the iterator before any error chunk is emitted. Rewrap into a + // friendly message so users see "is Ollama running?" not raw "fetch failed". + const stream = isCustomProvider && resolvedBaseUrl + ? (async function* () { + try { + yield* response.fullStream + } catch (e) { + const rawMessage = e instanceof Error ? e.message : String(e) + const rawCode = + e && typeof e === 'object' && 'code' in e + ? String((e as { code?: unknown }).code ?? 
'') + : undefined + throw new Error( + buildCustomProviderError({ + baseUrl: resolvedBaseUrl, + model: effectiveModel, + rawMessage, + rawCode, + }), + ) + } + })() + : response.fullStream + + for await (const chunkValue of stream) { if (chunkValue.type !== 'text-delta') { const flushed = stopSequenceHandler.flush() if (flushed) { @@ -603,6 +744,25 @@ export async function* promptAiSdkStream( 'Error in AI SDK stream', ) + // For custom-provider failures, rewrap with a friendly, actionable message + // before throwing so users see "is Ollama running?" not raw "fetch failed". + if (isCustomProvider && resolvedBaseUrl) { + const rawCode = + chunkValue.error && + typeof chunkValue.error === 'object' && + 'code' in chunkValue.error + ? String((chunkValue.error as { code?: unknown }).code ?? '') + : undefined + throw new Error( + buildCustomProviderError({ + baseUrl: resolvedBaseUrl, + model: effectiveModel, + rawMessage: errorMessage, + rawCode, + }), + ) + } + // For all other errors, throw them -- they are fatal. throw chunkValue.error } diff --git a/sdk/src/impl/model-provider.ts b/sdk/src/impl/model-provider.ts index 83e016c611..03329aa8c1 100644 --- a/sdk/src/impl/model-provider.ts +++ b/sdk/src/impl/model-provider.ts @@ -86,6 +86,8 @@ export interface ModelRequestParams { skipChatGptOAuth?: boolean /** Cost mode (e.g. 'free') — affects fallback behavior for OAuth routes */ costMode?: string + /** When set, route this request directly to the OpenAI-compatible endpoint and bypass Codebuff/OAuth. 
*/ + customProvider?: { baseUrl: string; apiKey?: string } } /** @@ -96,6 +98,8 @@ export interface ModelResult { model: LanguageModel /** Whether this model uses ChatGPT OAuth direct (affects cost tracking) */ isChatGptOAuth: boolean + /** Whether this model uses a custom OpenAI-compatible endpoint (affects cost tracking + metadata) */ + isCustomProvider: boolean } // Usage accounting type for OpenRouter/Codebuff backend responses @@ -115,7 +119,21 @@ type OpenRouterUsageAccounting = { * This function is async because it may need to refresh the OAuth token. */ export async function getModelForRequest(params: ModelRequestParams): Promise { - const { apiKey, model, skipChatGptOAuth, costMode } = params + const { apiKey, model, skipChatGptOAuth, costMode, customProvider } = params + + // 1) Custom OpenAI-compatible endpoint wins — explicit per-agent / client / env override. + // Bypasses Codebuff backend AND ChatGPT OAuth. + if (customProvider?.baseUrl) { + return { + model: createCustomProviderModel({ + model, + baseUrl: customProvider.baseUrl, + apiKey: customProvider.apiKey, + }), + isChatGptOAuth: false, + isCustomProvider: true, + } + } // Check if we should use ChatGPT OAuth direct // Only attempt for allowlisted models; non-allowlisted models silently fall through to backend. @@ -140,6 +158,7 @@ export async function getModelForRequest(params: ModelRequestParams): Promise `${trimmedBase}${endpoint}`, + headers: () => ({ + // Most local runtimes (Ollama, LM Studio) ignore the Authorization header + // entirely. Send a non-empty placeholder since some servers reject empty + // Bearer values; never send the user's Codebuff key on this code path. + Authorization: `Bearer ${apiKey ?? 
'unused'}`, + 'Content-Type': 'application/json', + 'user-agent': `ai-sdk/openai-compatible/${VERSION}/codebuff-custom-provider`, + }), + fetch: undefined, + includeUsage: undefined, + supportsStructuredOutputs: true, + }) +} diff --git a/sdk/src/run.ts b/sdk/src/run.ts index f5794a7def..36944ed0e9 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -85,6 +85,16 @@ export type CodebuffClientOptions = { maxAgentSteps?: number env?: Record + /** + * Default custom OpenAI-compatible provider base URL for runs that don't set + * one per-agent. Used for local models (Ollama, LM Studio) or self-hosted + * endpoints. Lower precedence than an agent's own providerOptions.baseUrl; + * higher precedence than the CODEBUFF_BASE_URL env var. + */ + providerBaseUrl?: string + /** Default API key paired with providerBaseUrl. Ignored if providerBaseUrl is unset. */ + providerApiKey?: string + handleEvent?: (event: PrintModeEvent) => void | Promise handleStreamChunk?: ( chunk: @@ -198,6 +208,8 @@ async function runOnce({ agentDefinitions, maxAgentSteps = MAX_AGENT_STEPS_DEFAULT, env, + providerBaseUrl, + providerApiKey, handleEvent, handleStreamChunk, @@ -376,6 +388,9 @@ async function runOnce({ const agentRuntimeImpl = getAgentRuntimeImpl({ logger, apiKey, + clientCustomProvider: providerBaseUrl + ? { baseUrl: providerBaseUrl, apiKey: providerApiKey } + : undefined, handleStepsLogChunk: () => { // Does nothing for now },