/** Outcome of probing one deployment with a minimal chat-completions request. */
interface CompatCheckResult {
  id: string;
  name: string;
  status: string;
  error?: string;
  latency?: number;
}

/**
 * Render one row of the summary table.
 *
 * @param result Probe outcome for a single deployment.
 * @returns The model name padded to 26 columns, the status, the latency when
 *          one was recorded, and the first 80 characters of the error, if any.
 */
function formatReportLine(result: CompatCheckResult): string {
  let line = `${result.name.padEnd(26)} | ${result.status}`;
  // Compare against undefined: a measured latency of 0 ms is still a measurement.
  if (result.latency !== undefined) line += ` (${result.latency} ms)`;
  if (result.error) line += ` :: ${result.error.substring(0, 80)}`;
  return line;
}

/**
 * Probe every deployment reported by the model proxy with a tiny "ping"
 * chat-completions request and print a per-model compatibility report.
 *
 * Reads DIAL_BASE_URL / DIAL_API_KEY (falling back to the
 * CODEMIE_AZURE_OPENAI_* variables) and DIAL_API_VERSION from the environment.
 * Exits with code 2 when the endpoint or key is missing.
 */
async function main(): Promise<void> {
  const baseUrl = process.env.DIAL_BASE_URL || process.env.CODEMIE_AZURE_OPENAI_BASE_URL || '';
  const apiKey = process.env.DIAL_API_KEY || process.env.CODEMIE_AZURE_OPENAI_API_KEY || '';
  const apiVersion = process.env.DIAL_API_VERSION || '2024-06-01';
  if (!baseUrl || !apiKey) {
    console.error('[dial-model-compat-check] DIAL_BASE_URL and DIAL_API_KEY required in env');
    process.exit(2);
  }

  const proxy = new AzureOpenAIModelProxy(baseUrl, apiKey, apiVersion);
  // Named proxyConfig so it does not shadow the dotenv `config` import.
  const proxyConfig = { baseUrl, apiKey, azureApiVersion: apiVersion };
  const models = await proxy.fetchModels(proxyConfig);
  console.log(`Found ${models.length} deployments`);

  // Avoid "//openai/..." when the configured base URL ends with a slash.
  const endpointRoot = baseUrl.replace(/\/+$/, '');
  const headers = {
    'api-key': apiKey,
    'Content-Type': 'application/json'
  };

  const results: CompatCheckResult[] = [];
  // Deployments are probed one at a time, in the order the proxy returned them.
  for (const m of models) {
    const testPayload = {
      model: m.id,
      messages: [{ role: 'user', content: 'ping' }],
      max_tokens: 16
    };
    const url =
      `${endpointRoot}/openai/deployments/${encodeURIComponent(m.id)}` +
      `/chat/completions?api-version=${encodeURIComponent(apiVersion)}`;
    const t0 = Date.now();
    try {
      const resp = await fetch(url, { method: 'POST', headers, body: JSON.stringify(testPayload) });
      const body = await resp.text();
      if (!resp.ok) {
        results.push({ id: m.id, name: m.name, status: 'FAIL', error: `HTTP ${resp.status}: ${body}` });
        console.error(`[${m.name}] FAIL: HTTP ${resp.status}: ${body}`);
      } else {
        const delta = Date.now() - t0;
        results.push({ id: m.id, name: m.name, status: 'OK', latency: delta });
        console.log(`[${m.name}] OK (${delta} ms)`);
      }
    } catch (e: unknown) {
      // Network-level failure (DNS, TLS, connection reset, ...): record it and keep going.
      const reason = e instanceof Error && e.message ? e.message : String(e);
      results.push({ id: m.id, name: m.name, status: 'ERROR', error: reason });
      console.error(`[${m.name}] ERROR: ${reason}`);
    }
  }

  // Print summary table
  console.log('\n--- DIAL Model Compatibility Report ---');
  for (const r of results) {
    console.log(formatReportLine(r));
  }
}
b/src/agents/plugins/__tests__/codemie-code-reasoning.test.ts index f64c30ff..121648c0 100644 --- a/src/agents/plugins/__tests__/codemie-code-reasoning.test.ts +++ b/src/agents/plugins/__tests__/codemie-code-reasoning.test.ts @@ -79,6 +79,16 @@ vi.mock('../reasoning-sanitizer/index.js', () => ({ cleanupReasoningSanitizerPlugin: mockCleanupReasoningSanitizer, })); +// Mock azure-dial-sanitizer +const { mockGetAzureDialSanitizerPluginUrl, mockCleanupAzureDialSanitizer } = vi.hoisted(() => ({ + mockGetAzureDialSanitizerPluginUrl: vi.fn(() => 'file:///mock/azure-dial-sanitizer.ts'), + mockCleanupAzureDialSanitizer: vi.fn(), +})); +vi.mock('../azure-dial-sanitizer/index.js', () => ({ + getAzureDialSanitizerPluginUrl: mockGetAzureDialSanitizerPluginUrl, + cleanupAzureDialSanitizerPlugin: mockCleanupAzureDialSanitizer, +})); + // Mock OpenCodeSessionAdapter const { mockDiscoverSessions } = vi.hoisted(() => ({ mockDiscoverSessions: vi.fn(), @@ -107,6 +117,14 @@ vi.mock('../opencode/opencode-dynamic-models.js', () => ({ fetchDynamicModelConfigs: vi.fn(() => Promise.resolve({})), })); +// Mock AzureOpenAIModelProxy so azure-openai provider path doesn't make real requests +vi.mock('../../../providers/plugins/azure-openai/azure-openai.models.js', () => ({ + AzureOpenAIModelProxy: vi.fn().mockImplementation(() => ({ + fetchDeploymentInfos: vi.fn(() => Promise.resolve([])), + fetchModels: vi.fn(() => Promise.resolve([])), + })), +})); + // Mock fs vi.mock('fs', () => ({ existsSync: vi.fn(() => true), @@ -242,6 +260,19 @@ describe('CodeMie Code Plugin — Reasoning Sanitization Integration', () => { expect(config.plugin).toContain('file:///mock/hooks-plugin.js'); expect(config.plugin).toContain('file:///mock/reasoning-sanitizer.ts'); }); + + it('injects azure-dial-sanitizer plugin for azure-openai provider', async () => { + const env = createEnv({ + CODEMIE_PROVIDER: 'azure-openai', + CODEMIE_API_KEY: 'azure-key', + CODEMIE_AZURE_OPENAI_BASE_URL: 'https://dial.example.com', + }); + 
await beforeRun(env, {} as any); + + const config = parseConfig(env); + expect(config.plugin).toContain('file:///mock/azure-dial-sanitizer.ts'); + expect(mockGetAzureDialSanitizerPluginUrl).toHaveBeenCalled(); + }); }); describe('Cleanup — onSessionEnd', () => { @@ -276,6 +307,7 @@ describe('CodeMie Code Plugin — Reasoning Sanitization Integration', () => { expect(mockCleanupHooksPlugin).toHaveBeenCalled(); expect(mockCleanupReasoningSanitizer).toHaveBeenCalled(); + expect(mockCleanupAzureDialSanitizer).toHaveBeenCalled(); }); }); }); diff --git a/src/agents/plugins/azure-dial-sanitizer/__tests__/azure-dial-sanitizer-source.test.ts b/src/agents/plugins/azure-dial-sanitizer/__tests__/azure-dial-sanitizer-source.test.ts new file mode 100644 index 00000000..05f293a4 --- /dev/null +++ b/src/agents/plugins/azure-dial-sanitizer/__tests__/azure-dial-sanitizer-source.test.ts @@ -0,0 +1,108 @@ +/** + * Tests for AZURE_DIAL_SANITIZER_PLUGIN_SOURCE string constant. + * + * Pure string validation — no mocks needed. 
+ * + * @group unit + */ + +import { describe, it, expect } from 'vitest'; +import { AZURE_DIAL_SANITIZER_PLUGIN_SOURCE } from '../azure-dial-sanitizer-source.js'; + +describe('AZURE_DIAL_SANITIZER_PLUGIN_SOURCE', () => { + it('is a non-empty string', () => { + expect(typeof AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toBe('string'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE.length).toBeGreaterThan(0); + }); + + it('contains OpenCode Plugin type import and default export', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('import type { Plugin } from "@opencode-ai/plugin"'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('export default'); + }); + + it('contains chat.params hook', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"chat.params"'); + }); + + describe('provider detection', () => { + it('detects azure-dial- provider by prefix', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('azure-dial-'); + }); + + it('checks providerID', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('providerID'); + }); + + it('uses case-insensitive comparison', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('.toLowerCase()'); + }); + }); + + describe('cache_control stripping', () => { + it('strips cache_control from message content', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('cache_control'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('sanitizeMessage'); + }); + + it('strips cache_control from top-level message field', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('delete m["cache_control"]'); + }); + + it('strips cache_control from content[] items inside message', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('delete cleaned["cache_control"]'); + }); + + it('handles array content (multipart messages)', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('Array.isArray'); + }); + + it('strips cache_control for ALL models 
including Claude', () => { + // No isClaude guard — always strip for azure-dial providers + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).not.toContain('isClaude'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).not.toContain('isClaudeModel'); + }); + + it('applies sanitizeMessage to all messages', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('output.messages.map(sanitizeMessage)'); + }); + + it('strips reasoning_content from messages and nested parts', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('reasoning_content'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('delete m["reasoning_content"]'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('delete cleaned["reasoning_content"]'); + }); + }); + + describe('thinking stripping', () => { + it('strips thinking param', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"thinking"'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('sanitizeParamsContainer'); + }); + }); + + describe('reasoning param stripping', () => { + it('strips reasoningSummary', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"reasoningSummary"'); + }); + + it('strips reasoning_summary', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"reasoning_summary"'); + }); + + it('strips reasoning', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"reasoning"'); + }); + + it('strips broader top-level compatibility fields', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"parallel_tool_calls"'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"store"'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"metadata"'); + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('"prediction"'); + }); + + it('normalizes messages to allowed OpenAI fields', () => { + expect(AZURE_DIAL_SANITIZER_PLUGIN_SOURCE).toContain('ALLOWED_MESSAGE_FIELDS'); + 
/** Text DIAL/Azure include in the 400 response when the payload carries unknown fields. */
const EXTRA_INPUTS_MARKER = 'Extra inputs are not permitted';

/**
 * Report whether an error body mentions the "extra inputs" rejection.
 * Accepts raw strings as well as already-parsed JSON bodies (objects/arrays).
 */
function mentionsExtraInputs(body: unknown): boolean {
  if (typeof body === 'string') return body.includes(EXTRA_INPUTS_MARKER);
  if (body && typeof body === 'object') {
    try {
      return JSON.stringify(body).includes(EXTRA_INPUTS_MARKER);
    } catch {
      // Circular or otherwise non-serializable body: treat as "not a match".
      return false;
    }
  }
  return false;
}

/**
 * Decide whether an error is an HTTP 400 caused by unsupported request fields,
 * i.e. the one failure a sanitized retry can fix.
 */
function isExtraInputsRejection(err: unknown): boolean {
  if (!err || typeof err !== 'object') return false;
  const e = err as {
    status?: unknown;
    code?: unknown;
    message?: unknown;
    data?: unknown;
    response?: { status?: unknown; data?: unknown } | null;
  };
  const isBadRequest = e.status === 400 || e.code === 400 || e.response?.status === 400;
  if (!isBadRequest) return false;
  return (
    (typeof e.message === 'string' && e.message.includes(EXTRA_INPUTS_MARKER)) ||
    mentionsExtraInputs(e.response?.data) ||
    mentionsExtraInputs(e.data)
  );
}

/**
 * Performs a request to a DIAL/Azure endpoint and, when it is rejected with
 * HTTP 400 "Extra inputs are not permitted", retries once with the payload
 * passed through sanitizeAzureDialPayload.
 *
 * @param send Async function making the request (SDK call or HTTP fetch).
 * @param payload Original request payload.
 * @returns The result of the first successful attempt.
 * @throws The original error unchanged when it is not an "extra inputs" 400.
 * @throws Error prefixed with "[DIAL Retry]" when the sanitized retry also
 *         fails; the underlying failure is attached as `cause`.
 */
export async function requestWithSanitizerRetry<T>(
  send: (payload: any) => Promise<T>,
  payload: any
): Promise<T> {
  // First attempt — original payload
  try {
    return await send(payload);
  } catch (err: unknown) {
    if (!isExtraInputsRejection(err)) throw err;
  }

  // Second attempt — sanitized payload
  const cleaned = sanitizeAzureDialPayload(payload);
  try {
    return await send(cleaned);
  } catch (retryErr: unknown) {
    const detail = (retryErr as { message?: unknown } | null | undefined)?.message || retryErr;
    const wrapped = new Error(`[DIAL Retry] Request failed after sanitize retry: ${detail}`);
    // Keep the underlying error reachable so callers can still inspect status/body.
    (wrapped as Error & { cause?: unknown }).cause = retryErr;
    throw wrapped;
  }
}
/** Conservative set of top-level Chat Completions fields forwarded to DIAL. */
const DIAL_ALLOWED_ROOT_FIELDS = [
  'model', 'messages', 'temperature', 'max_tokens', 'top_p', 'stream',
  'stop', 'presence_penalty', 'frequency_penalty'
];

/** Message-level fields kept on each entry of `messages`. */
const DIAL_ALLOWED_MESSAGE_FIELDS = ['role', 'content', 'name', 'tool_call_id', 'tool_calls', 'function_call'];

/**
 * Provider-native fields removed from each content part.
 * Mirrors UNSUPPORTED_NESTED_FIELDS in the embedded plugin source in this file;
 * the plugin runs from a generated file, so the list cannot be shared by import.
 */
const DIAL_UNSUPPORTED_PART_FIELDS = [
  'cache_control',
  'reasoning_content',
  'reasoningContent',
  'thinking',
  'citations',
  'signature',
  'redacted_thinking',
];

/** Copy a content part without provider-native fields; non-objects pass through. */
function sanitizeDialContentPart(part: any): any {
  if (!part || typeof part !== 'object') return part;
  const copy = { ...part };
  for (const field of DIAL_UNSUPPORTED_PART_FIELDS) delete copy[field];
  return copy;
}

/** Reduce one message to the allow-listed fields; non-object entries pass through. */
function sanitizeDialMessage(msg: any): any {
  // `key in msg` throws a TypeError for null/primitives, so guard first.
  if (!msg || typeof msg !== 'object') return msg;
  const m: any = {};
  for (const key of DIAL_ALLOWED_MESSAGE_FIELDS) if (key in msg) m[key] = msg[key];
  // Message-level provider fields need no explicit delete: only allow-listed keys were copied.
  if (Array.isArray(m.content)) {
    m.content = m.content.map(sanitizeDialContentPart);
  }
  return m;
}

/**
 * Sanitize an OpenAI/DIAL payload for strict DIAL compatibility.
 *
 * Builds a new object containing only the allow-listed top-level fields, reduces
 * every message to the allow-listed message fields, and strips provider-native
 * fields from the items of array-valued `content`. The input is not mutated.
 *
 * @param obj Request payload; `null`, `undefined` and primitives are returned as-is.
 * @returns A sanitized shallow copy of the payload.
 */
export function sanitizeAzureDialPayload(obj: any): any {
  if (obj == null || typeof obj !== 'object') return obj;
  const cleaned: any = {};
  for (const k of DIAL_ALLOWED_ROOT_FIELDS) if (k in obj) cleaned[k] = obj[k];
  if (Array.isArray(cleaned.messages)) {
    cleaned.messages = cleaned.messages.map(sanitizeDialMessage);
  }
  return cleaned;
}
"parallel_tool_calls", + "prompt_cache_key", +]; + +const UNSUPPORTED_NESTED_FIELDS = [ + "cache_control", + "reasoning_content", + "reasoningContent", + "thinking", + "citations", + "signature", + "redacted_thinking", +]; + +const ALLOWED_MESSAGE_FIELDS = new Set([ + "role", + "content", + "name", + "tool_call_id", + "tool_calls", + "function_call", +]); + +const ALLOWED_TOOL_CALL_FIELDS = new Set(["id", "type", "function"]); +const ALLOWED_FUNCTION_FIELDS = new Set(["name", "arguments"]); + +function removeUnsupportedNestedFields(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map(removeUnsupportedNestedFields); + } + + if (!value || typeof value !== "object") { + return value; + } + + const cleaned = { ...(value as Record) }; + for (const field of UNSUPPORTED_NESTED_FIELDS) { + delete cleaned[field]; + } + + for (const [key, nested] of Object.entries(cleaned)) { + cleaned[key] = removeUnsupportedNestedFields(nested); + } + + return cleaned; +} + +function sanitizeToolCall(toolCall: unknown): unknown { + if (!toolCall || typeof toolCall !== "object") return toolCall; + + const input = toolCall as Record; + const cleaned: Record = {}; + for (const [key, value] of Object.entries(input)) { + if (!ALLOWED_TOOL_CALL_FIELDS.has(key)) continue; + + if (key === "function" && value && typeof value === "object") { + const fn: Record = {}; + for (const [fnKey, fnValue] of Object.entries(value as Record)) { + if (ALLOWED_FUNCTION_FIELDS.has(fnKey)) { + fn[fnKey] = fnValue; + } + } + cleaned[key] = fn; + } else { + cleaned[key] = removeUnsupportedNestedFields(value); + } + } + + return cleaned; +} + +/** + * Normalize a message to a conservative OpenAI Chat Completions shape. + * + * DIAL/Azure OpenAI use OpenAI-compatible Chat Completions API and do NOT + * support Anthropic-only fields at any level — even for Claude deployments. 
+ * + * Handles: + * - messages[i].cache_control + * - messages[i].reasoning_content + * - messages[i].content[j].cache_control + * - messages[i].content[j].reasoning_content + * - message/content metadata injected by AI SDKs or prior model responses. + */ +function sanitizeMessage(msg: unknown): unknown { + if (!msg || typeof msg !== "object") return msg; + const input = msg as Record; + const m: Record = {}; + + for (const [key, value] of Object.entries(input)) { + if (!ALLOWED_MESSAGE_FIELDS.has(key)) continue; + + if (key === "tool_calls" && Array.isArray(value)) { + m[key] = value.map(sanitizeToolCall); + } else { + m[key] = removeUnsupportedNestedFields(value); + } + } + + // Remove cache_control from the message object itself + delete m["cache_control"]; + delete m["reasoning_content"]; + delete m["reasoningContent"]; + delete m["thinking"]; + + // Remove cache_control from each item inside content[] + if (Array.isArray(m["content"])) { + m["content"] = (m["content"] as unknown[]).map((item: unknown) => { + if (item && typeof item === "object") { + const cleaned = removeUnsupportedNestedFields(item) as Record; + delete cleaned["cache_control"]; + delete cleaned["reasoning_content"]; + delete cleaned["reasoningContent"]; + delete cleaned["thinking"]; + return cleaned; + } + return item; + }); + } + + return m; +} + +function sanitizeParamsContainer(container: unknown): void { + if (!container || typeof container !== "object") return; + + const params = container as Record; + for (const field of UNSUPPORTED_TOP_LEVEL_FIELDS) { + delete params[field]; + } +} + +/** + * Strip provider-specific fields from messages and top-level params for + * Azure OpenAI / EPAM DIAL providers. + * + * Activated only for providers whose ID starts with "azure-dial-" + * (the naming convention used by buildAzureOpenAIProviders). 
+ * + * Always sanitizes for ALL models (including Claude), because DIAL exposes a + * single OpenAI-compatible schema and rejects extra provider-native fields. + */ +const AzureDialSanitizerPlugin: Plugin = async (_input) => ({ + "chat.params": async (input, output) => { + const pid: string = (input.model?.providerID ?? "").toLowerCase(); + const aid: string = (input.model?.api?.id ?? "").toLowerCase(); + + // Only run for azure-dial-* providers + const isAzureDial = + pid.startsWith("azure-dial-") || + aid.startsWith("azure-dial-"); + if (!isAzureDial) return; + + // 1. Strip provider-native fields from every message and nested content item. + if (Array.isArray(output.messages)) { + output.messages = output.messages.map(sanitizeMessage); + } + + // 2. Strip request-level params from both known OpenCode containers. + sanitizeParamsContainer(output); + sanitizeParamsContainer(output.options); + }, +}); + +export default AzureDialSanitizerPlugin; +`; diff --git a/src/agents/plugins/azure-dial-sanitizer/index.ts b/src/agents/plugins/azure-dial-sanitizer/index.ts new file mode 100644 index 00000000..03324462 --- /dev/null +++ b/src/agents/plugins/azure-dial-sanitizer/index.ts @@ -0,0 +1,2 @@ +export { getAzureDialSanitizerPluginUrl, cleanupAzureDialSanitizerPlugin } from './inject-sanitizer.js'; +export { AZURE_DIAL_SANITIZER_PLUGIN_SOURCE } from './azure-dial-sanitizer-source.js'; diff --git a/src/agents/plugins/azure-dial-sanitizer/inject-sanitizer.ts b/src/agents/plugins/azure-dial-sanitizer/inject-sanitizer.ts new file mode 100644 index 00000000..c0a31a68 --- /dev/null +++ b/src/agents/plugins/azure-dial-sanitizer/inject-sanitizer.ts @@ -0,0 +1,11 @@ +import { createPluginInjector } from '../../core/plugin-injector.js'; +import { AZURE_DIAL_SANITIZER_PLUGIN_SOURCE } from './azure-dial-sanitizer-source.js'; + +const injector = createPluginInjector( + 'azure-dial-sanitizer.ts', + AZURE_DIAL_SANITIZER_PLUGIN_SOURCE, + 'azure-dial-sanitizer' +); + +export const 
getAzureDialSanitizerPluginUrl = injector.getPluginFileUrl; +export const cleanupAzureDialSanitizerPlugin = injector.cleanup; diff --git a/src/agents/plugins/claude/__tests__/claude.provider-support.test.ts b/src/agents/plugins/claude/__tests__/claude.provider-support.test.ts index 1f6577bc..dfbc8356 100644 --- a/src/agents/plugins/claude/__tests__/claude.provider-support.test.ts +++ b/src/agents/plugins/claude/__tests__/claude.provider-support.test.ts @@ -5,4 +5,8 @@ describe('ClaudePluginMetadata', () => { it('supports anthropic-subscription provider', () => { expect(ClaudePluginMetadata.supportedProviders).toContain('anthropic-subscription'); }); + + it('supports azure-openai provider', () => { + expect(ClaudePluginMetadata.supportedProviders).toContain('azure-openai'); + }); }); diff --git a/src/agents/plugins/claude/claude.plugin.ts b/src/agents/plugins/claude/claude.plugin.ts index fc7cb5b8..1cc5e6af 100644 --- a/src/agents/plugins/claude/claude.plugin.ts +++ b/src/agents/plugins/claude/claude.plugin.ts @@ -84,7 +84,7 @@ export const ClaudePluginMetadata: AgentMetadata = { opusModel: ['ANTHROPIC_DEFAULT_OPUS_MODEL'], }, - supportedProviders: ['litellm', 'ai-run-sso', 'bedrock', 'bearer-auth', 'anthropic-subscription'], + supportedProviders: ['litellm', 'ai-run-sso', 'bedrock', 'bearer-auth', 'anthropic-subscription', 'azure-openai'], blockedModelPatterns: [], recommendedModels: ['claude-sonnet-4-6', 'claude-4-opus', 'gpt-4.1'], diff --git a/src/agents/plugins/codemie-code.plugin.ts b/src/agents/plugins/codemie-code.plugin.ts index 2a83aef6..73d3e129 100644 --- a/src/agents/plugins/codemie-code.plugin.ts +++ b/src/agents/plugins/codemie-code.plugin.ts @@ -4,6 +4,7 @@ import { existsSync } from 'fs'; import { logger } from '../../utils/logger.js'; import { getModelConfig, getChatCompletionsModelConfigs, getResponsesApiModelConfigs } from './opencode/opencode-model-configs.js'; import { fetchDynamicModelConfigs } from './opencode/opencode-dynamic-models.js'; 
/**
 * Translate the CodeMie provider name into the provider key used in the
 * OpenCode config. Anything that is not explicitly listed is routed through
 * 'codemie-proxy'.
 */
function determineActiveProvider(provider: string | undefined): string {
  switch (provider) {
    case 'ollama':
      return 'ollama';
    case 'bedrock':
      return 'amazon-bedrock';
    case 'azure-openai':
      return 'azure-openai';
    case 'litellm':
      return 'litellm';
    default:
      return 'codemie-proxy';
  }
}

/**
 * Pick the Azure OpenAI endpoint from the environment.
 *
 * CODEMIE_AZURE_OPENAI_BASE_URL wins when it is set to a non-empty value;
 * otherwise CODEMIE_BASE_URL is returned (which may itself be unset).
 */
function resolveAzureOpenAIBaseUrl(env: NodeJS.ProcessEnv): string | undefined {
  const azureSpecific = env.CODEMIE_AZURE_OPENAI_BASE_URL;
  if (azureSpecific) {
    return azureSpecific;
  }
  // Generic fallback shared by every provider.
  return env.CODEMIE_BASE_URL;
}
/** Drop a trailing ":latest" tag from an Ollama model id; other tags are kept. */
function normalizeOllamaModelId(modelId: string): string {
  return modelId.replace(/:latest$/, '');
}

/**
 * Derive the provider key for a deployment.
 * Every character outside [a-zA-Z0-9_-] becomes "-", so the key is safe for
 * OpenCode config (no dots or colons).
 */
function toAzureDialProviderKey(modelId: string): string {
  return `azure-dial-${modelId.replace(/[^a-zA-Z0-9_-]/g, '-')}`;
}

/**
 * Build Azure OpenAI provider entries for OpenCode config.
 *
 * Architecture note: EPAM DIAL (and standard Azure OpenAI) require the deployment
 * name in the URL path:
 *   /openai/deployments/{deployment}/chat/completions?api-version={ver}
 *
 * @ai-sdk/openai-compatible does not support per-model URL routing — it uses
 * a single baseURL for the entire provider. To support multiple deployments we
 * register one named provider per deployment (azure-dial-{sanitized id}) with
 * its own baseURL that includes the deployment path. The active model is set to
 * the provider entry that matches the selected deployment.
 *
 * @param baseUrl Endpoint root; one trailing slash is removed.
 * @param apiKey Sent as the `api-key` header, never as a bearer token.
 * @param apiVersion Optional value for the `api-version` header.
 * @param azureModels Model configs keyed by deployment id.
 * @param timeout Request timeout copied into every provider's options.
 * @returns The provider map, the list of provider keys (no duplicates), and a
 *          function mapping a deployment id to its provider key.
 */
function buildAzureOpenAIProviders(
  baseUrl: string,
  apiKey: string,
  apiVersion: string | undefined,
  azureModels: Record<string, any>,
  timeout: number
): {
  providers: Record<string, any>;
  enabledProviders: string[];
  activeProviderForModel: (modelId: string) => string;
} {
  const providers: Record<string, any> = {};
  const enabledProviders: string[] = [];
  const cleanBase = baseUrl.replace(/\/$/, '');

  // Register one provider entry per deployment so each gets the correct URL.
  for (const [modelId, modelConfig] of Object.entries(azureModels)) {
    const providerKey = toAzureDialProviderKey(modelId);
    // Two ids can sanitize to the same key (e.g. "gpt-4.1" and "gpt-4-1").
    // Keep the first registration: the caller inserts the selected model before
    // the fetched deployments, so the active model's entry must not be replaced.
    if (providerKey in providers) continue;

    const deploymentUrl = `${cleanBase}/openai/deployments/${encodeURIComponent(modelId)}/`;
    providers[providerKey] = {
      npm: '@ai-sdk/openai-compatible',
      name: `Azure OpenAI (${modelId})`,
      options: {
        baseURL: deploymentUrl,
        // IMPORTANT: Do NOT put key in apiKey — sdk would send Authorization: Bearer.
        // DIAL requires `api-key` header (Azure standard).
        apiKey: '',
        headers: {
          'api-key': apiKey,
          ...(apiVersion ? { 'api-version': apiVersion } : {})
        },
        timeout,
      },
      // Each provider exposes exactly this one model.
      models: { [modelId]: modelConfig }
    };
    enabledProviders.push(providerKey);
  }

  return {
    providers,
    enabledProviders,
    activeProviderForModel: toAzureDialProviderKey
  };
}
+const baseEnabledProviders = ['codemie-proxy', 'openai', 'ollama', 'amazon-bedrock', 'litellm']; +let enabledProviders: string[]; +if (params.activeProvider) { + enabledProviders = [params.activeProvider, ...params.azureEnabledProviders]; +} else { + enabledProviders = [...baseEnabledProviders, ...params.azureEnabledProviders]; +} return { - enabled_providers: ['codemie-proxy', 'openai', 'ollama', 'amazon-bedrock', 'litellm'], + enabled_providers: enabledProviders, share: 'disabled', provider: { ...(params.proxyBaseUrl && { @@ -110,13 +196,11 @@ function buildOpenCodeConfig(params: { models: params.chatModels } }), - // OpenCode's built-in openai CUSTOM_LOADER — uses @ai-sdk/openai sdk.responses() - // which calls POST /v1/responses instead of /v1/chat/completions + // Per-deployment Azure providers (each has its own URL with the deployment path). + ...(params.azureProviders ?? {}), ...(params.responsesApiBaseUrl && hasResponsesApiModels && { openai: { name: 'CodeMie SSO', - // whitelist: suppress the built-in openai model list (GPT-4, GPT-4o, etc.) - // OpenCode merges user models with models.dev — whitelist restricts to ours only whitelist: Object.keys(params.responsesApiModels), options: { baseURL: `${params.responsesApiBaseUrl}/`, @@ -146,10 +230,16 @@ function buildOpenCodeConfig(params: { baseURL: `${params.ollamaBaseUrl}/`, apiKey: 'ollama', timeout: params.timeout, - } + }, + // Provide all models so switching in session is possible + models: params.chatModels } }, - model: `${params.activeProvider}/${params.modelId}` + // For Azure: use the per-deployment provider key as the active provider. + // E.g. "azure-dial-anthropic-claude-sonnet-4-6/anthropic.claude-sonnet-4-6" + model: params.azureActiveProvider + ? 
`${params.azureActiveProvider}/${params.modelId}` + : `${params.activeProvider}/${params.modelId}` }; } @@ -208,7 +298,7 @@ export const CodeMieCodePluginMetadata: AgentMetadata = { model: [] }, - supportedProviders: ['litellm', 'ai-run-sso', 'ollama', 'bedrock', 'bearer-auth'], + supportedProviders: ['litellm', 'ai-run-sso', 'ollama', 'bedrock', 'bearer-auth', 'azure-openai'], ssoConfig: { enabled: true, clientType: 'codemie-code' }, @@ -238,7 +328,9 @@ export const CodeMieCodePluginMetadata: AgentMetadata = { // ensureSessionFile handles its own errors internally await ensureSessionFile(sessionId, env, BUILTIN_AGENT_NAME); } - + + // Resolve the effective provider name. + // CODEMIE_PROVIDER is set by ConfigLoader.exportProviderEnvVars from config.provider. const provider = env.CODEMIE_PROVIDER; const baseUrl = env.CODEMIE_BASE_URL; @@ -251,47 +343,110 @@ export const CodeMieCodePluginMetadata: AgentMetadata = { return env; } + let profileConfig: any = undefined; + if (env.CODEMIE_PROFILE_CONFIG) { + try { + profileConfig = JSON.parse(env.CODEMIE_PROFILE_CONFIG); + } catch { + logger.warn('[codemie-code] Failed to parse CODEMIE_PROFILE_CONFIG', { agent: 'codemie-code' }); + } + } + // Fetch live model catalogue from the CodeMie API. // Falls back to the static OPENCODE_MODEL_CONFIGS on any error. const allModels = await fetchDynamicModelConfigs( baseUrl, env.CODEMIE_URL, env.CODEMIE_JWT_TOKEN, + provider, + profileConfig, ); - - // Model selection priority: env var > config > default - // Use dynamic catalogue first, then fall back to static getModelConfig for unknown IDs. - const selectedModel = env.CODEMIE_MODEL || config?.model || 'gpt-5-2-2025-12-11'; - const modelConfig = allModels[selectedModel] ?? getModelConfig(selectedModel); + const selectedModel = env.CODEMIE_MODEL || config?.model || 'gpt-4.1'; + const normalizedSelectedModel = provider === 'ollama' ? 
normalizeOllamaModelId(selectedModel) : selectedModel; + const modelConfig = allModels[normalizedSelectedModel] ?? allModels[selectedModel] ?? getModelConfig(normalizedSelectedModel); const { providerOptions } = modelConfig; const chatModels = getChatCompletionsModelConfigs(allModels); const responsesApiModels = getResponsesApiModelConfigs(allModels); const isBedrock = provider === 'bedrock'; const isLiteLLM = provider === 'litellm'; - const proxyBaseUrl = provider !== 'ollama' && !isBedrock && !isLiteLLM ? baseUrl : undefined; + const isAzureOpenAI = provider === 'azure-openai'; + const proxyBaseUrl = provider !== 'ollama' && !isBedrock && !isLiteLLM && !isAzureOpenAI ? baseUrl : undefined; const ollamaBaseUrl = resolveOllamaBaseUrl(baseUrl, provider); const activeProvider = determineActiveProvider(provider); const timeout = providerOptions?.timeout ?? parseInt(env.CODEMIE_TIMEOUT || '600') * 1000; const modelId = isBedrock ? toBedrockModelId(modelConfig.id, env.AWS_REGION || env.CODEMIE_AWS_REGION) - : modelConfig.id; - - // Responses API base URL: use proxyBaseUrl for SSO/bearer-auth, or baseUrl for LiteLLM. - // Always set regardless of selected model — fixes model-switching bug where switching - // from a Claude model to a GPT model mid-session would miss the CUSTOM_LOADER. + : provider === 'ollama' + ? normalizeOllamaModelId(modelConfig.id) + : modelConfig.id; + const responsesApiBaseUrl = proxyBaseUrl || (isLiteLLM ? baseUrl : undefined); if (responsesApiBaseUrl && Object.keys(responsesApiModels).length > 0) { env.OPENAI_API_KEY = 'proxy-handled'; logger.debug('[codemie-code] Enabling openai CUSTOM_LOADER for Responses API models'); } - + + // For Azure OpenAI: build per-deployment providers using buildAzureOpenAIProviders. + // Each deployment gets its own named provider entry with the correct URL. 
+ let azureProvidersResult: ReturnType | undefined; + if (isAzureOpenAI) { + const azureEndpoint = resolveAzureOpenAIBaseUrl(env); + const azureApiVersion = env.CODEMIE_AZURE_OPENAI_API_VERSION || env.AZURE_OPENAI_API_VERSION; + const azureApiKey = env.CODEMIE_API_KEY || ''; + if (azureEndpoint) { + // Start with the selected model as fallback + const initialAzureModels: Record = {}; + const cfg = getModelConfig(modelId); + const { displayName: _dn, providerOptions: _po, use_responses_api: _ra, ...opencodeCfg } = cfg as any; + initialAzureModels[modelId] = opencodeCfg; + + // Try to fetch all deployments from DIAL/Azure for a full model list + try { + const proxy = new AzureOpenAIModelProxy(azureEndpoint, azureApiKey, azureApiVersion); + const deployments = await proxy.fetchDeploymentInfos({ + provider: 'azure-openai', + baseUrl: azureEndpoint, + apiKey: azureApiKey, + model: modelId, + azureApiVersion, + } as any); + for (const d of deployments) { + const dcfg = getModelConfig(d.id); + const { displayName: _ddn, providerOptions: _dpo, use_responses_api: _dra, ...dOpencodeCfg } = dcfg as any; + initialAzureModels[d.id] = dOpencodeCfg; + } + logger.debug(`[codemie-code] Loaded ${deployments.length} Azure deployments for per-deployment providers`); + } catch (err) { + logger.debug(`[codemie-code] Azure deployments fetch failed (using selected model only): ${err instanceof Error ? err.message : String(err)}`); + } + + azureProvidersResult = buildAzureOpenAIProviders( + azureEndpoint, + azureApiKey, + azureApiVersion, + initialAzureModels, + timeout + ); + logger.debug(`[codemie-code] Azure: active provider = ${azureProvidersResult.activeProviderForModel(modelId)}`); + } + } + const openCodeConfig = buildOpenCodeConfig({ proxyBaseUrl, litellmBaseUrl: isLiteLLM ? baseUrl : undefined, litellmApiKey: isLiteLLM ? 
env.CODEMIE_API_KEY : undefined, - ollamaBaseUrl, activeProvider, modelId, timeout, providerOptions, - chatModels, responsesApiModels, responsesApiBaseUrl + azureProviders: azureProvidersResult?.providers, + azureEnabledProviders: azureProvidersResult?.enabledProviders ?? [], + azureActiveProvider: azureProvidersResult?.activeProviderForModel(modelId), + ollamaBaseUrl, + activeProvider, + modelId, + timeout, + providerOptions, + chatModels, + responsesApiModels, + responsesApiBaseUrl, }); // --- Hooks injection --- @@ -347,6 +502,15 @@ export const CodeMieCodePluginMetadata: AgentMetadata = { const sanitizerPluginUrl = getReasoningSanitizerPluginUrl(); plugins.push(sanitizerPluginUrl); logger.debug(`[codemie-code] Injected reasoning-sanitizer plugin: ${sanitizerPluginUrl}`); + + // Inject Azure/DIAL sanitizer for all azure-dial-* providers. + // Strips cache_control from messages, thinking and reasoning params + // which DIAL/Azure OpenAI do not support (would return HTTP 400). + if (isAzureOpenAI) { + const azureDialSanitizerUrl = getAzureDialSanitizerPluginUrl(); + plugins.push(azureDialSanitizerUrl); + logger.debug(`[codemie-code] Injected azure-dial-sanitizer plugin: ${azureDialSanitizerUrl}`); + } // --- Storage path configuration --- // Configure storage path for OpenCode sessions @@ -453,6 +617,7 @@ export const CodeMieCodePluginMetadata: AgentMetadata = { delete process.env.OPENCODE_STORAGE_PATH; cleanupHooksPlugin(); cleanupReasoningSanitizerPlugin(); + cleanupAzureDialSanitizerPlugin(); } } } diff --git a/src/agents/plugins/codex/__tests__/codex.plugin.version-support.test.ts b/src/agents/plugins/codex/__tests__/codex.plugin.version-support.test.ts index d4af2d74..86af61cd 100644 --- a/src/agents/plugins/codex/__tests__/codex.plugin.version-support.test.ts +++ b/src/agents/plugins/codex/__tests__/codex.plugin.version-support.test.ts @@ -2,12 +2,13 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; 
vi.mock('../../../../providers/core/registry.js', () => ({ ProviderRegistry: { - registerProvider: vi.fn((template: unknown) => template), - registerSetupSteps: vi.fn(), - registerHealthCheck: vi.fn(), - registerModelProxy: vi.fn(), - getProvider: vi.fn(), - getProviderNames: vi.fn(() => []), + registerProvider: vi.fn((template: unknown) => template), + registerSetupSteps: vi.fn(), + registerHealthCheck: vi.fn(), + registerModelProxy: vi.fn(), + registerProviderSetup: vi.fn((template: unknown) => template), + getProvider: vi.fn(), + getProviderNames: vi.fn(() => []), }, })); diff --git a/src/agents/plugins/opencode/opencode-dynamic-models.ts b/src/agents/plugins/opencode/opencode-dynamic-models.ts index 9938e54c..4c107e65 100644 --- a/src/agents/plugins/opencode/opencode-dynamic-models.ts +++ b/src/agents/plugins/opencode/opencode-dynamic-models.ts @@ -48,15 +48,19 @@ function isResponsesApiModel(id: string): boolean { // ── Family detection ───────────────────────────────────────────────────────── function detectFamily(id: string): string { - if (id.startsWith('claude')) return 'claude-4'; - if (id.startsWith('gemini')) return 'gemini-2'; - if (id.startsWith('gpt-4')) return 'gpt-4'; - if (id.startsWith('gpt-5')) return 'gpt-5'; - if (/^o[134]-/.test(id) || id === 'o1') return 'openai-reasoning'; - if (id.startsWith('qwen')) return 'qwen3'; - if (id.startsWith('deepseek')) return 'deepseek'; + // Support vendor-prefixed model names (e.g. "anthropic.claude-...", "meta.llama-..."). + const bare = id.includes('.') ? 
id.split('.').slice(1).join('.') : id; + if (bare.startsWith('claude') || id.startsWith('claude')) return 'claude-4'; + if (bare.startsWith('gemini') || id.startsWith('gemini')) return 'gemini-2'; + if (bare.startsWith('gpt-4') || id.startsWith('gpt-4')) return 'gpt-4'; + if (bare.startsWith('gpt-5') || id.startsWith('gpt-5')) return 'gpt-5'; + if (/^o[134]-/.test(bare) || bare === 'o1' || /^o[134]-/.test(id) || id === 'o1') return 'openai-reasoning'; + if (bare.startsWith('qwen') || id.startsWith('qwen')) return 'qwen3'; + if (bare.startsWith('deepseek') || id.startsWith('deepseek')) return 'deepseek'; + if (bare.startsWith('llama') || id.startsWith('llama') || id.startsWith('meta.llama')) return 'llama'; + if (bare.startsWith('mistral') || id.startsWith('mistral')) return 'mistral'; if (id.startsWith('moonshotai') || id.startsWith('kimi')) return 'kimi'; - return id.split('-')[0] || id; + return id.split('.').pop()?.split('-')[0] || id.split('-')[0] || id; } // ── Token-limit heuristics ─────────────────────────────────────────────────── @@ -65,7 +69,7 @@ function detectFamily(id: string): string { // We derive reasonable defaults from the model family. function detectLimits(id: string, family: string): { context: number; output: number } { - if (family === 'claude-4' || id.startsWith('claude')) return { context: 200000, output: 64000 }; + if (family === 'claude-4' || id.startsWith('claude') || id.includes('.claude')) return { context: 200000, output: 64000 }; if (family === 'gemini-2' || id.startsWith('gemini')) return { context: 1048576, output: 65536 }; if (id.startsWith('gpt-4.1')) return { context: 1048576, output: 32768 }; if (id.startsWith('gpt-4o')) return { context: 128000, output: 16384 }; @@ -128,25 +132,268 @@ export function convertApiModelToOpenCodeConfig(model: LlmModel): OpenCodeModelC }; } +// ── Ollama /api/show autodetect ────────────────────────────────────────────── + +/** + * Subset of the Ollama /api/show response we rely on. 
+ * See: https://github.com/ollama/ollama/blob/main/docs/api.md + * + * Note: `parameters` is a STRING in the real response (e.g. "num_ctx 32768\n..."), + * not an object. The authoritative context length lives in `details.context_length` + * or in `model_info[].context_length`. + */ +export interface OllamaShowResponse { + details?: { + parent_model?: string; + format?: string; + family?: string; + families?: string[]; + parameter_size?: string; + quantization_level?: string; + context_length?: number; + embedding_length?: number; + }; + model_info?: Record>; + parameters?: string; +} + +export interface OllamaModelLimits { + context: number; + output: number; +} + +/** + * Per-family default context windows for Ollama models when /api/show + * does not (or cannot) report a value. Tuned conservatively: most local + * Ollama installs default to 2048/4096 unless `num_ctx` was raised. + */ +function getFamilyDefaultContext(modelId: string): number { + const id = modelId.toLowerCase(); + if (id.startsWith('llama3') || id.startsWith('llama-3')) return 128000; + if (id.startsWith('llama2') || id.startsWith('llama-2')) return 4096; + if (id.startsWith('llama')) return 4096; + if (id.startsWith('qwen2.5') || id.startsWith('qwen2-5')) return 32768; + if (id.startsWith('qwen3')) return 32768; + if (id.startsWith('qwen')) return 32768; + if (id.startsWith('gemma2') || id.startsWith('gemma-2')) return 8192; + if (id.startsWith('gemma3') || id.startsWith('gemma-3')) return 128000; + if (id.startsWith('gemma')) return 8192; + if (id.startsWith('codellama')) return 16384; + if (id.startsWith('deepseek-coder')) return 16384; + if (id.startsWith('deepseek-r1')) return 65536; + if (id.startsWith('deepseek')) return 32768; + if (id.startsWith('mistral') || id.startsWith('mixtral')) return 32768; + if (id.startsWith('phi3') || id.startsWith('phi-3')) return 4096; + if (id.startsWith('phi4') || id.startsWith('phi-4')) return 16384; + if (id.startsWith('command-r')) return 128000; + if 
(id.startsWith('yi')) return 32768; + return 32768; +} + +/** + * Per-family default output (max_tokens) limits. Ollama does not advertise + * a max output size in /api/show, so we approximate from common defaults. + * Returns 4096 as a safe generic fallback. + */ +export function getOllamaFamilyOutputLimit(modelId: string): number { + const id = modelId.toLowerCase(); + if (id.startsWith('llama3') || id.startsWith('llama-3')) return 8192; + if (id.startsWith('llama2') || id.startsWith('llama-2')) return 4096; + if (id.startsWith('llama')) return 4096; + if (id.startsWith('qwen')) return 8192; + if (id.startsWith('gemma2') || id.startsWith('gemma-2')) return 8192; + if (id.startsWith('gemma3') || id.startsWith('gemma-3')) return 8192; + if (id.startsWith('gemma')) return 4096; + if (id.startsWith('codellama')) return 4096; + if (id.startsWith('deepseek-coder')) return 8192; + if (id.startsWith('deepseek-r1')) return 8192; + if (id.startsWith('deepseek')) return 8192; + if (id.startsWith('mistral') || id.startsWith('mixtral')) return 8192; + if (id.startsWith('phi3') || id.startsWith('phi-3')) return 4096; + if (id.startsWith('phi4') || id.startsWith('phi-4')) return 4096; + if (id.startsWith('command-r')) return 4096; + if (id.startsWith('yi')) return 4096; + return 4096; +} + +/** + * Fetch /api/show for a single model and extract its context length. + * Returns `undefined` on any failure so the caller can chain fallbacks. + * + * Cached per-process via {@link ollamaShowCache} so the same model is + * only fetched once per discovery run, even when looked up by both + * its full tag and its normalized (tag-stripped) name. 
+ */ +async function fetchOllamaContextLength( + ollamaApiUrl: string, + modelName: string, +): Promise { + if (ollamaShowCache.has(modelName)) { + return ollamaShowCache.get(modelName); + } + try { + const showResp = await fetch(`${ollamaApiUrl}/api/show`, { + method: 'POST', + body: JSON.stringify({ name: modelName }), + }); + if (!showResp.ok) { + ollamaShowCache.set(modelName, undefined as unknown as OllamaShowResponse); + return undefined; + } + const data = (await showResp.json()) as OllamaShowResponse; + ollamaShowCache.set(modelName, data); + return data; + } catch { + logger.debug(`[dynamic-models] [ollama] Failed to fetch /api/show for ${modelName}`); + ollamaShowCache.set(modelName, undefined as unknown as OllamaShowResponse); + return undefined; + } +} + +const ollamaShowCache: Map = new Map(); + +/** + * Resolve the effective context window for an Ollama model using a + * 5-step fallback chain. The first source that yields a positive + * integer wins. + */ +function resolveOllamaContext( + modelId: string, + showData: OllamaShowResponse | undefined, + profileConfig: { contextWindow?: number; maxPhysicalContext?: number } | undefined, +): number { + // 1. Explicit profile override (absolute control). + if (profileConfig?.contextWindow && profileConfig.contextWindow > 0) { + return profileConfig.contextWindow; + } + + let resolvedContext = 32768; // Default fallback + + if (showData) { + // 2. details.context_length (set when the model was created with an explicit num_ctx). + const detailsCtx = showData.details?.context_length; + if (detailsCtx && detailsCtx > 0) { + resolvedContext = detailsCtx; + } else { + // 3. model_info[].context_length. + // Note: Ollama sometimes prefixes keys with the family name (e.g. "gemma4.context_length") + // or uses "general.architecture" as the key. 
+ const arch = showData.details?.family || (showData.model_info?.['general.architecture'] as string | undefined); + if (arch && showData.model_info) { + // Try specific family-prefixed key first (e.g. "gemma4.context_length") + const familyKey = `${arch}.context_length`; + const familyVal = showData.model_info[familyKey]; + if (familyVal && typeof familyVal === 'number' && familyVal > 0) { + resolvedContext = familyVal; + } else { + // Fallback to architecture-based entry + const archEntry = showData.model_info[arch]; + const archCtx = archEntry?.context_length; + if (archCtx && archCtx > 0) { + resolvedContext = archCtx; + } + } + } + + // Special case: check all keys for anything ending in ".context_length" + if (resolvedContext === 32768 && showData.model_info) { + for (const key of Object.keys(showData.model_info)) { + if (key.endsWith('.context_length')) { + const val = showData.model_info[key]; + if (typeof val === 'number' && val > 0) { + resolvedContext = val; + break; + } + } + } + } + } + } else { + // 4. Family-based heuristic. + const familyCtx = getFamilyDefaultContext(modelId); + if (familyCtx > 0) { + resolvedContext = familyCtx; + } + } + + // 5. Physical RAM Limit (maxPhysicalContext). + // We use the minimum of the model's support and the physical limit to avoid OOM. + if (profileConfig?.maxPhysicalContext && profileConfig.maxPhysicalContext > 0) { + resolvedContext = Math.min(resolvedContext, profileConfig.maxPhysicalContext); + } + + return resolvedContext; +} + // ── Main export ────────────────────────────────────────────────────────────── /** - * Fetch the live model catalogue from the CodeMie API and convert it to - * OpenCodeModelConfig format. + * Fetch the dynamic model catalogue for any provider. Supports Ollama dynamic discovery. 
* - * @param baseUrl - CODEMIE_BASE_URL (authenticated proxy endpoint) - * @param codeMieUrl - CODEMIE_URL (CodeMie org URL used for SSO credential lookup) - * @param jwtToken - CODEMIE_JWT_TOKEN (optional Bearer token, preferred over SSO) + * @param baseUrl - Provider base URL (Ollama: http://localhost:11434, ...) + * @param codeMieUrl - CODEMIE_URL (for SSO providers) + * @param jwtToken - JWT token if available + * @param providerOverride - Provider name override (e.g. "ollama") + * @param profileConfig - Profile config (used for `contextWindow` override on Ollama) * @returns Map of modelId → OpenCodeModelConfig (dynamic) or OPENCODE_MODEL_CONFIGS (fallback) */ export async function fetchDynamicModelConfigs( baseUrl: string, codeMieUrl: string | undefined, jwtToken?: string, + providerOverride?: string, + profileConfig?: { contextWindow?: number; maxPhysicalContext?: number } & Record, ): Promise> { - try { - let rawModels: LlmModel[]; + // === Dynamic Ollama discovery === + if ((providerOverride && providerOverride === 'ollama') || (baseUrl && /11434/.test(baseUrl))) { + try { + const ollamaApiUrl = baseUrl.replace(/\/v1\/?$/, '').replace(/\/$/, ''); + const resp = await fetch(`${ollamaApiUrl}/api/tags`); + const data = await resp.json(); + const ollamaModels: Record = {}; + for (const { name } of (Array.isArray((data as any).models) ? (data as any).models : [])) { + const exactId = name; + const normalizedId = name.replace(/:.*$/, ''); // "qwen3.6:latest" -> "qwen3.6" + + // Fetch /api/show once and reuse the cached result for both + // the tag-suffixed and the normalized (tag-stripped) lookups. 
+ const showData = await fetchOllamaContextLength(ollamaApiUrl, exactId); + const context = resolveOllamaContext(normalizedId, showData, profileConfig); + const output = getOllamaFamilyOutputLimit(normalizedId); + const config: OpenCodeModelConfig = { + id: normalizedId, + name: normalizedId, + family: normalizedId.split('.')[0], + displayName: normalizedId, + tool_call: true, + reasoning: true, + attachment: false, + temperature: true, + structured_output: false, + use_responses_api: false, + modalities: { input: ['text'], output: ['text'] }, + knowledge: new Date().toISOString().split('T')[0], + release_date: new Date().toISOString().split('T')[0], + last_updated: new Date().toISOString().split('T')[0], + open_weights: true, + cost: { input: 0, output: 0 }, + limit: { context, output } + }; + ollamaModels[normalizedId] = config; + ollamaModels[exactId] = { ...config, id: exactId, name: exactId, displayName: exactId }; + } + logger.debug(`[dynamic-models] [ollama] Loaded ${Object.keys(ollamaModels).length} models from /api/tags`); + if (Object.keys(ollamaModels).length > 0) return ollamaModels; + } catch (err) { + logger.debug(`[dynamic-models] [ollama] Dynamic model fetch failed, falling back.`, { error: err instanceof Error ? err.message : String(err) }); + } + // Continue to fallback model loading logic below... + } + + let rawModels: LlmModel[]; + + try { if (jwtToken) { rawModels = await fetchCodeMieLlmModels(baseUrl, jwtToken); logger.debug('[dynamic-models] Fetched model list via JWT auth'); diff --git a/src/agents/plugins/opencode/opencode-model-configs.ts b/src/agents/plugins/opencode/opencode-model-configs.ts index 20788d14..79816047 100644 --- a/src/agents/plugins/opencode/opencode-model-configs.ts +++ b/src/agents/plugins/opencode/opencode-model-configs.ts @@ -567,6 +567,9 @@ export function getResponsesApiModelConfigs( * Family-specific defaults for unknown model variants. 
* Used by getModelConfig() when an exact match isn't found but * the model ID prefix matches a known family. + * + * Keys are matched against both the full model ID and the part after the + * first dot (to handle vendor-prefixed names like "anthropic.claude-…"). */ const MODEL_FAMILY_DEFAULTS: Record> = { 'claude': { @@ -602,6 +605,22 @@ const MODEL_FAMILY_DEFAULTS: Record> = { temperature: true, modalities: { input: ['text'], output: ['text'] }, limit: { context: 262000, output: 65536 } + }, + 'llama': { + family: 'llama', + reasoning: true, + attachment: false, + temperature: true, + modalities: { input: ['text'], output: ['text'] }, + limit: { context: 128000, output: 8192 } + }, + 'mistral': { + family: 'mistral', + reasoning: true, + attachment: false, + temperature: true, + modalities: { input: ['text'], output: ['text'] }, + limit: { context: 32000, output: 8192 } } }; @@ -625,14 +644,27 @@ export function getModelConfig(modelId: string): OpenCodeModelConfig { return config; } - // Detect model family from prefix for smarter defaults + // Strip vendor prefix (e.g. "anthropic.claude-..." → "claude-...") and + // try the catalogue again so that Azure-style deployment names resolve to + // the correct static config when one exists. + const bareName = modelId.includes('.') ? modelId.split('.').slice(1).join('.') : modelId; + const bareConfig = bareName !== modelId ? OPENCODE_MODEL_CONFIGS[bareName] : undefined; + if (bareConfig) { + // Return the static config but with the original (Azure) deployment id + // so OpenCode routes it to the right provider entry. + return { ...bareConfig, id: modelId, name: bareConfig.name, displayName: bareConfig.displayName ?? modelId }; + } + + // Detect model family from prefix for smarter defaults. + // Check both the full id and the bare name (after stripping vendor prefix). 
const familyPrefix = Object.keys(MODEL_FAMILY_DEFAULTS).find( - prefix => modelId.startsWith(prefix) + prefix => modelId.startsWith(prefix) || bareName.startsWith(prefix) ); const familyDefaults = familyPrefix ? MODEL_FAMILY_DEFAULTS[familyPrefix] : {}; // Extract family from model ID (e.g., "gpt-4o" -> "gpt-4", "claude-4-5-sonnet" -> "claude-4") const family = familyDefaults.family + || bareName.split('-').slice(0, 2).join('-') || modelId.split('-').slice(0, 2).join('-') || modelId; diff --git a/src/agents/plugins/opencode/opencode.plugin.ts b/src/agents/plugins/opencode/opencode.plugin.ts index fad59caf..1297e7a1 100644 --- a/src/agents/plugins/opencode/opencode.plugin.ts +++ b/src/agents/plugins/opencode/opencode.plugin.ts @@ -52,7 +52,13 @@ export const OpenCodePluginMetadata: AgentMetadata = { } } - const provider = env.CODEMIE_PROVIDER; + function normalizeProvider(provider: string | undefined, baseUrl: string | undefined): string | undefined { + if (provider === 'azure-openai') return 'azure-openai'; + if (provider === 'bedrock' && baseUrl && /openai\.azure\.com/i.test(baseUrl)) return 'azure-openai'; + return provider; + } + + const provider = normalizeProvider(env.CODEMIE_PROVIDER, env.CODEMIE_BASE_URL); const baseUrl = env.CODEMIE_BASE_URL; if (!baseUrl) { @@ -64,18 +70,30 @@ export const OpenCodePluginMetadata: AgentMetadata = { return env; } + let profileConfig: any = undefined; + if (env.CODEMIE_PROFILE_CONFIG) { + try { + profileConfig = JSON.parse(env.CODEMIE_PROFILE_CONFIG); + } catch { + logger.warn('[opencode] Failed to parse CODEMIE_PROFILE_CONFIG', { agent: 'opencode' }); + } + } + // Fetch live model catalogue from the CodeMie API. // Falls back to the static OPENCODE_MODEL_CONFIGS on any error. 
const allModels = await fetchDynamicModelConfigs( baseUrl, env.CODEMIE_URL, env.CODEMIE_JWT_TOKEN, + provider, + profileConfig, ); // Model selection priority: env var > config > default // Use dynamic catalogue first, then fall back to static getModelConfig for unknown IDs. const selectedModel = env.CODEMIE_MODEL || config?.model || 'gpt-5-2-2025-12-11'; - const modelConfig = allModels[selectedModel] ?? getModelConfig(selectedModel); + const normalizedSelectedModel = provider === 'ollama' ? selectedModel.replace(/:latest$/, '') : selectedModel; + const modelConfig = allModels[normalizedSelectedModel] ?? allModels[selectedModel] ?? getModelConfig(normalizedSelectedModel); const { providerOptions } = modelConfig; @@ -85,7 +103,8 @@ export const OpenCodePluginMetadata: AgentMetadata = { // Determine URLs based on provider type const isBedrock = provider === 'bedrock'; - const proxyBaseUrl = provider !== 'ollama' && !isBedrock ? baseUrl : undefined; + const isProxy = provider !== 'ollama' && !isBedrock && provider !== 'azure-openai'; + const proxyBaseUrl = isProxy ? baseUrl : undefined; const ollamaBaseUrl = provider === 'ollama' ? (baseUrl.endsWith('/v1') || baseUrl.includes('/v1/') ? baseUrl : `${baseUrl.replace(/\/$/, '')}/v1`) : 'http://localhost:11434/v1'; @@ -94,8 +113,13 @@ export const OpenCodePluginMetadata: AgentMetadata = { // - ollama: uses ollama provider directly // - bedrock: uses OpenCode's built-in amazon-bedrock provider (AWS env vars set by provider hook) // - all others: route through codemie-proxy (SSO/proxy) - const activeProvider = provider === 'ollama' ? 'ollama' : (isBedrock ? 'amazon-bedrock' : 'codemie-proxy'); + const activeProvider = provider === 'ollama' ? 'ollama' : (isBedrock ? 'amazon-bedrock' : provider === 'azure-openai' ? 'azure-openai' : 'codemie-proxy'); const timeout = providerOptions?.timeout ?? parseInt(env.CODEMIE_TIMEOUT || '600') * 1000; + const modelId = isBedrock + ? 
toBedrockModelId(modelConfig.id, env.AWS_REGION || env.CODEMIE_AWS_REGION) + : provider === 'ollama' + ? modelConfig.id.replace(/:latest$/, '') + : modelConfig.id; // Always enable openai CUSTOM_LOADER when Responses API models exist. // This fixes model-switching: if user starts with Claude and switches to GPT, @@ -149,7 +173,7 @@ export const OpenCodePluginMetadata: AgentMetadata = { } } }, - model: `${activeProvider}/${isBedrock ? toBedrockModelId(modelConfig.id, env.AWS_REGION || env.CODEMIE_AWS_REGION) : modelConfig.id}` + model: `${activeProvider}/${modelId}` }; // --- Hooks injection --- diff --git a/src/cli/commands/doctor/checks/AIConfigCheck.ts b/src/cli/commands/doctor/checks/AIConfigCheck.ts index c622b86f..da721574 100644 --- a/src/cli/commands/doctor/checks/AIConfigCheck.ts +++ b/src/cli/commands/doctor/checks/AIConfigCheck.ts @@ -84,6 +84,7 @@ export class AIConfigCheck implements HealthCheck { } else { onProgress?.('Checking base URL'); // For other providers, show Base URL + // Azure OpenAI stores its endpoint in baseUrl (set by buildConfig) if (hasBaseUrl) { details.push({ status: 'ok', diff --git a/src/cli/commands/doctor/index.ts b/src/cli/commands/doctor/index.ts index 9cf82b0c..f4ca7560 100644 --- a/src/cli/commands/doctor/index.ts +++ b/src/cli/commands/doctor/index.ts @@ -29,7 +29,8 @@ export function createDoctorCommand(): Command { command .description('Check system health and configuration') .option('-v, --verbose', 'Enable verbose debug output with detailed API logs') - .action(async (options: { verbose?: boolean }) => { + .option('--test-dial', 'Run DIAL/Azure OpenAI integration test on all models (only for DIAL provider)') + .action(async (options: { verbose?: boolean, testDial?: boolean }) => { // Enable debug mode if verbose flag is set if (options.verbose) { process.env.CODEMIE_DEBUG = 'true'; @@ -41,6 +42,20 @@ export function createDoctorCommand(): Command { } } + // DIAL integration test if the corresponding flag is selected + if 
(options.testDial) { + const { ConfigLoader } = await import('../../../utils/config.js'); + const { runDialIntegrationTest } = await import('../../../utils/dial-model-integrity.js'); + const config = await ConfigLoader.load(); + if (!config.provider || (config.provider !== 'azure-openai' && !config.provider.toLowerCase().includes('dial'))) { + console.log('\n⚠ dial integration test: active profile provider не DIAL/azure-openai. Выберите профиль DIAL через codemie profile switch или setup.\n'); + process.exit(1); + } + console.log(chalk.bold('\n🔍 CodeMie Code Health Check\n')); + console.log('Running integration test on all DIAL models...\n'); + const ok = await runDialIntegrationTest(config); + process.exit(ok ? 0 : 1); + } // Log system information for debugging logger.debug('=== CodeMie Doctor - System Information ==='); logger.debug(`Platform: ${os.platform()}`); diff --git a/src/cli/commands/setup.ts b/src/cli/commands/setup.ts index cbefb713..5e0aa5e2 100644 --- a/src/cli/commands/setup.ts +++ b/src/cli/commands/setup.ts @@ -53,6 +53,7 @@ export function createSetupCommand(): Command { async function runSetupWizard(force?: boolean): Promise { // Show ecosystem introduction + logger.debug('[setup] starting setup wizard'); FirstTimeExperience.showEcosystemIntro(); // Check if config already exists (both global and local) @@ -221,6 +222,7 @@ async function runSetupWizard(force?: boolean): Promise { // Use plugin-based setup flow await handlePluginSetup(provider, setupSteps, profileName, isUpdate, storageLocation); + logger.debug(`[setup] handlePluginSetup completed for ${provider}`); } /** @@ -253,8 +255,9 @@ async function handlePluginSetup( try { models = await setupSteps.fetchModels(credentials); modelsSpinner.succeed(chalk.green(`Found ${models.length} available models`)); - } catch { + } catch (error) { modelsSpinner.warn(chalk.yellow('Could not fetch models - will use manual entry')); + console.error(chalk.red('Failed to fetch models'), error); models = []; 
} @@ -264,11 +267,15 @@ async function handlePluginSetup( ? await setupSteps.selectModel(credentials, models, providerTemplate) : undefined; + logger.debug(`[setup] selectModel result: ${preselectedModel ?? 'none'}`); + if (preselectedModel) { selectedModel = preselectedModel; logger.success(`Model selected automatically: ${selectedModel}`); } else { + logger.debug('[setup] falling back to manual model selection'); selectedModel = await promptForModelSelection(models, providerTemplate); + logger.debug(`[setup] manual model selected: ${selectedModel}`); } // Step 3.5: Install model if provider supports it (e.g., Ollama) @@ -288,6 +295,7 @@ async function handlePluginSetup( } // Step 4: Build configuration + logger.debug('[setup] building final configuration'); const config = setupSteps.buildConfig(credentials, selectedModel); const userEmail = credentials.additionalConfig?.userEmail as string | undefined; @@ -300,13 +308,29 @@ async function handlePluginSetup( if (modelTiers.sonnetModel) config.sonnetModel = modelTiers.sonnetModel; if (modelTiers.opusModel) config.opusModel = modelTiers.opusModel; + // --- FIX: Handle Profile Updates --- + if (isUpdate && profileName) { + const workingDir = process.cwd(); + const currentProfile = await ConfigLoader.getProfile(profileName, workingDir); + if (currentProfile) { + // Merge new setup config into the existing profile + Object.assign(currentProfile, config); + // Update the config object to be the merged result for the save step + Object.assign(config, currentProfile); + config.name = profileName; + } + } + // --------------------------------- + // Step 5: Ask for profile name (if creating new) let finalProfileName = profileName; if (!isUpdate && profileName === null) { finalProfileName = await promptForProfileName(providerName); } + // Step 6: Save profile + logger.debug('[setup] saving profile'); const saveSpinner = ora('Saving profile...').start(); try { @@ -373,6 +397,7 @@ async function handlePluginSetup( } // Display 
success + logger.debug('[setup] setup completed successfully'); displaySetupSuccess(finalProfileName!, providerName, selectedModel); // Show next steps based on storage location @@ -397,6 +422,7 @@ async function handlePluginSetup( } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); const providerTemplate = ProviderRegistry.getProvider(providerName); + logger.error(`[setup] plugin setup failed for provider ${providerName}: ${errorMessage}`); displaySetupError(new Error(errorMessage), providerTemplate?.setupInstructions); throw error; } @@ -455,6 +481,7 @@ async function promptForModelSelection( models: string[], providerTemplate?: any ): Promise { + logger.debug(`[setup] promptForModelSelection: models=${models.length}`); if (models.length === 0) { const { manualModel } = await inquirer.prompt([ { @@ -465,6 +492,7 @@ async function promptForModelSelection( validate: (input: string) => input.trim() !== '' || 'Model name is required' } ]); + logger.debug(`[setup] manual model input accepted: ${manualModel}`); return manualModel ? manualModel.trim() : manualModel; } @@ -484,6 +512,8 @@ async function promptForModelSelection( } ]); + logger.debug(`[setup] model list selection accepted: ${selectedModel}`); + if (selectedModel === 'custom') { const { customModel } = await inquirer.prompt([ { @@ -493,6 +523,7 @@ async function promptForModelSelection( validate: (input: string) => input.trim() !== '' || 'Model is required' } ]); + logger.debug(`[setup] custom model input accepted: ${customModel}`); return customModel ? 
customModel.trim() : customModel; } diff --git a/src/env/types.ts b/src/env/types.ts index 9915375f..482fd72c 100644 --- a/src/env/types.ts +++ b/src/env/types.ts @@ -51,6 +51,7 @@ export interface ProviderProfile { name?: string; // Optional - set during save provider?: string; baseUrl?: string; + azureOpenAIBaseUrl?: string; apiKey?: string; model?: string; @@ -90,6 +91,13 @@ export interface ProviderProfile { // Token configuration (for Claude Code with Bedrock) maxOutputTokens?: number; maxThinkingTokens?: number; + contextWindow?: number; + maxPhysicalContext?: number; + + + // Azure OpenAI-specific fields + azureApiVersion?: string; + azureDeployment?: string; // Metrics configuration metrics?: { diff --git a/src/providers/capabilities/dial-capabilities.ts b/src/providers/capabilities/dial-capabilities.ts new file mode 100644 index 00000000..9cfb4bba --- /dev/null +++ b/src/providers/capabilities/dial-capabilities.ts @@ -0,0 +1,57 @@ +// Capabilities detection for DIAL models: foundation only + +export interface ModelCapabilities { + openaiCompatible: boolean; + tools: boolean; + reasoning: boolean; + thinking: boolean; + vision: boolean; +} + +export function detectCapabilitiesFromModelName(modelId: string): ModelCapabilities { + // Simple heuristic for demo/foundation + if (/claude/i.test(modelId)) { + return { + openaiCompatible: true, + tools: true, + reasoning: false, + thinking: false, + vision: false, + }; + } + if (/gemini/i.test(modelId)) { + return { + openaiCompatible: true, + tools: true, + reasoning: false, + thinking: false, + vision: true, + }; + } + if (/grok/i.test(modelId)) { + return { + openaiCompatible: true, + tools: true, + reasoning: false, + thinking: false, + vision: false, + }; + } + if (/gpt|openai/i.test(modelId)) { + return { + openaiCompatible: true, + tools: true, + reasoning: true, + thinking: false, + vision: false + }; + } + // Fallback: openai compatible only + return { + openaiCompatible: true, + tools: false, + reasoning: 
false, + thinking: false, + vision: false, + }; +} diff --git a/src/providers/core/registry.ts b/src/providers/core/registry.ts index 33f4f3b5..ce59e33d 100644 --- a/src/providers/core/registry.ts +++ b/src/providers/core/registry.ts @@ -51,6 +51,15 @@ export class ProviderRegistry { this.setupSteps.set(name, steps); } + /** + * Register provider template together with its setup steps + */ + static registerProviderSetup(template: T, steps: ProviderSetupSteps): T { + this.registerProvider(template); + this.registerSetupSteps(template.name, steps); + return template; + } + /** * Get provider by name */ diff --git a/src/providers/index.ts b/src/providers/index.ts index 83cd62c5..ddb31363 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -37,6 +37,7 @@ import './plugins/jwt/index.js'; import './plugins/litellm/index.js'; import './plugins/bedrock/index.js'; import './plugins/anthropic-subscription/index.js'; +import './plugins/azure-openai/index.js'; // Re-export plugin modules for direct access if needed export * as Ollama from './plugins/ollama/index.js'; @@ -45,3 +46,4 @@ export * as JWT from './plugins/jwt/index.js'; export * as LiteLLM from './plugins/litellm/index.js'; export * as Bedrock from './plugins/bedrock/index.js'; export * as AnthropicSubscription from './plugins/anthropic-subscription/index.js'; +export * as AzureOpenAI from './plugins/azure-openai/index.js'; diff --git a/src/providers/integration/setup-ui.ts b/src/providers/integration/setup-ui.ts index 8da5fd63..540535b6 100644 --- a/src/providers/integration/setup-ui.ts +++ b/src/providers/integration/setup-ui.ts @@ -6,7 +6,7 @@ */ import chalk from 'chalk'; -import type { ProviderTemplate } from '../core/types.js'; +import type { ProviderTemplate, ModelInfo } from '../core/types.js'; /** * Format provider choice for inquirer @@ -18,6 +18,10 @@ import type { ProviderTemplate } from '../core/types.js'; * - Capability hints (dimmed) */ export function formatProviderChoice(template: 
ProviderTemplate): string { + // For Azure OpenAI EPAM DIAL — brief remark + if (template.name === 'azure-openai' && template.displayName.toLowerCase().includes('dial')) { + return `${template.displayName} - ${template.description} — features limited`; + } return `${template.displayName} - ${template.description}`; } @@ -111,6 +115,15 @@ export function formatModelChoice( ): { name: string; value: string } { const metadata = template?.modelMetadata?.[modelId]; + // Note for models via DIAL (Azure OpenAI) — reasoning/thinking disabled + let featureNote = ''; + if ( + template?.name === 'azure-openai' && template?.displayName?.toLowerCase().includes('dial') && + (!modelId.toLowerCase().includes('gpt') && !modelId.toLowerCase().includes('openai')) + ) { + featureNote = ' [no reasoning]'; + } + // Check if model is recommended (with partial matching support) const isRecommended = metadata?.popular || @@ -119,11 +132,11 @@ export function formatModelChoice( // If no metadata and not recommended, return plain format if (!metadata && !isRecommended) { - return { name: modelId, value: modelId }; + return { name: modelId + featureNote, value: modelId }; } const popularBadge = isRecommended ? chalk.yellow('⭐ ') : ''; - const mainLine = `${popularBadge}${chalk.white.bold(metadata?.name || modelId)}`; + const mainLine = `${popularBadge}${chalk.white.bold(metadata?.name || modelId)}${featureNote}`; const details: string[] = []; if (metadata?.description) { @@ -177,12 +190,18 @@ function isRecommendedModel(modelId: string, recommendedPattern: string): boolea * 2. Alphabetically by model ID */ export function getAllModelChoices( - models: string[], + models: string[] | ModelInfo[], template?: ProviderTemplate ): Array<{ name: string; value: string }> { - // Sort models using common rules - const sortedModels = [...models].sort((a, b) => { - // Check if models are recommended (with partial matching) + const normalizedModels = models.map(model => typeof model === 'string' ? 
model : model.id); + const infoMap = new Map(); + for (const model of models) { + if (typeof model !== 'string') { + infoMap.set(model.id, model); + } + } + + const sortedModels = [...normalizedModels].sort((a, b) => { const aRecommended = template?.recommendedModels?.some(pattern => isRecommendedModel(a, pattern) ) || false; @@ -190,15 +209,22 @@ export function getAllModelChoices( isRecommendedModel(b, pattern) ) || false; - // Recommended models first if (aRecommended && !bRecommended) return -1; if (!aRecommended && bRecommended) return 1; - // Then sort alphabetically return a.localeCompare(b); }); - return sortedModels.map(model => formatModelChoice(model, template)); + return sortedModels.map(model => { + const metadata = infoMap.get(model); + if (metadata?.description) { + return { + name: `${model}\n ${chalk.dim(metadata.description)}`, + value: model + }; + } + return formatModelChoice(model, template); + }); } /** diff --git a/src/providers/plugins/azure-openai/__tests__/azure-openai.template.test.ts b/src/providers/plugins/azure-openai/__tests__/azure-openai.template.test.ts new file mode 100644 index 00000000..59f8180f --- /dev/null +++ b/src/providers/plugins/azure-openai/__tests__/azure-openai.template.test.ts @@ -0,0 +1,198 @@ +/** + * Azure OpenAI Template — beforeRun hook tests + * + * Verifies the claude-specific beforeRun hook correctly configures Claude Code + * for Azure / EPAM DIAL usage, including DIAL compatibility settings that + * prevent HTTP 400 errors caused by Anthropic-native request fields. 
+ * + * @group unit + */ + +import { describe, it, expect } from 'vitest'; +import { AzureOpenAITemplate } from '../azure-openai.template.js'; + +/** Build a minimal env simulating what BaseAgentAdapter.transformEnvVars produces */ +function makeEnv(overrides: Record = {}): NodeJS.ProcessEnv { + return { + CODEMIE_PROVIDER: 'azure-openai', + CODEMIE_BASE_URL: 'https://my-epam-dial.example.com', + CODEMIE_AZURE_OPENAI_BASE_URL: 'https://my-epam-dial.example.com', + CODEMIE_API_KEY: 'test-api-key-1234567890', + CODEMIE_MODEL: 'anthropic.claude-sonnet-4-6', + AZURE_OPENAI_API_KEY: 'test-api-key-1234567890', // set by wildcard hook first + ANTHROPIC_BASE_URL: 'https://my-epam-dial.example.com', + ANTHROPIC_AUTH_TOKEN: 'test-api-key-1234567890', + ...overrides, + }; +} + +/** + * Invoke the claude-specific beforeRun hook directly. + * The wildcard hook runs first in production but here we test the claude hook in isolation. + */ +async function runClaudeBeforeRunHook(env: NodeJS.ProcessEnv): Promise { + const claudeHook = AzureOpenAITemplate.agentHooks?.['claude']?.beforeRun; + if (!claudeHook) { + throw new Error('claude beforeRun hook not found in AzureOpenAITemplate'); + } + return claudeHook(env, { agent: 'claude', agentDisplayName: 'Claude Code' } as any); +} + +// ────────────────────────────────────────────────────────────────────────────── +describe('AzureOpenAITemplate — claude.beforeRun hook', () => { + + describe('Azure mode activation', () => { + it('sets CLAUDE_CODE_USE_AZURE_OPENAI=1', async () => { + const env = makeEnv(); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_CODE_USE_AZURE_OPENAI).toBe('1'); + }); + + it('sets ANTHROPIC_BASE_URL to the Azure/DIAL endpoint', async () => { + const env = makeEnv(); + const result = await runClaudeBeforeRunHook(env); + expect(result.ANTHROPIC_BASE_URL).toBe('https://my-epam-dial.example.com'); + }); + + it('prefers CODEMIE_AZURE_OPENAI_BASE_URL over CODEMIE_BASE_URL for 
ANTHROPIC_BASE_URL', async () => { + const env = makeEnv({ + CODEMIE_BASE_URL: 'http://localhost:3001', // proxy URL + CODEMIE_AZURE_OPENAI_BASE_URL: 'https://real-dial-endpoint.example.com', + }); + const result = await runClaudeBeforeRunHook(env); + expect(result.ANTHROPIC_BASE_URL).toBe('https://real-dial-endpoint.example.com'); + }); + + it('removes ANTHROPIC_AUTH_TOKEN to prevent Anthropic API auth attempt', async () => { + const env = makeEnv({ ANTHROPIC_AUTH_TOKEN: 'should-be-removed' }); + const result = await runClaudeBeforeRunHook(env); + expect(result.ANTHROPIC_AUTH_TOKEN).toBeUndefined(); + }); + + it('sets ANTHROPIC_MODEL to the deployment name from CODEMIE_MODEL', async () => { + const env = makeEnv({ CODEMIE_MODEL: 'anthropic.claude-sonnet-4-6' }); + const result = await runClaudeBeforeRunHook(env); + expect(result.ANTHROPIC_MODEL).toBe('anthropic.claude-sonnet-4-6'); + }); + }); + + // ────────────────────────────────────────────────────────────────────────── + describe('DIAL/Azure compatibility — cache_control prevention', () => { + it('disables prompt caching (ENABLE_PROMPT_CACHING_1H=0) to prevent cache_control in requests', async () => { + const env = makeEnv(); + const result = await runClaudeBeforeRunHook(env); + expect(result.ENABLE_PROMPT_CACHING_1H).toBe('0'); + }); + + it('overrides ENABLE_PROMPT_CACHING_1H even if previously set to 1', async () => { + const env = makeEnv({ ENABLE_PROMPT_CACHING_1H: '1' }); + const result = await runClaudeBeforeRunHook(env); + expect(result.ENABLE_PROMPT_CACHING_1H).toBe('0'); + }); + + it('disables experimental betas (CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1) to prevent beta headers', async () => { + const env = makeEnv(); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS).toBe('1'); + }); + + it('overrides CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS even if previously unset', async () => { + const env = makeEnv({ CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: 
undefined }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS).toBe('1'); + }); + + it('disables thinking-related features that DIAL rejects', async () => { + const env = makeEnv({ + CLAUDE_CODE_DISABLE_THINKING: undefined, + MAX_THINKING_TOKENS: undefined, + DISABLE_INTERLEAVED_THINKING: undefined, + CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING: undefined, + }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_CODE_DISABLE_THINKING).toBe('1'); + expect(result.MAX_THINKING_TOKENS).toBe('0'); + expect(result.DISABLE_INTERLEAVED_THINKING).toBe('1'); + expect(result.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING).toBe('1'); + }); + }); + + // ────────────────────────────────────────────────────────────────────────── + describe('Claude Code essential defaults (re-applied because agent default hook is skipped)', () => { + it('sets CLAUDE_CODE_ENABLE_TELEMETRY=0 when not already configured', async () => { + const env = makeEnv({ CLAUDE_CODE_ENABLE_TELEMETRY: undefined }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_CODE_ENABLE_TELEMETRY).toBe('0'); + }); + + it('does not override CLAUDE_CODE_ENABLE_TELEMETRY if user already set it', async () => { + const env = makeEnv({ CLAUDE_CODE_ENABLE_TELEMETRY: '1' }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_CODE_ENABLE_TELEMETRY).toBe('1'); + }); + + it('sets DISABLE_AUTOUPDATER=1 when not already configured', async () => { + const env = makeEnv({ DISABLE_AUTOUPDATER: undefined }); + const result = await runClaudeBeforeRunHook(env); + expect(result.DISABLE_AUTOUPDATER).toBe('1'); + }); + + it('does not override DISABLE_AUTOUPDATER if user already set it', async () => { + const env = makeEnv({ DISABLE_AUTOUPDATER: '0' }); + const result = await runClaudeBeforeRunHook(env); + expect(result.DISABLE_AUTOUPDATER).toBe('0'); + }); + + it('sets ENABLE_TOOL_SEARCH=0 when not already configured', async () => 
{ + const env = makeEnv({ ENABLE_TOOL_SEARCH: undefined }); + const result = await runClaudeBeforeRunHook(env); + expect(result.ENABLE_TOOL_SEARCH).toBe('0'); + }); + + it('sets CLAUDE_AUTOCOMPACT_PCT_OVERRIDE=80 as default', async () => { + const env = makeEnv({ CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: undefined }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE).toBe('80'); + }); + + it('reads claudeAutocompactPct from CODEMIE_PROFILE_CONFIG when set', async () => { + const env = makeEnv({ + CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: undefined, + CODEMIE_PROFILE_CONFIG: JSON.stringify({ claudeAutocompactPct: 60 }), + }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE).toBe('60'); + }); + + it('uses default autocompact 80 when CODEMIE_PROFILE_CONFIG is malformed JSON', async () => { + const env = makeEnv({ + CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: undefined, + CODEMIE_PROFILE_CONFIG: 'not-valid-json{{{', + }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE).toBe('80'); + }); + + it('does not override CLAUDE_AUTOCOMPACT_PCT_OVERRIDE if already set', async () => { + const env = makeEnv({ CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: '50' }); + const result = await runClaudeBeforeRunHook(env); + expect(result.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE).toBe('50'); + }); + + it('pins Anthropic family aliases to the active Azure deployment', async () => { + const env = makeEnv({ + ANTHROPIC_DEFAULT_HAIKU_MODEL: undefined, + ANTHROPIC_DEFAULT_SONNET_MODEL: undefined, + ANTHROPIC_DEFAULT_OPUS_MODEL: undefined, + ANTHROPIC_DEFAULT_FABLE_MODEL: undefined, + CLAUDE_CODE_SUBAGENT_MODEL: undefined, + }); + const result = await runClaudeBeforeRunHook(env); + expect(result.ANTHROPIC_DEFAULT_HAIKU_MODEL).toBe('anthropic.claude-sonnet-4-6'); + expect(result.ANTHROPIC_DEFAULT_SONNET_MODEL).toBe('anthropic.claude-sonnet-4-6'); + 
expect(result.ANTHROPIC_DEFAULT_OPUS_MODEL).toBe('anthropic.claude-sonnet-4-6'); + expect(result.ANTHROPIC_DEFAULT_FABLE_MODEL).toBe('anthropic.claude-sonnet-4-6'); + expect(result.CLAUDE_CODE_SUBAGENT_MODEL).toBe('inherit'); + }); + }); +}); diff --git a/src/providers/plugins/azure-openai/azure-openai.health.ts b/src/providers/plugins/azure-openai/azure-openai.health.ts new file mode 100644 index 00000000..f52b3777 --- /dev/null +++ b/src/providers/plugins/azure-openai/azure-openai.health.ts @@ -0,0 +1,98 @@ +/** + * Azure OpenAI Health Check Implementation + * + * Validates Azure OpenAI endpoint availability and deployment discovery. + */ + +import type { CodeMieConfigOptions } from '../../../env/types.js'; +import type { HealthCheckResult, ModelInfo } from '../../core/types.js'; +import { BaseHealthCheck } from '../../core/base/BaseHealthCheck.js'; +import { ProviderRegistry } from '../../core/registry.js'; +import { AzureOpenAITemplate } from './azure-openai.template.js'; +import { AzureOpenAIModelProxy } from './azure-openai.models.js'; + +export class AzureOpenAIHealthCheck extends BaseHealthCheck { + private modelProxy: AzureOpenAIModelProxy; + private azureApiVersion = '2024-06-01'; + private azureDeployment?: string; + private azureApiKey?: string; + // Tracks the effective endpoint after check() is called (not the constructor default). 
+ private activeBaseUrl: string; + + constructor(baseUrl: string = AzureOpenAITemplate.defaultBaseUrl) { + super({ + provider: 'azure-openai', + baseUrl, + timeout: 10000 + }); + this.activeBaseUrl = baseUrl; + this.modelProxy = new AzureOpenAIModelProxy(baseUrl); + } + + supports(provider: string): boolean { + return provider === 'azure-openai'; + } + + async check(config: CodeMieConfigOptions): Promise { + this.azureApiVersion = config.azureApiVersion || '2024-06-01'; + this.azureDeployment = config.azureDeployment || config.model; + this.azureApiKey = config.apiKey; + // Always use the runtime endpoint from config, NOT this.config.baseUrl which + // is frozen to the constructor default (the auto-registered singleton is created + // without an endpoint, so this.config.baseUrl would be the placeholder URL). + this.activeBaseUrl = config.baseUrl || AzureOpenAITemplate.defaultBaseUrl; + this.modelProxy = new AzureOpenAIModelProxy( + this.activeBaseUrl, + config.apiKey, + this.azureApiVersion + ); + return super.check(config); + } + + protected async ping(): Promise { + const models = await this.listModels(); + if (models.length === 0) { + throw new Error('No Azure OpenAI deployments found. Verify that at least one deployment exists and that the API version matches the resource.'); + } + } + + protected async getVersion(): Promise { + return `api-version: ${this.azureApiVersion}`; + } + + async listModels(): Promise { + // Use this.activeBaseUrl (set in check()) rather than this.config.baseUrl + // which is frozen to the placeholder URL from the auto-registered singleton. 
+ return this.modelProxy.fetchModels({ + provider: 'azure-openai', + baseUrl: this.activeBaseUrl, + apiKey: this.azureApiKey, + model: this.azureDeployment || 'temp', + timeout: 300, + azureApiVersion: this.azureApiVersion + } as CodeMieConfigOptions); + } + + protected getUnreachableResult(): HealthCheckResult { + const endpoint = this.activeBaseUrl || AzureOpenAITemplate.defaultBaseUrl; + const apiVersion = this.azureApiVersion; + return { + provider: 'azure-openai', + status: 'unreachable', + message: 'Cannot connect to Azure OpenAI', + remediation: `Check Azure OpenAI configuration:\n 1. Verify the resource endpoint is correct: ${endpoint}\n 2. Verify the API key is valid\n 3. Verify the deployment exists and is accessible\n 4. Ensure the API version is supported: ${apiVersion}\n 5. Ensure the deployment name matches the Azure OpenAI Studio deployment\n\nSetup Azure OpenAI:\n - Create a resource in Azure Portal\n - Deploy a model in Azure OpenAI Studio\n - Configure endpoint, key, API version, and deployment name in CodeMie` + }; + } + + protected getHealthyMessage(models: ModelInfo[]): string { + return models.length > 0 + ? `Azure OpenAI is accessible with ${models.length} deployment(s) available${this.azureDeployment ? ` (active: ${this.azureDeployment})` : ''}` + : 'Azure OpenAI is accessible'; + } + + protected getNoModelsRemediation(): string { + return 'Create a deployment in Azure OpenAI Studio and try again.'; + } +} + +ProviderRegistry.registerHealthCheck('azure-openai', new AzureOpenAIHealthCheck()); diff --git a/src/providers/plugins/azure-openai/azure-openai.models.ts b/src/providers/plugins/azure-openai/azure-openai.models.ts new file mode 100644 index 00000000..95046d59 --- /dev/null +++ b/src/providers/plugins/azure-openai/azure-openai.models.ts @@ -0,0 +1,92 @@ +/** + * Azure OpenAI Model Proxy + * + * Fetches available deployments from Azure OpenAI via the OpenAI-compatible endpoint. 
+ */ + +import type { CodeMieConfigOptions } from '../../../env/types.js'; +import type { ModelInfo, ProviderModelFetcher } from '../../core/types.js'; +import { ProviderRegistry } from '../../core/registry.js'; + +export interface AzureOpenAIDeploymentInfo { + id: string; + name: string; + description?: string; + model?: string; +} + +export class AzureOpenAIModelProxy implements ProviderModelFetcher { + constructor( + private baseUrl: string, + private apiKey?: string, + private apiVersion: string = '2024-06-01' + ) {} + + supports(provider: string): boolean { + return provider === 'azure-openai'; + } + + private buildDeploymentsUrl(endpoint: string, apiVersion: string): string { + return new URL(`/openai/deployments?api-version=${encodeURIComponent(apiVersion)}`, endpoint).toString(); + } + + async fetchDeploymentInfos(config: CodeMieConfigOptions): Promise { + // baseUrl is always set by buildConfig; no legacy azureOpenAIBaseUrl fallback needed. + const endpoint = config.baseUrl || this.baseUrl; + const apiKey = config.apiKey || this.apiKey; + const apiVersion = config.azureApiVersion || this.apiVersion; + + if (!endpoint) { + return []; + } + + const response = await fetch(this.buildDeploymentsUrl(endpoint, apiVersion), { + headers: { + 'Content-Type': 'application/json', + ...(apiKey ? { 'api-key': apiKey } : {}) + } + }); + + if (!response.ok) { + throw new Error(`Failed to fetch Azure OpenAI deployments: ${response.status} ${response.statusText}`); + } + + const data = await response.json() as { data?: Array> }; + const deployments = data.data ?? []; + + return deployments + .map((deployment): AzureOpenAIDeploymentInfo | null => { + const id = String(deployment.id || deployment.name || deployment.model || '').trim(); + if (!id) { + return null; + } + + const name = String(deployment.name || deployment.id || id).trim(); + const description = typeof deployment.model === 'string' ? 
`Model: ${deployment.model}` : undefined; + + return { + id, + name, + description, + model: typeof deployment.model === 'string' ? deployment.model : undefined + }; + }) + .filter((deployment): deployment is AzureOpenAIDeploymentInfo => deployment !== null) + .sort((a, b) => a.name.localeCompare(b.name)); + } + + async fetchModels(config: CodeMieConfigOptions): Promise { + const deployments = await this.fetchDeploymentInfos(config); + return deployments.map((deployment) => ({ + id: deployment.id, + name: deployment.name, + description: deployment.description, + metadata: { + deploymentName: deployment.name, + model: deployment.model + } + })); + } +} + +ProviderRegistry.registerModelProxy('azure-openai', new AzureOpenAIModelProxy('')); diff --git a/src/providers/plugins/azure-openai/azure-openai.setup-steps.ts b/src/providers/plugins/azure-openai/azure-openai.setup-steps.ts new file mode 100644 index 00000000..13f0ed15 --- /dev/null +++ b/src/providers/plugins/azure-openai/azure-openai.setup-steps.ts @@ -0,0 +1,147 @@ +/** + * Azure OpenAI Setup Steps + * + * Interactive setup flow for Azure OpenAI provider. 
+ */ + +import inquirer from 'inquirer'; +import type { CodeMieConfigOptions } from '../../../env/types.js'; +import type { ProviderCredentials, ProviderSetupSteps, ValidationResult } from '../../core/types.js'; +import { AzureOpenAITemplate } from './azure-openai.template.js'; +import { AzureOpenAIModelProxy } from './azure-openai.models.js'; + +const FALLBACK_AZURE_MODEL = AzureOpenAITemplate.recommendedModels[0] || 'gpt-4o'; + +export const AzureOpenAISetupSteps: ProviderSetupSteps = { + name: 'azure-openai', + + async getCredentials(_isUpdate = false): Promise { + const answers = await inquirer.prompt([ + { + type: 'input', + name: 'baseUrl', + message: 'Azure OpenAI endpoint:', + default: AzureOpenAITemplate.defaultBaseUrl, + validate: (input: string) => input.trim() !== '' || 'Endpoint is required' + }, + { + type: 'password', + name: 'apiKey', + message: 'Azure OpenAI API Key:', + mask: '*', + validate: (input: string) => input.trim() !== '' || 'API key is required' + }, + { + type: 'input', + name: 'azureApiVersion', + message: 'Azure OpenAI API version:', + default: '2024-06-01', + validate: (input: string) => input.trim() !== '' || 'API version is required' + } + ]); + + return { + baseUrl: answers.baseUrl.trim(), + apiKey: answers.apiKey.trim(), + additionalConfig: { + azureApiVersion: answers.azureApiVersion.trim() + } + }; + }, + + async fetchModels(credentials: ProviderCredentials): Promise { + const modelProxy = new AzureOpenAIModelProxy( + credentials.baseUrl || AzureOpenAITemplate.defaultBaseUrl, + credentials.apiKey, + credentials.additionalConfig?.azureApiVersion as string | undefined + ); + + try { + const deployments = await modelProxy.fetchDeploymentInfos({ + provider: 'azure-openai', + baseUrl: credentials.baseUrl || AzureOpenAITemplate.defaultBaseUrl, + apiKey: credentials.apiKey, + model: 'temp', + timeout: 300, + azureApiVersion: credentials.additionalConfig?.azureApiVersion as string | undefined + } as CodeMieConfigOptions); + + return 
deployments.map(deployment => deployment.id); + } catch { + return AzureOpenAITemplate.recommendedModels.length > 0 + ? AzureOpenAITemplate.recommendedModels + : [FALLBACK_AZURE_MODEL]; + } + }, + + async selectModel(credentials: ProviderCredentials, _models: string[], _template?: typeof AzureOpenAITemplate): Promise { + const modelProxy = new AzureOpenAIModelProxy( + credentials.baseUrl || AzureOpenAITemplate.defaultBaseUrl, + credentials.apiKey, + credentials.additionalConfig?.azureApiVersion as string | undefined + ); + + try { + const deployments = await modelProxy.fetchDeploymentInfos({ + provider: 'azure-openai', + baseUrl: credentials.baseUrl || AzureOpenAITemplate.defaultBaseUrl, + apiKey: credentials.apiKey, + model: 'temp', + timeout: 300, + azureApiVersion: credentials.additionalConfig?.azureApiVersion as string | undefined + } as CodeMieConfigOptions); + + if (deployments.length === 0) { + return null; + } + + const deploymentChoices = deployments.map((deployment) => ({ + name: deployment.description + ? 
`${deployment.name} — ${deployment.description}` + : deployment.name, + value: deployment.id + })); + + const { selectedDeployment } = await inquirer.prompt([ + { + type: 'list', + name: 'selectedDeployment', + message: 'Select Azure OpenAI deployment:', + choices: deploymentChoices, + pageSize: 15 + } + ]); + + return selectedDeployment; + } catch { + return null; + } + }, + + buildConfig(credentials: ProviderCredentials, selectedModel: string): Partial { + return { + provider: 'azure-openai', + baseUrl: credentials.baseUrl, + apiKey: credentials.apiKey, + model: selectedModel, + azureDeployment: selectedModel, + azureApiVersion: credentials.additionalConfig?.azureApiVersion as string | undefined + }; + }, + + async validate(config: Partial): Promise { + if (!config.baseUrl) { + return { valid: false, errors: ['Azure OpenAI endpoint is required'] }; + } + + if (!config.apiKey) { + return { valid: false, errors: ['Azure OpenAI API key is required'] }; + } + + if (!config.azureApiVersion) { + return { valid: false, errors: ['Azure OpenAI API version is required'] }; + } + + return { valid: true }; + } +}; diff --git a/src/providers/plugins/azure-openai/azure-openai.template.ts b/src/providers/plugins/azure-openai/azure-openai.template.ts new file mode 100644 index 00000000..142329e2 --- /dev/null +++ b/src/providers/plugins/azure-openai/azure-openai.template.ts @@ -0,0 +1,264 @@ +/** + * Azure OpenAI Provider Template + * + * Template definition for Azure OpenAI. + * Auto-registers on import via registerProvider(). + * + * Key architecture notes for EPAM DIAL and standard Azure OpenAI: + * + * 1. AUTH HEADER: Azure OpenAI (including DIAL) uses `api-key: {key}` header, + * NOT `Authorization: Bearer {key}`. @ai-sdk/openai-compatible always sends + * the Bearer header when apiKey is set, so we pass apiKey='' and inject the + * correct header explicitly via `headers: { 'api-key': key }`. + * + * 2. 
URL ROUTING: Azure routes to a specific deployment via: + * /openai/deployments/{deployment}/chat/completions?api-version={ver} + * @ai-sdk/openai-compatible appends /chat/completions to baseURL, so + * baseURL must be: {endpoint}/openai/deployments/{deployment}/ + * + * 3. Environment variable flow for Claude Code agent: + * Config → exportEnvVars → CODEMIE_AZURE_OPENAI_BASE_URL / CODEMIE_API_KEY / CODEMIE_MODEL + * BaseAgentAdapter.transformEnvVars → ANTHROPIC_BASE_URL, ANTHROPIC_AUTH_TOKEN (from envMapping) + * agentHooks['*'].beforeRun → AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_VERSION, AZURE_OPENAI_DEPLOYMENT, AZURE_OPENAI_API_KEY + * agentHooks['claude'].beforeRun → CLAUDE_CODE_USE_AZURE_OPENAI=1, delete ANTHROPIC_AUTH_TOKEN, + * ANTHROPIC_MODEL={deployment} + * + * 4. CACHE_CONTROL STRIPPING (DIAL/Azure): When CLAUDE_CODE_USE_AZURE_OPENAI=1, + * Claude Code bypasses the SSO proxy entirely and sends requests directly to + * the Azure/DIAL endpoint. This means proxy-level sanitizers (e.g., + * ClaudeRequestNormalizerPlugin) are NOT applied. + * + * DIAL and Azure OpenAI use the OpenAI Chat Completions spec and do NOT support + * Anthropic-native fields such as `cache_control` on messages or content items, + * `thinking`, or `betas` request headers. Claude Code in recent versions adds + * these fields when prompt caching and experimental betas are enabled.
+ * + * To prevent HTTP 400 errors ("Extra inputs are not permitted on path + * messages.0.cache_control"), the claude hook MUST: + * - Set ENABLE_PROMPT_CACHING_1H=0 → prevents cache_control in messages + * - Set CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 → prevents beta HTTP headers + * that some DIAL versions reject (e.g. anthropic-beta: prompt-caching-2024-07-31) + * - Set CLAUDE_CODE_DISABLE_THINKING=1 / MAX_THINKING_TOKENS=0 + * → prevents `thinking` and follow-up `reasoning_content` blocks that + * DIAL rejects on later turns for Claude deployments + * - Set DISABLE_INTERLEAVED_THINKING=1 + * → prevents interleaved-thinking beta behavior on gateway/provider + * combinations that do not fully preserve Anthropic semantics + * + * NOTE: The agent default lifecycle.beforeRun (claude.plugin.ts) is NOT executed + * when a provider supplies BOTH a wildcard (*) AND agent-specific (claude) hook, + * because lifecycle-helpers.ts chains those two provider hooks and skips the + * agent default. All essential Claude Code defaults must therefore be re-applied + * here explicitly. + */ + +import type { ProviderTemplate } from '../../core/types.js'; +import { registerProvider } from '../../core/decorators.js'; + +const DEFAULT_AZURE_API_VERSION = '2024-06-01'; + +export const AzureOpenAITemplate = registerProvider({ + name: 'azure-openai', + displayName: 'Azure OpenAI', + description: 'Microsoft Azure OpenAI Service — supports GPT-4, o-series and any deployed model', + defaultBaseUrl: 'https://YOUR-RESOURCE-NAME.openai.azure.com', + requiresAuth: true, + authType: 'api-key', + priority: 13, + defaultProfileName: 'azure-openai', + // Models are fetched dynamically from the Azure deployments API during setup. + // These are shown only as a fallback when the API call fails.
+ recommendedModels: ['gpt-4.1', 'gpt-4o', 'o3-mini'], + capabilities: ['streaming', 'tools', 'function-calling', 'vision', 'json-mode'], + supportsModelInstallation: false, + supportsStreaming: true, + + // Export Azure-specific fields as CODEMIE_AZURE_OPENAI_* env vars. + // The standard CODEMIE_BASE_URL / CODEMIE_API_KEY / CODEMIE_MODEL are set by + // ConfigLoader.exportProviderEnvVars automatically from config.baseUrl / apiKey / model. + exportEnvVars: (config) => { + const env: Record = {}; + + // Mirror baseUrl into a dedicated Azure var so agent hooks can distinguish it + // from the generic proxy URL that SSO providers put in CODEMIE_BASE_URL. + if (config.baseUrl) env.CODEMIE_AZURE_OPENAI_BASE_URL = config.baseUrl; + if (config.azureApiVersion) env.CODEMIE_AZURE_OPENAI_API_VERSION = config.azureApiVersion; + // Deployment name (= model by default, may differ if user set azureDeployment explicitly) + if (config.azureDeployment) env.CODEMIE_AZURE_OPENAI_DEPLOYMENT = config.azureDeployment; + + return env; + }, + + agentHooks: { + // Wildcard hook: runs for ALL agents before the agent-specific hook. + // Sets the standard Azure SDK env vars used by OpenAI-compatible clients. 
+ '*': { + beforeRun: async (env) => { + // Azure endpoint (prefer the dedicated var; fall back to generic base URL) + const endpoint = env.CODEMIE_AZURE_OPENAI_BASE_URL || env.CODEMIE_BASE_URL; + if (endpoint) { + env.AZURE_OPENAI_ENDPOINT = endpoint; + } + + // API version + env.AZURE_OPENAI_API_VERSION = + env.CODEMIE_AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION; + + // Deployment name (falls back to model id — valid for most Azure setups + // where the deployment name matches the base model name) + env.AZURE_OPENAI_DEPLOYMENT = + env.CODEMIE_AZURE_OPENAI_DEPLOYMENT || env.CODEMIE_MODEL || ''; + + // Azure API key for generic SDK usage + if (env.CODEMIE_API_KEY) { + env.AZURE_OPENAI_API_KEY = env.CODEMIE_API_KEY; + } + + return env; + } + }, + + // Claude-specific hook: runs after the wildcard hook. + // Switches Claude Code into Azure OpenAI mode. + // See: https://docs.anthropic.com/en/docs/claude-code/azure-and-vertex + 'claude': { + beforeRun: async (env) => { + // Signal Claude Code to use Azure OpenAI instead of the Anthropic API. + env.CLAUDE_CODE_USE_AZURE_OPENAI = '1'; + + // Claude Code in Azure mode reads ANTHROPIC_BASE_URL as the Azure endpoint. + // (BaseAgentAdapter.transformEnvVars already mapped CODEMIE_BASE_URL → ANTHROPIC_BASE_URL; + // here we ensure it points to the Azure endpoint, not a potential proxy URL.) + const endpoint = env.CODEMIE_AZURE_OPENAI_BASE_URL || env.CODEMIE_BASE_URL; + if (endpoint) { + env.ANTHROPIC_BASE_URL = endpoint; + } + + // CRITICAL: Claude Code in Azure mode authenticates via AZURE_OPENAI_API_KEY. + // ANTHROPIC_AUTH_TOKEN was set by transformEnvVars (envMapping.apiKey) with the + // Azure key, which would cause Claude Code to attempt Anthropic API auth → 401. + delete env.ANTHROPIC_AUTH_TOKEN; + + // AZURE_OPENAI_API_KEY already set by the wildcard hook above; no duplication needed. + + // Model / deployment: Claude Code respects ANTHROPIC_MODEL for the active model. 
+ // In Azure/DIAL, model == deployment name. + if (env.CODEMIE_MODEL) { + env.ANTHROPIC_MODEL = env.CODEMIE_MODEL; + + // Keep internal/background model selection on the same deployment. + // This avoids hidden switches to Anthropic defaults that do not exist + // on a DIAL gateway or under a custom deployment naming scheme. + if (!env.ANTHROPIC_DEFAULT_HAIKU_MODEL) { + env.ANTHROPIC_DEFAULT_HAIKU_MODEL = env.CODEMIE_MODEL; + } + if (!env.ANTHROPIC_DEFAULT_SONNET_MODEL) { + env.ANTHROPIC_DEFAULT_SONNET_MODEL = env.CODEMIE_MODEL; + } + if (!env.ANTHROPIC_DEFAULT_OPUS_MODEL) { + env.ANTHROPIC_DEFAULT_OPUS_MODEL = env.CODEMIE_MODEL; + } + if (!env.ANTHROPIC_DEFAULT_FABLE_MODEL) { + env.ANTHROPIC_DEFAULT_FABLE_MODEL = env.CODEMIE_MODEL; + } + } + + if (!env.CLAUDE_CODE_SUBAGENT_MODEL) { + env.CLAUDE_CODE_SUBAGENT_MODEL = 'inherit'; + } + + // ---------------------------------------------------------------- + // DIAL/Azure compatibility: disable Anthropic-specific request fields + // ---------------------------------------------------------------- + // + // Claude Code in Azure mode sends requests DIRECTLY to the Azure/DIAL + // endpoint — the SSO proxy (and its ClaudeRequestNormalizerPlugin) is + // NOT in the path. DIAL uses the OpenAI Chat Completions spec and + // rejects Anthropic-native fields with HTTP 400: + // + // • cache_control on messages / content items + // → added by Claude Code when ENABLE_PROMPT_CACHING_1H=1 + // → DIAL error: "Extra inputs are not permitted on path messages.0.cache_control" + // + // • anthropic-beta: prompt-caching-* request header + // → added when experimental betas are enabled + // → some DIAL gateway versions reject unknown beta headers + // + // Fix: explicitly disable both features for Azure/DIAL sessions. + env.ENABLE_PROMPT_CACHING_1H = '0'; + env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS = '1'; + + // Azure/DIAL exposes an OpenAI-compatible endpoint, not Anthropic's + // full Messages API surface. 
Disable extended/interleaved thinking so + // Claude Code does not emit `thinking` params or persist + // `reasoning_content` blocks into follow-up messages. + env.CLAUDE_CODE_DISABLE_THINKING = '1'; + env.MAX_THINKING_TOKENS = '0'; + env.DISABLE_INTERLEAVED_THINKING = '1'; + env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING = '1'; + + // ---------------------------------------------------------------- + // Re-apply essential Claude Code defaults that are normally set by + // the agent's lifecycle.beforeRun (claude.plugin.ts). That hook is + // NOT executed when the provider supplies both a wildcard (*) and an + // agent-specific (claude) hook — lifecycle-helpers.ts chains only + // the two provider hooks and skips the agent default entirely. + // ---------------------------------------------------------------- + if (!env.CLAUDE_CODE_ENABLE_TELEMETRY) { + env.CLAUDE_CODE_ENABLE_TELEMETRY = '0'; + } + if (!env.DISABLE_AUTOUPDATER) { + env.DISABLE_AUTOUPDATER = '1'; + } + if (!env.ENABLE_TOOL_SEARCH) { + env.ENABLE_TOOL_SEARCH = '0'; + } + if (!env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE) { + let autocompactPct = 80; + if (env.CODEMIE_PROFILE_CONFIG) { + try { + const profileConfig = JSON.parse(env.CODEMIE_PROFILE_CONFIG); + if (typeof profileConfig.claudeAutocompactPct === 'number') { + autocompactPct = profileConfig.claudeAutocompactPct; + } + } catch { + // ignore malformed profile config + } + } + env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE = String(autocompactPct); + } + + return env; + } + } + }, + + setupInstructions: ` +# Azure OpenAI Setup Instructions + +## Prerequisites + +1. Azure subscription with Azure OpenAI access +2. An Azure OpenAI resource +3. 
At least one deployed model in Azure OpenAI Studio + +## Required Settings + +- **Endpoint**: https://.openai.azure.com +- **API Key**: Azure OpenAI key +- **API Version**: 2024-06-01 or newer +- **Deployment Name**: Azure deployment identifier + +## Using CodeMie with Azure OpenAI + +\`\`\`bash +codemie setup +# Select "Azure OpenAI" as provider +\`\`\` + +## Documentation + +- Azure OpenAI: https://learn.microsoft.com/azure/ai-services/openai/ +- Quotas and limits: https://learn.microsoft.com/azure/ai-services/openai/quotas-limits +` +}); diff --git a/src/providers/plugins/azure-openai/index.ts b/src/providers/plugins/azure-openai/index.ts new file mode 100644 index 00000000..ba45f193 --- /dev/null +++ b/src/providers/plugins/azure-openai/index.ts @@ -0,0 +1,17 @@ +/** + * Azure OpenAI Provider - Complete Provider Implementation + * + * Auto-registers with ProviderRegistry on import. + */ + +export { AzureOpenAITemplate } from './azure-openai.template.js'; +export { AzureOpenAISetupSteps } from './azure-openai.setup-steps.js'; +export { AzureOpenAIModelProxy } from './azure-openai.models.js'; +export { AzureOpenAIHealthCheck } from './azure-openai.health.js'; + +// Auto-register setup steps +import { ProviderRegistry } from '../../core/registry.js'; +import { AzureOpenAITemplate } from './azure-openai.template.js'; +import { AzureOpenAISetupSteps } from './azure-openai.setup-steps.js'; + +ProviderRegistry.registerProviderSetup(AzureOpenAITemplate, AzureOpenAISetupSteps); diff --git a/src/providers/plugins/ollama/ollama.setup-steps.ts b/src/providers/plugins/ollama/ollama.setup-steps.ts index b2fc62dc..ea39b35a 100644 --- a/src/providers/plugins/ollama/ollama.setup-steps.ts +++ b/src/providers/plugins/ollama/ollama.setup-steps.ts @@ -182,10 +182,13 @@ export const OllamaSetupSteps: ProviderSetupSteps = { apiKey: '', // Ollama doesn't use API keys model, timeout: 300, - debug: false + debug: false, + contextWindow: 32768, + maxPhysicalContext: 32768 }; } -}; +} + 
// Auto-register setup steps ProviderRegistry.registerSetupSteps('ollama', OllamaSetupSteps); diff --git a/src/utils/config.ts b/src/utils/config.ts index fb15c056..37acd059 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -733,11 +733,16 @@ export class ConfigLoader { const configDir = path.join(workingDir, '.codemie'); await fs.mkdir(configDir, { recursive: true }); - // Create multi-provider config structure + // Load existing local config to preserve other profiles and settings + const existingConfig = (await this.loadLocalMultiProviderConfig(workingDir).catch(() => ({ + version: 2 as const, + activeProfile: 'default', + profiles: {} + }))) as MultiProviderConfig; + const profileName = overrides?.profileName || 'default'; const profile: Partial = {}; - // Add overrides if provided if (overrides?.codeMieProject) { profile.codeMieProject = overrides.codeMieProject; } @@ -745,7 +750,6 @@ export class ConfigLoader { profile.codeMieIntegration = overrides.codeMieIntegration; } - // Add any other overrides for (const [key, value] of Object.entries(overrides || {})) { if (key !== 'profileName' && key !== 'codeMieProject' && key !== 'codeMieIntegration' && value !== undefined) { (profile as any)[key] = value; @@ -753,9 +757,10 @@ export class ConfigLoader { } const config: MultiProviderConfig = { - version: 2, + ...existingConfig, activeProfile: profileName, profiles: { + ...existingConfig.profiles, [profileName]: profile as any } }; diff --git a/src/utils/dial-model-integrity.ts b/src/utils/dial-model-integrity.ts new file mode 100644 index 00000000..415002d9 --- /dev/null +++ b/src/utils/dial-model-integrity.ts @@ -0,0 +1,85 @@ +import { AzureOpenAIModelProxy } from '../providers/plugins/azure-openai/azure-openai.models.js'; +import type { CodeMieConfigOptions } from './config.js'; +import chalk from 'chalk'; + +function apiLabel(modelId: string) { + const id = modelId.toLowerCase(); + if (id.startsWith('openai') || id.startsWith('gpt') || 
id.startsWith('tts-') || id.startsWith('audio-') || id.includes('embedding')) { + return 'full api features'; + } + return 'limited api features'; +} + +export async function runDialIntegrationTest(config: CodeMieConfigOptions): Promise { + const { baseUrl, apiKey, azureApiVersion = '2024-06-01' } = config; + if (!baseUrl || !apiKey) { + console.log(chalk.red('Missing DIAL baseUrl or apiKey.')); + return false; + } + const proxy = new AzureOpenAIModelProxy(baseUrl, apiKey, azureApiVersion); + let models; + try { + models = await proxy.fetchModels({ baseUrl, apiKey, azureApiVersion }); + } catch (err: any) { + console.log(chalk.red('Failed to list DIAL models: ' + (err?.message || err))); + return false; + } + if (!models || models.length === 0) { + console.log(chalk.yellow('No DIAL models found.')); + return false; + } + console.log(`\nFound ${models.length} models to test.\n`); + let success = true; + let stats = { full: 0, fullOk: 0, limited: 0, limitedOk: 0, errors: 0 }; + let idx = 0; + + for (const m of models) { + idx++; + const labelStr = apiLabel(m.id); + const isLimited = labelStr === 'limited api features'; + if (isLimited) stats.limited++; + else stats.full++; + const payload = { model: m.id, messages: [{ role: 'user', content: 'ping' }], max_tokens: 16 }; + const url = `${baseUrl}/openai/deployments/${encodeURIComponent(m.id)}/chat/completions?api-version=${azureApiVersion}`; + const headers = { 'api-key': apiKey, 'Content-Type': 'application/json' }; + const t0 = Date.now(); + let status = 'ok'; + let msg = ''; + let errLong = ''; + try { + const resp = await fetch(url, { method: 'POST', headers, body: JSON.stringify(payload) }); + const body = await resp.text(); + if (resp.ok) { + msg = chalk.green(`OK (${Date.now() - t0} ms)`); + if (isLimited) stats.limitedOk++; + else stats.fullOk++; + } else { + status = 'error'; + success = false; + errLong = body; + msg = chalk.red(`HTTP ${resp.status}`); + stats.errors++; + } + } catch (e: any) { + status = 
'error'; + success = false; + errLong = String(e?.message || e); + msg = chalk.red('ERROR'); + stats.errors++; + } + const icon = status === 'ok' ? chalk.green('✓') : chalk.red('✗'); + const idxStr = chalk.gray(`[${idx}/${models.length}]`); + const featureStr = chalk.gray(labelStr); + const line = `${icon} ${idxStr} ${m.name} | ${featureStr} | ${msg}`; + console.log(line); + if (status === 'error' && errLong) { + console.log(chalk.redBright(' Error details: ') + chalk.gray(errLong)); + } + } + // Summary finisher + const statLine = `\n${chalk.gray('[full api features]')}: total ${stats.full}, OK: ${stats.fullOk}` + + chalk.gray(' | ') + + `${chalk.gray('[limited api features]')}: total ${stats.limited}, OK: ${stats.limitedOk}, errors: ${stats.errors}`; + console.log(statLine); + return success; +} diff --git a/src/utils/profile.ts b/src/utils/profile.ts index 48901220..da461786 100644 --- a/src/utils/profile.ts +++ b/src/utils/profile.ts @@ -24,6 +24,7 @@ export function renderProfileInfo(config: { cliVersion?: string; sessionId?: string; isActive?: boolean; + title?: string; }): string { // Build complete output with logo and info const outputLines: string[] = []; @@ -35,6 +36,11 @@ export function renderProfileInfo(config: { return chalk.cyan(label.padEnd(13) + '│ ') + colorFn(value); }; + if (config.title) { + outputLines.push(chalk.bold.cyan(config.title)); + outputLines.push(''); + } + // Configuration details if (config.cliVersion) { outputLines.push(formatRow('CLI Version', config.cliVersion));