diff --git a/docs/MIGRATION_FROM_CLAUDE_CODE.md b/docs/MIGRATION_FROM_CLAUDE_CODE.md new file mode 100644 index 0000000..bf98a82 --- /dev/null +++ b/docs/MIGRATION_FROM_CLAUDE_CODE.md @@ -0,0 +1,163 @@ +# Migrating from Claude Code + +DeepCode targets Claude Code parity. If you already use Claude Code, +most of your workflow ports over with renames + a different API key. + +## TL;DR — the 5-minute switch + +```bash +# 1. Install DeepCode CLI +npm install -g deepcode-cli + +# 2. Set your DeepSeek key +mkdir -p ~/.deepcode +cat > ~/.deepcode/credentials.json </CLAUDE.md /AGENTS.md # if you have one + +# 4. Convert ~/.claude/ → ~/.deepcode/ +mv ~/.claude/settings.json ~/.deepcode/settings.json +mv ~/.claude/skills ~/.deepcode/skills +mv ~/.claude/agents ~/.deepcode/agents +mv ~/.claude/plugins ~/.deepcode/plugins +mv ~/.claude/keybindings.json ~/.deepcode/keybindings.json + +# 5. Run +deepcode +``` + +## Field-by-field mapping + +| Claude Code | DeepCode | Notes | +| ------------------------------------ | ----------------------------------------- | ----- | +| `~/.claude/credentials.json` | `~/.deepcode/credentials.json` | Same shape; just rename. | +| `~/.claude/settings.json` | `~/.deepcode/settings.json` | Schema mostly identical; see Settings table below. | +| `/.claude/settings.json` | `/.deepcode/settings.json` | Same. | +| `~/.claude/skills//SKILL.md` | `~/.deepcode/skills//SKILL.md` | Same frontmatter format. | +| `~/.claude/agents/*.md` | `~/.deepcode/agents/*.md` | Same shape. | +| `~/.claude/plugins/` | `~/.deepcode/plugins/` | Plugin manifest is identical (plugin.json). | +| `CLAUDE.md` (project root) | `AGENTS.md` (project root) | Or `DEEPCODE.md`. Both names recognized; AGENTS.md preferred. | +| `claude` CLI | `deepcode` CLI | Most flags identical (-p, --mode, --model, --effort). | +| `claude doctor` | `deepcode doctor` | Same. | +| `/login` | n/a — re-onboard via `deepcode` no-args | We don't have separate login state. 
| + +## Settings.json — model field + +Claude Code: +```json +{ "model": "claude-sonnet-4-5" } +``` + +DeepCode: +```json +{ "model": "deepseek-chat" } +``` + +Valid values: `deepseek-chat` (general/tool-use) · `deepseek-reasoner` +(multi-step reasoning) · `deepseek-v4-flash` · `deepseek-v4-pro`. + +## Slash commands + +Most commands are identical: + +| Command | Claude Code | DeepCode | +| ------------------ | ----------- | -------- | +| `/help`, `/?` | ✓ | ✓ | +| `/clear` | ✓ | ✓ | +| `/exit`, `/quit` | ✓ | ✓ | +| `/model` | ✓ | ✓ (constrained to DeepSeek) | +| `/mode` | ✓ | ✓ | +| `/effort` | ✓ | ✓ | +| `/cost` | ✓ | ✓ | +| `/context` | ✓ | ✓ | +| `/init` | ✓ | ✓ | +| `/mcp` | ✓ | ✓ | +| `/todos` | ✓ | ✓ | +| `/plugins` | ✓ | ✓ | +| `/keybindings` | ✓ | ✓ | +| `/vim` | ✓ | ✓ | + +See `docs/BEHAVIOR_PARITY.md` for the full comparison. + +## Hooks + +Identical schema. Copy your `hooks` block from Claude's settings.json +verbatim. DeepCode supports the same 10 event types (PreToolUse, +PostToolUse, Stop, SubagentStop, PreCompact, PostCompact, SessionStart, +SessionEnd, UserPromptSubmit, Notification) and the same 5 handler +types (command, http, mcp_tool, prompt, agent). + +## Permission rules + +Identical syntax: `Tool(spec)`. The 4 sub-syntaxes (bare, subcommand, +prefix, domain) work the same way: + +```jsonc +{ + "permissions": { + "deny": ["Bash(rm -rf /:*)", "WebFetch(domain:internal.corp)"], + "ask": ["Bash(npm install:*)"], + "allow": ["Read", "Bash(git diff:*)"] + } +} +``` + +## Sandbox + +Claude Code's sandbox subsystem maps directly. `sandbox.filesystem`, +`sandbox.network`, `sandbox.excludedCommands` all work identically. +**Difference**: DeepCode's M3.5-ext adds shell-pipeline analysis — a +command line like `git status && rm -rf /` will NOT escape the sandbox +just because `git` is listed in `excludedCommands`. (Claude Code allows the bypass.) See +`docs/security-model.md`. + +## Plugins + +Plugin manifest schema is identical. 
Plugins authored for Claude Code +should load in DeepCode unmodified. The trust ladder and hash pin work +the same way. **Difference**: DeepCode's M5.2 ships marketplace support and +Ed25519 signature verification; if you want that, sign your plugins. + +## MCP servers + +Identical. Copy your `mcpServers` block verbatim. DeepCode uses +`@modelcontextprotocol/sdk` so all standard stdio/http/sse MCP servers +work as-is. + +## Sub-agents + +`~/.deepcode/agents/<name>.md` — same frontmatter shape as Claude Code's +sub-agents. Both reference systems work. + +## Behaviors that DIFFER + +1. **Models**: only DeepSeek models. The `/model` picker is restricted to + `deepseek-*`. To use Claude/GPT, keep using Claude Code (or use the LSP + bridge once IDE-provider-routing lands — TBD). +2. **Pricing**: DeepSeek is 10-20× cheaper than Claude for similar + token counts. `/cost` reflects DeepSeek pricing. +3. **No image input yet**: the vision provider abstraction exists but no + provider is configured (v1.1). +4. **`/rewind`**: skeleton only — full rewind UX is in M7 (Mac client). 
+ +## Behaviors that are NEW in DeepCode + +- `auto` classifier mode (LLM-judged per-tool-call approval) +- Effort-bench (`packages/core/scripts/effort-bench.ts`) for measuring + cost/latency per tier +- Pipeline-aware sandbox bypass (vs Claude Code's leading-token-only) +- LSP bridge (Neovim / Emacs / Sublime via `deepcode-lsp`) +- VS Code extension (skeleton; ships in v1.1) + +## Getting help + +- `deepcode doctor` — diagnostic dump +- `deepcode --help` — flag reference +- `~/.deepcode/sessions/.jsonl` — transcript of every session +- File issues at https://github.com/oratis/deepcode/issues diff --git a/packages/core/package.json b/packages/core/package.json index 1090c58..6066f93 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -16,6 +16,7 @@ "files": [ "dist/", "skills/", + "schemas/", "README.md" ], "scripts": { diff --git a/packages/core/schemas/settings.schema.json b/packages/core/schemas/settings.schema.json new file mode 100644 index 0000000..030f1ae --- /dev/null +++ b/packages/core/schemas/settings.schema.json @@ -0,0 +1,199 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://deepcode.dev/schemas/settings.schema.json", + "title": "DeepCode settings.json", + "description": "Per-user/project/local config. Three precedence layers: user (~/.deepcode/settings.json) < project (.deepcode/settings.json) < local (.deepcode/settings.local.json, gitignored).", + "type": "object", + "additionalProperties": true, + "properties": { + "model": { + "type": "string", + "enum": ["deepseek-chat", "deepseek-reasoner", "deepseek-v4-flash", "deepseek-v4-pro"], + "description": "Default model. deepseek-chat = fast general; deepseek-reasoner = multi-step reasoning." + }, + "baseURL": { + "type": "string", + "format": "uri", + "description": "DeepSeek API base URL. Default: https://api.deepseek.com/v1." + }, + "apiKeyHelper": { + "type": "string", + "description": "Path to an executable that prints the API key on stdout. 
Overrides credentials.json." + }, + "effortLevel": { + "type": "string", + "enum": ["low", "medium", "high", "xhigh", "max"], + "description": "Default effort tier — affects maxTokens + temperature + multi-turn budget." + }, + "permissions": { + "type": "object", + "additionalProperties": false, + "properties": { + "defaultMode": { + "type": "string", + "enum": ["default", "acceptEdits", "plan", "auto", "dontAsk", "bypassPermissions"] + }, + "allow": { "type": "array", "items": { "type": "string" } }, + "ask": { "type": "array", "items": { "type": "string" } }, + "deny": { "type": "array", "items": { "type": "string" } }, + "additionalDirectories": { "type": "array", "items": { "type": "string" } } + } + }, + "autoMode": { + "type": "object", + "additionalProperties": false, + "properties": { + "allow": { "type": "array", "items": { "type": "string" } }, + "soft_deny": { "type": "array", "items": { "type": "string" } }, + "hard_deny": { "type": "array", "items": { "type": "string" } }, + "model": { "type": "string" }, + "fallback": { "type": "string", "enum": ["ask", "deny"] } + } + }, + "sandbox": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { "type": "boolean" }, + "filesystem": { + "type": "object", + "additionalProperties": false, + "properties": { + "allowWrite": { "type": "array", "items": { "type": "string" } }, + "denyWrite": { "type": "array", "items": { "type": "string" } }, + "allowRead": { "type": "array", "items": { "type": "string" } }, + "denyRead": { "type": "array", "items": { "type": "string" } } + } + }, + "network": { + "type": "object", + "additionalProperties": false, + "properties": { + "allowedDomains": { "type": "array", "items": { "type": "string" } }, + "deniedDomains": { "type": "array", "items": { "type": "string" } }, + "allowUnixSockets": { "type": "boolean" }, + "allowLocalBinding": { "type": "boolean" } + } + }, + "excludedCommands": { + "type": "array", + "items": { "type": "string" }, + 
"description": "Whole-pipeline command names that bypass sandbox (e.g. 'git')." + } + } + }, + "hooks": { + "type": "object", + "additionalProperties": false, + "properties": { + "PreToolUse": { "$ref": "#/definitions/HookMatcherList" }, + "PostToolUse": { "$ref": "#/definitions/HookMatcherList" }, + "Stop": { "$ref": "#/definitions/HookMatcherList" }, + "SubagentStop": { "$ref": "#/definitions/HookMatcherList" }, + "PreCompact": { "$ref": "#/definitions/HookMatcherList" }, + "PostCompact": { "$ref": "#/definitions/HookMatcherList" }, + "SessionStart": { "$ref": "#/definitions/HookMatcherList" }, + "SessionEnd": { "$ref": "#/definitions/HookMatcherList" }, + "UserPromptSubmit": { "$ref": "#/definitions/HookMatcherList" }, + "Notification": { "$ref": "#/definitions/HookMatcherList" } + } + }, + "allowedHttpHookUrls": { + "type": "array", + "items": { "type": "string", "format": "uri" }, + "description": "Whitelist of URL prefixes that 'http' hook handlers may POST to." + }, + "disableAllHooks": { "type": "boolean" }, + "memoryLoadCapKB": { "type": "integer", "minimum": 1, "maximum": 10000 }, + "outputStyle": { "type": "string" }, + "skillOverrides": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "mcpServers": { + "type": "object", + "additionalProperties": { "$ref": "#/definitions/McpServer" } + }, + "enabledMcpjsonServers": { "type": "array", "items": { "type": "string" } }, + "disabledMcpjsonServers": { "type": "array", "items": { "type": "string" } }, + "disabledPlugins": { "type": "array", "items": { "type": "string" } }, + "worktree": { + "type": "object", + "additionalProperties": false, + "properties": { + "baseRef": { "type": "string" }, + "symlinkDirectories": { "type": "array", "items": { "type": "string" } }, + "sparsePaths": { "type": "array", "items": { "type": "string" } }, + "bgIsolation": { "type": "boolean" } + } + }, + "voice": { + "type": "object", + "additionalProperties": false, + "properties": { + "provider": { 
"type": "string", "enum": ["whisper.cpp", "stub"] }, + "binPath": { "type": "string" }, + "modelPath": { "type": "string" } + } + } + }, + "definitions": { + "HookMatcherList": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "matcher": { "type": "string" }, + "hooks": { + "type": "array", + "items": { "$ref": "#/definitions/HookHandler" } + } + } + } + }, + "HookHandler": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": ["command", "http", "mcp_tool", "prompt", "agent"] + }, + "command": { "type": "string" }, + "url": { "type": "string", "format": "uri" }, + "headers": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "server": { "type": "string" }, + "tool": { "type": "string" }, + "prompt": { "type": "string" }, + "agent": { "type": "string" }, + "timeout": { "type": "integer", "minimum": 1, "maximum": 3600 }, + "if": { "type": "string", "description": "Permission-rule-syntax filter." 
} + }, + "required": ["type"] + }, + "McpServer": { + "type": "object", + "additionalProperties": false, + "properties": { + "command": { "type": "string" }, + "args": { "type": "array", "items": { "type": "string" } }, + "env": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "url": { "type": "string", "format": "uri" }, + "transport": { "type": "string", "enum": ["stdio", "http", "sse"] }, + "headers": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "headersHelper": { "type": "string" }, + "alwaysLoad": { "type": "boolean" } + } + } + } +} diff --git a/packages/core/src/config/schema.test.ts b/packages/core/src/config/schema.test.ts new file mode 100644 index 0000000..3ee0774 --- /dev/null +++ b/packages/core/src/config/schema.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from 'vitest'; +import { settingsSchemaJson, settingsSchemaObject, validateSettingsShallow } from './schema.js'; + +describe('settingsSchemaJson', () => { + it('returns valid JSON that parses to an object with $schema', async () => { + const raw = await settingsSchemaJson(); + expect(raw.length).toBeGreaterThan(100); + const obj = JSON.parse(raw) as { $schema: string; title: string }; + expect(obj.$schema).toContain('draft-07'); + expect(obj.title).toMatch(/settings/i); + }); +}); + +describe('settingsSchemaObject', () => { + it('returns the parsed object', async () => { + const o = await settingsSchemaObject(); + expect(o['title']).toMatch(/settings/i); + expect(typeof o['properties']).toBe('object'); + }); +}); + +describe('validateSettingsShallow', () => { + it('returns empty array for a clean config', () => { + expect( + validateSettingsShallow({ + model: 'deepseek-chat', + effortLevel: 'high', + permissions: { defaultMode: 'plan' }, + }), + ).toEqual([]); + }); + + it('flags unknown model', () => { + const errs = validateSettingsShallow({ model: 'gpt-4' }); + expect(errs.length).toBe(1); + expect(errs[0]).toMatch(/gpt-4/); + 
}); + + it('flags unknown effort tier', () => { + const errs = validateSettingsShallow({ effortLevel: 'turbo' }); + expect(errs[0]).toMatch(/turbo/); + }); + + it('flags unknown defaultMode', () => { + const errs = validateSettingsShallow({ permissions: { defaultMode: 'YOLO' } }); + expect(errs[0]).toMatch(/YOLO/); + }); + + it('flags unknown hook event', () => { + const errs = validateSettingsShallow({ hooks: { OnEverything: [] } }); + expect(errs[0]).toMatch(/OnEverything/); + }); + + it('returns no errors on empty config', () => { + expect(validateSettingsShallow({})).toEqual([]); + }); + + it('catches multiple errors at once', () => { + const errs = validateSettingsShallow({ + model: 'bad', + effortLevel: 'bad', + permissions: { defaultMode: 'bad' }, + }); + expect(errs.length).toBe(3); + }); +}); diff --git a/packages/core/src/config/schema.ts b/packages/core/src/config/schema.ts new file mode 100644 index 0000000..f4f02f8 --- /dev/null +++ b/packages/core/src/config/schema.ts @@ -0,0 +1,91 @@ +// Settings JSON schema — exposes the schema for IDE autocomplete. +// Spec: docs/DEVELOPMENT_PLAN.md §3.9 +// +// The schema itself lives in packages/core/schemas/settings.schema.json +// (deliberately outside src/ so it's published as a static asset and +// referenced via `$schema` from user settings.json files). +// +// At runtime we expose `settingsSchemaJson()` which reads + returns the +// schema body — used by the `/doctor` command and the desktop client's +// Settings screen for validation. + +import { readFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// Schema file is at /schemas/settings.schema.json +// From dist/config/schema.js the relative path is ../../schemas/... +// From src/config/schema.ts the relative path is ../../schemas/... too. 
+const SCHEMA_PATH = join(__dirname, '..', '..', 'schemas', 'settings.schema.json'); + +let cached: string | undefined; + +/** Returns the raw schema text. Reads SCHEMA_PATH on first use and memoizes it in `cached`; a failed read leaves `cached` unset, so the next call retries. */ +export async function settingsSchemaJson(): Promise { + if (cached === undefined) { + cached = await readFile(SCHEMA_PATH, 'utf8'); + } + return cached; +} + +/** Parses the schema text into an object. Only the raw text is cached, so JSON.parse runs on every call. */ +export async function settingsSchemaObject(): Promise> { + const raw = await settingsSchemaJson(); + return JSON.parse(raw) as Record; +} + +/** + * Lightweight validation: checks enum membership for `model`, `effortLevel` + * and `permissions.defaultMode`, and flags keys under `hooks` that are not + * one of the 10 known event names. Nothing else is inspected (no required + * fields, no value types). Returns an array of error strings; + * empty array = no detected issues (not proof of validity). + * + * NOTE(review): values are cast with `as string` without a typeof check, and + * a falsy `defaultMode` (e.g. "") is skipped by the truthiness test. + * + * NOTE(review): generic type arguments look stripped in this copy + * (`Record`, `Promise`) — confirm against the real file. + * + * This is NOT a full draft-07 validator; for that, route through an + * external library. The goal here is fast feedback in `/doctor` without + * dragging ajv into the runtime. + */ +export function validateSettingsShallow(settings: Record): string[] { + const errors: string[] = []; + + const modelEnum = ['deepseek-chat', 'deepseek-reasoner', 'deepseek-v4-flash', 'deepseek-v4-pro']; + if (settings['model'] !== undefined && !modelEnum.includes(settings['model'] as string)) { + errors.push(`settings.model "${settings['model']}" not in ${modelEnum.join(' | ')}`); + } + + const effortEnum = ['low', 'medium', 'high', 'xhigh', 'max']; + if ( + settings['effortLevel'] !== undefined && + !effortEnum.includes(settings['effortLevel'] as string) + ) { + errors.push(`settings.effortLevel "${settings['effortLevel']}" not in ${effortEnum.join(' | ')}`); + } + + const modeEnum = ['default', 'acceptEdits', 'plan', 'auto', 'dontAsk', 'bypassPermissions']; + const perm = settings['permissions'] as { defaultMode?: string } | undefined; + if (perm?.defaultMode && !modeEnum.includes(perm.defaultMode)) { + errors.push(`permissions.defaultMode "${perm.defaultMode}" not in ${modeEnum.join(' | ')}`); + } + + const hooks = settings['hooks'] as Record | undefined; + if (hooks) { + const validEvents = [ + 'PreToolUse', + 'PostToolUse', + 'Stop', 
'SubagentStop', + 'PreCompact', + 'PostCompact', + 'SessionStart', + 'SessionEnd', + 'UserPromptSubmit', + 'Notification', + ]; + for (const k of Object.keys(hooks)) { + if (!validEvents.includes(k)) { + errors.push(`hooks.${k} is not a known event (valid: ${validEvents.join(', ')})`); + } + } + } + + return errors; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 511387e..4751c6a 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -241,6 +241,25 @@ export { type MarketplaceConfig, } from './plugins/index.js'; +// Settings JSON schema + shallow validator (v1.1) +export { + settingsSchemaJson, + settingsSchemaObject, + validateSettingsShallow, +} from './config/schema.js'; + +// Vision (v1.1 — image input abstraction) +export { + StubVisionProvider, + OpenAICompatVisionProvider, + loadImage, + parseDataUrl, + guessContentType, + type VisionProvider, + type ImageContentBlock, + type ProviderImagePayload, +} from './vision/index.js'; + // IPC protocol (M6-rest — renderer ↔ main process type-safe channels) export { newTurnId, diff --git a/packages/core/src/vision/index.test.ts b/packages/core/src/vision/index.test.ts new file mode 100644 index 0000000..8a066a2 --- /dev/null +++ b/packages/core/src/vision/index.test.ts @@ -0,0 +1,147 @@ +import { promises as fs } from 'node:fs'; +import { createServer, type Server } from 'node:http'; +import { AddressInfo } from 'node:net'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + guessContentType, + loadImage, + OpenAICompatVisionProvider, + parseDataUrl, + StubVisionProvider, +} from './index.js'; + +describe('guessContentType', () => { + it.each([ + ['/x/y.png', 'image/png'], + ['/x/y.jpg', 'image/jpeg'], + ['/x/y.jpeg', 'image/jpeg'], + ['/x/y.webp', 'image/webp'], + ['/x/y.gif', 'image/gif'], + ['/x/y.svg', 
'image/svg+xml'], + ['/x/y.bin', 'application/octet-stream'], + ])('%s → %s', (p, ct) => expect(guessContentType(p)).toBe(ct)); +}); + +describe('parseDataUrl', () => { + it('decodes base64 data URL', () => { + const r = parseDataUrl('data:image/png;base64,iVBORw0KGgo='); + expect(r.contentType).toBe('image/png'); + expect(r.base64).toBe('iVBORw0KGgo='); + expect(r.byteSize).toBeGreaterThan(0); + }); + it('decodes plain (non-base64) data URL', () => { + const r = parseDataUrl('data:text/plain,hello%20world'); + expect(r.contentType).toBe('text/plain'); + expect(Buffer.from(r.base64, 'base64').toString('utf8')).toBe('hello world'); + }); + it('throws on malformed', () => { + expect(() => parseDataUrl('not-a-data-url')).toThrow(); + }); +}); + +describe('loadImage', () => { + let dir: string; + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'dc-vision-')); + }); + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + it('reads a local file', async () => { + const path = join(dir, 'x.png'); + await fs.writeFile(path, Buffer.from([0x89, 0x50, 0x4e, 0x47])); // PNG magic + const r = await loadImage({ type: 'image', source: path }); + expect(r.contentType).toBe('image/png'); + expect(r.byteSize).toBe(4); + }); + + it('handles a data URL', async () => { + const r = await loadImage({ + type: 'image', + source: 'data:image/png;base64,iVBORw0KGgo=', + }); + expect(r.contentType).toBe('image/png'); + }); + + it('fetches a remote URL', async () => { + const server = createServer((_req, res) => { + res.writeHead(200, { 'content-type': 'image/jpeg' }); + res.end(Buffer.from([0xff, 0xd8, 0xff])); // JPEG magic + }); + await new Promise((r) => server.listen(0, '127.0.0.1', () => r())); + const addr = server.address() as AddressInfo; + try { + const r = await loadImage({ + type: 'image', + source: `http://127.0.0.1:${addr.port}/x.jpg`, + }); + expect(r.contentType).toBe('image/jpeg'); + expect(r.byteSize).toBe(3); + } finally { + await 
new Promise((res) => server.close(() => res())); + } + }); + + it('throws on remote 4xx', async () => { + const server = createServer((_req, res) => { + res.writeHead(404); + res.end(); + }); + await new Promise((r) => server.listen(0, '127.0.0.1', () => r())); + const addr = server.address() as AddressInfo; + try { + await expect( + loadImage({ type: 'image', source: `http://127.0.0.1:${addr.port}/x.jpg` }), + ).rejects.toThrow(/404/); + } finally { + await new Promise((res) => server.close(() => res())); + } + }); +}); + +describe('StubVisionProvider', () => { + it('reports no support and throws on encode', async () => { + const p = new StubVisionProvider(); + expect(p.supports()).toBe(false); + await expect(p.encode()).rejects.toThrow(/no vision/); + }); +}); + +describe('OpenAICompatVisionProvider', () => { + let dir: string; + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'dc-vis-prov-')); + }); + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + it('encodes a local file to an image_url payload with a data URL', async () => { + const path = join(dir, 'pic.png'); + await fs.writeFile(path, Buffer.from([0x89, 0x50, 0x4e, 0x47])); + const p = new OpenAICompatVisionProvider(); + const out = await p.encode({ type: 'image', source: path }); + expect(out.byteSize).toBe(4); + const pl = out.payload as { type: string; image_url: { url: string; detail: string } }; + expect(pl.type).toBe('image_url'); + expect(pl.image_url.url).toMatch(/^data:image\/png;base64,/); + expect(pl.image_url.detail).toBe('auto'); + }); + + it('throws when image exceeds maxBytes', async () => { + const p = new OpenAICompatVisionProvider(); + p.maxBytes = 2; // ridiculously low + const path = join(dir, 'big.png'); + await fs.writeFile(path, Buffer.alloc(100)); + await expect(p.encode({ type: 'image', source: path })).rejects.toThrow(/too large/); + }); + + it('supports all image blocks (provider decides upstream)', () => { + const p = new 
OpenAICompatVisionProvider(); + expect(p.supports({ type: 'image', source: 'x' })).toBe(true); + }); +}); diff --git a/packages/core/src/vision/index.ts b/packages/core/src/vision/index.ts new file mode 100644 index 0000000..2ee9793 --- /dev/null +++ b/packages/core/src/vision/index.ts @@ -0,0 +1,152 @@ +// Image input subsystem — abstraction for sending images alongside text +// to a multimodal model. Spec: docs/DEVELOPMENT_PLAN.md §v1.1 (image input) +// +// DeepSeek doesn't ship a vision model as of v1; this scaffold defines the +// interface so the agent loop can carry image content blocks once a vision +// provider is wired (Qwen-VL via OpenRouter, GPT-4o via OpenAI-compat, etc.). +// +// The shape mirrors text streaming — provider implementations decode an +// image_url or base64 block into whatever shape their API wants. + +import { promises as fs } from 'node:fs'; +import { extname } from 'node:path'; + +export interface ImageContentBlock { + type: 'image'; + /** One of: data URL, file path, or http(s) URL. */ + source: string; + /** Optional alt-text — useful for accessibility + provider hints. */ + altText?: string; +} + +export interface VisionProvider { + readonly name: string; + /** Whether this provider can handle the image (e.g. some only accept JPEG). */ + supports(block: ImageContentBlock): boolean; + /** + * Convert an ImageContentBlock to the provider-specific shape. Most APIs + * accept either base64 data URLs or remote URLs. + */ + encode(block: ImageContentBlock): Promise; +} + +/** What the provider's chat API wants for an image attachment. */ +export interface ProviderImagePayload { + /** Provider-native shape. The agent loop treats this opaquely. */ + payload: unknown; + /** Provider-reported size in bytes (for cost accounting). */ + byteSize: number; +} + +/** + * Resolve any of (data URL | file path | http URL) into a normalized + * { contentType, base64, byteSize } tuple. 
/* Used by providers that want to upload as base64 vs link directly. */

/**
 * Load the image referenced by `block.source` fully into memory.
 *
 * Dispatch order: `data:` URL → {@link parseDataUrl}; `http:`/`https:` URL →
 * `fetch`; anything else is treated as a local file path.
 *
 * For remote images the content type is the response's `content-type` header
 * reduced to its bare media type (parameters such as `; charset=…` are
 * dropped so the value can be embedded in a data URL). When the header is
 * absent or empty, the type is guessed from the URL *path*, so a query
 * string does not hide the extension.
 *
 * @param block - Image block whose `source` is a data URL, URL or file path.
 * @returns The media type, the base64-encoded bytes and the decoded size.
 * @throws Error `image fetch failed: <status>` on a non-2xx response; `fetch`
 *   and `fs.readFile` failures propagate unchanged.
 */
export async function loadImage(block: ImageContentBlock): Promise<{
  contentType: string;
  base64: string;
  byteSize: number;
}> {
  const src = block.source;
  if (src.startsWith('data:')) {
    return parseDataUrl(src);
  }
  if (/^https?:/i.test(src)) {
    const res = await fetch(src);
    if (!res.ok) throw new Error(`image fetch failed: ${res.status}`);
    const bytes = Buffer.from(await res.arrayBuffer());
    const declared = res.headers.get('content-type')?.split(';')[0]?.trim().toLowerCase();
    // An empty header value must also fall through to the guess, hence no `??`.
    const contentType = declared ? declared : guessContentType(remotePathname(src));
    return { contentType, base64: bytes.toString('base64'), byteSize: bytes.length };
  }
  // Local file
  const bytes = await fs.readFile(src);
  return {
    contentType: guessContentType(src),
    base64: bytes.toString('base64'),
    byteSize: bytes.length,
  };
}

/** Path component of a URL, or the input unchanged when it does not parse. */
function remotePathname(url: string): string {
  try {
    return new URL(url).pathname;
  } catch {
    return url;
  }
}

const HEX_PAIR = /^[0-9a-fA-F]{2}$/;

/**
 * Byte-wise percent-decoding: every `%XX` becomes the byte 0xXX, everything
 * else is kept as its UTF-8 bytes. Unlike `decodeURIComponent` this never
 * throws, so percent-encoded binary payloads and stray `%` signs both work.
 */
function percentDecode(text: string): Buffer {
  const input = Buffer.from(text, 'utf8');
  const output = Buffer.alloc(input.length);
  let written = 0;
  for (let i = 0; i < input.length; i += 1) {
    let byte = input[i] ?? 0;
    if (byte === 0x25 /* '%' */) {
      const pair = input.toString('latin1', i + 1, i + 3);
      if (HEX_PAIR.test(pair)) {
        byte = parseInt(pair, 16);
        i += 2;
      }
    }
    output[written] = byte;
    written += 1;
  }
  return output.subarray(0, written);
}

/**
 * Parse a `data:` URL into its media type and payload.
 *
 * Accepts everything the previous regex-based parser accepted, plus:
 *  - extra parameters before the base64 flag (`data:text/plain;charset=utf-8;base64,…`),
 *  - an empty payload (`data:text/plain,`),
 *  - an omitted media type (`data:,hi`), which defaults to `text/plain` as in RFC 2397,
 *  - payloads containing newlines or percent-encoded binary bytes.
 *
 * The returned `base64` is always re-encoded from the decoded bytes, so it is
 * canonical (padded, no whitespace) and always agrees with `byteSize`.
 *
 * @param dataUrl - A string of the form `data:[<mediatype>][;base64],<data>`.
 * @returns The bare media type (parameters dropped), canonical base64 and byte count.
 * @throws Error `malformed data URL` when the `data:` prefix or the comma is missing.
 */
export function parseDataUrl(dataUrl: string): {
  contentType: string;
  base64: string;
  byteSize: number;
} {
  const comma = dataUrl.indexOf(',');
  if (!dataUrl.startsWith('data:') || comma === -1) {
    throw new Error('malformed data URL');
  }
  const params = dataUrl
    .slice('data:'.length, comma)
    .split(';')
    .map((part) => part.trim());
  const body = dataUrl.slice(comma + 1);

  // `base64` only counts as the encoding flag when it is the LAST parameter
  // and follows a `;` (so `data:base64,x` is a media type, not a flag).
  const lastParam = params[params.length - 1] ?? '';
  const isBase64 = params.length > 1 && lastParam.toLowerCase() === 'base64';
  const mediaType = params[0] ?? '';
  const contentType = mediaType !== '' ? mediaType : 'text/plain';

  const unescaped = percentDecode(body);
  const bytes = isBase64 ? Buffer.from(unescaped.toString('latin1'), 'base64') : unescaped;
  return { contentType, base64: bytes.toString('base64'), byteSize: bytes.length };
}

const CONTENT_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['.png', 'image/png'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.webp', 'image/webp'],
  ['.gif', 'image/gif'],
  ['.svg', 'image/svg+xml'],
]);

/**
 * Guess a media type from a path's extension (case-insensitive).
 *
 * @param path - File path or URL path; only the final extension is examined.
 * @returns The matching image media type, or `application/octet-stream` when
 *   the extension is missing or unknown.
 */
export function guessContentType(path: string): string {
  return CONTENT_TYPE_BY_EXTENSION.get(extname(path).toLowerCase()) ?? 'application/octet-stream';
}

// ──────────────────────────────────────────────────────────────────────────
// Stub provider — used when no vision model is configured.
// supports() returns false and encode() always rejects, so a caller that
// checks supports() first can skip image blocks explicitly, and one that
// does not gets a loud error instead of a silently dropped image.
// ──────────────────────────────────────────────────────────────────────────

/** Placeholder provider for installations without a vision-capable model. */
export class StubVisionProvider implements VisionProvider {
  readonly name = 'stub';

  /** Always `false`: no image block can be handled. */
  supports(): boolean {
    return false;
  }

  /**
   * Never produces a payload.
   *
   * @throws Error `no vision provider configured` (as a rejected promise) on every call.
   */
  async encode(): Promise<ProviderImagePayload> {
    throw new Error('no vision provider configured');
  }
}

// ──────────────────────────────────────────────────────────────────────────
// OpenAI-compatible provider (works with most multimodal endpoints that
// implement the OpenAI chat API — Qwen-VL via OpenRouter, GPT-4o, etc.)
// ──────────────────────────────────────────────────────────────────────────

/**
 * Encodes images as OpenAI-style `image_url` content parts. Every source
 * (data URL, file, remote URL) is loaded via `loadImage` and inlined as a
 * base64 data URL; remote URLs are not passed through as links.
 */
export class OpenAICompatVisionProvider implements VisionProvider {
  readonly name = 'openai-compat';
  /** Max image bytes the provider accepts; default 20 MB. */
  maxBytes = 20 * 1024 * 1024;

  /** Accepts every block tagged `image`; format checks are left to the upstream API. */
  supports(block: ImageContentBlock): boolean {
    return block.type === 'image';
  }

  /**
   * Load the image and wrap it as `{ type: 'image_url', image_url: { url, detail } }`.
   *
   * The size limit is enforced after loading, so an oversized image has
   * already been read into memory by the time it is rejected.
   *
   * @param block - Image block to encode.
   * @returns The provider payload plus the decoded byte size.
   * @throws Error `image too large: <size> > <maxBytes> bytes` when the image
   *   exceeds `maxBytes`; errors raised by `loadImage` propagate unchanged.
   */
  async encode(block: ImageContentBlock): Promise<ProviderImagePayload> {
    const { contentType, base64, byteSize } = await loadImage(block);
    if (byteSize > this.maxBytes) {
      throw new Error(`image too large: ${byteSize} > ${this.maxBytes} bytes`);
    }
    return {
      payload: {
        type: 'image_url',
        image_url: { url: `data:${contentType};base64,${base64}`, detail: 'auto' },
      },
      byteSize,
    };
  }
}